diff --git a/.gitignore b/.gitignore index ef26322a19d..dce5b5ea934 100644 --- a/.gitignore +++ b/.gitignore @@ -3,16 +3,18 @@ *.reject *.spec *.bak +*.rpm .*.swp *.ninja .ninja_* .gdb_history errmsg.sys typescript +_CPack_Packages CMakeCache.txt CMakeFiles/ MakeFile -install_manifest.txt +install_manifest*.txt CPackConfig.cmake CPackSourceConfig.cmake CTestTestfile.cmake diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..f08786dd641 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "libmariadb"] + path = libmariadb + url = https://github.com/MariaDB/mariadb-connector-c diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index 36072ebaa7e..01d654dba53 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -170,7 +170,7 @@ debug_cflags="-DEXTRA_DEBUG -DSAFE_MUTEX -DSAFEMALLOC" error_inject="--with-error-inject " # # Base C++ flags for all builds -base_cxxflags="-felide-constructors -fno-exceptions -fno-rtti" +base_cxxflags="-felide-constructors -fexceptions -fno-rtti" # # Flags for optimizing builds. # Be as fast as we can be without losing our ability to backtrace. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 48742995722..f35136f00f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,9 @@ IF(CMAKE_VERSION VERSION_EQUAL "3.0.0" OR CMAKE_POLICY(SET CMP0045 OLD) CMAKE_POLICY(SET CMP0042 OLD) ENDIF() +IF(POLICY CMP0054) + CMAKE_POLICY(SET CMP0054 NEW) +ENDIF() MESSAGE(STATUS "Running cmake version ${CMAKE_VERSION}") @@ -112,6 +115,11 @@ FOREACH(_base ENDIF() ENDFOREACH() +FOREACH(tool gtar tar git) + STRING(TOUPPER ${tool} TOOL) + FIND_PROGRAM(${TOOL}_EXECUTABLE ${tool} DOC "path to the executable") + MARK_AS_ADVANCED(${TOOL}_EXECUTABLE) +ENDFOREACH() # Following autotools tradition, add preprocessor definitions # specified in environment variable CPPFLAGS @@ -364,7 +372,7 @@ ADD_SUBDIRECTORY(strings) ADD_SUBDIRECTORY(vio) ADD_SUBDIRECTORY(mysys) ADD_SUBDIRECTORY(mysys_ssl) -ADD_SUBDIRECTORY(libmysql) +INCLUDE(mariadb_connector_c) # this does ADD_SUBDIRECTORY(libmariadb) ADD_SUBDIRECTORY(client) ADD_SUBDIRECTORY(extra) ADD_SUBDIRECTORY(libservices) diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt index e4643ad9358..e0d34b9f5ed 100644 --- a/client/CMakeLists.txt +++ b/client/CMakeLists.txt @@ -19,20 +19,25 @@ INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/mysys_ssl ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} - ${CMAKE_SOURCE_DIR}/libmysql ${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/strings ${MY_READLINE_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ) +INCLUDE_DIRECTORIES(BEFORE + ${CMAKE_BINARY_DIR}/libmariadb/include + ${CMAKE_SOURCE_DIR}/libmariadb/include) + ## We will need libeay32.dll and ssleay32.dll when running client executables. 
COPY_OPENSSL_DLLS(copy_openssl_client) +SET(CLIENT_LIB mariadbclient mysys) + ADD_DEFINITIONS(${SSL_DEFINES}) MYSQL_ADD_EXECUTABLE(mysql completion_hash.cc mysql.cc readline.cc ${CMAKE_SOURCE_DIR}/sql/sql_string.cc) -TARGET_LINK_LIBRARIES(mysql mysqlclient) +TARGET_LINK_LIBRARIES(mysql ${CLIENT_LIB}) IF(UNIX) TARGET_LINK_LIBRARIES(mysql ${MY_READLINE_LIBRARY}) SET_TARGET_PROPERTIES(mysql PROPERTIES ENABLE_EXPORTS TRUE) @@ -40,39 +45,40 @@ ENDIF(UNIX) MYSQL_ADD_EXECUTABLE(mysqltest mysqltest.cc COMPONENT Test) SET_SOURCE_FILES_PROPERTIES(mysqltest.cc PROPERTIES COMPILE_FLAGS "-DTHREADS") -TARGET_LINK_LIBRARIES(mysqltest mysqlclient pcre pcreposix) +TARGET_LINK_LIBRARIES(mysqltest ${CLIENT_LIB} pcre pcreposix) SET_TARGET_PROPERTIES(mysqltest PROPERTIES ENABLE_EXPORTS TRUE) MYSQL_ADD_EXECUTABLE(mysqlcheck mysqlcheck.c) -TARGET_LINK_LIBRARIES(mysqlcheck mysqlclient) +TARGET_LINK_LIBRARIES(mysqlcheck ${CLIENT_LIB}) MYSQL_ADD_EXECUTABLE(mysqldump mysqldump.c ../sql-common/my_user.c) -TARGET_LINK_LIBRARIES(mysqldump mysqlclient) +TARGET_LINK_LIBRARIES(mysqldump ${CLIENT_LIB}) + MYSQL_ADD_EXECUTABLE(mysqlimport mysqlimport.c) SET_SOURCE_FILES_PROPERTIES(mysqlimport.c PROPERTIES COMPILE_FLAGS "-DTHREADS") -TARGET_LINK_LIBRARIES(mysqlimport mysqlclient) +TARGET_LINK_LIBRARIES(mysqlimport ${CLIENT_LIB}) MYSQL_ADD_EXECUTABLE(mysql_upgrade mysql_upgrade.c COMPONENT Server) -TARGET_LINK_LIBRARIES(mysql_upgrade mysqlclient) +TARGET_LINK_LIBRARIES(mysql_upgrade ${CLIENT_LIB}) ADD_DEPENDENCIES(mysql_upgrade GenFixPrivs) MYSQL_ADD_EXECUTABLE(mysqlshow mysqlshow.c) -TARGET_LINK_LIBRARIES(mysqlshow mysqlclient) +TARGET_LINK_LIBRARIES(mysqlshow ${CLIENT_LIB}) MYSQL_ADD_EXECUTABLE(mysql_plugin mysql_plugin.c) -TARGET_LINK_LIBRARIES(mysql_plugin mysqlclient) +TARGET_LINK_LIBRARIES(mysql_plugin ${CLIENT_LIB}) MYSQL_ADD_EXECUTABLE(mysqlbinlog mysqlbinlog.cc) -TARGET_LINK_LIBRARIES(mysqlbinlog mysqlclient) +TARGET_LINK_LIBRARIES(mysqlbinlog ${CLIENT_LIB}) -MYSQL_ADD_EXECUTABLE(mysqladmin 
mysqladmin.cc) -TARGET_LINK_LIBRARIES(mysqladmin mysqlclient) +MYSQL_ADD_EXECUTABLE(mysqladmin mysqladmin.cc ../sql/password.c) +TARGET_LINK_LIBRARIES(mysqladmin ${CLIENT_LIB}) MYSQL_ADD_EXECUTABLE(mysqlslap mysqlslap.c) SET_SOURCE_FILES_PROPERTIES(mysqlslap.c PROPERTIES COMPILE_FLAGS "-DTHREADS") -TARGET_LINK_LIBRARIES(mysqlslap mysqlclient) +TARGET_LINK_LIBRARIES(mysqlslap ${CLIENT_LIB}) # "WIN32" also covers 64 bit. "echo" is used in some files below "mysql-test/". IF(WIN32) @@ -81,10 +87,16 @@ ENDIF(WIN32) # async_example is just a code example, do not install it. ADD_EXECUTABLE(async_example async_example.c) -TARGET_LINK_LIBRARIES(async_example mysqlclient) +TARGET_LINK_LIBRARIES(async_example ${CLIENT_LIB}) SET_TARGET_PROPERTIES (mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysqlslap mysql_plugin async_example PROPERTIES HAS_CXX TRUE) + +FOREACH(t mysql mysqltest mysqltest mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysql_plugin mysqlbinlog + mysqladmin mysqlslap async_example) + ADD_DEPENDENCIES(${t} GenError ${CLIENT_LIB}) +ENDFOREACH() + ADD_DEFINITIONS(-DHAVE_DLOPEN) diff --git a/client/client_priv.h b/client/client_priv.h index c0c4954cdf0..1d85791fa73 100644 --- a/client/client_priv.h +++ b/client/client_priv.h @@ -24,6 +24,7 @@ #include #include #include +#include #ifndef WEXITSTATUS # ifdef __WIN__ diff --git a/client/mysql.cc b/client/mysql.cc index 2cc818bd6c1..65b7c192595 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -1143,6 +1143,9 @@ int main(int argc,char *argv[]) outfile[0]=0; // no (default) outfile strmov(pager, "stdout"); // the default, if --pager wasn't given + + mysql_init(&mysql); + { char *tmp=getenv("PAGER"); if (tmp && strlen(tmp)) @@ -1203,7 +1206,6 @@ int main(int argc,char *argv[]) glob_buffer.realloc(512); completion_hash_init(&ht, 128); init_alloc_root(&hash_mem_root, 16384, 0, MYF(0)); - bzero((char*) &mysql, sizeof(mysql)); if (sql_connect(current_host,current_db,current_user,opt_password, 
opt_silent)) { @@ -4641,12 +4643,13 @@ sql_real_connect(char *host,char *database,char *user,char *password, } return -1; // Retryable } - - charset_info= mysql.charset; + + charset_info= get_charset_by_name(mysql.charset->name, MYF(0)); + connected=1; #ifndef EMBEDDED_LIBRARY - mysql.reconnect= debug_info_flag; // We want to know if this happens + mysql_options(&mysql, MYSQL_OPT_RECONNECT, &debug_info_flag); /* CLIENT_PROGRESS_OBSOLETE is set only if we requested it in @@ -4655,7 +4658,10 @@ sql_real_connect(char *host,char *database,char *user,char *password, if (mysql.client_flag & CLIENT_PROGRESS_OBSOLETE) mysql_options(&mysql, MYSQL_PROGRESS_CALLBACK, (void*) report_progress); #else - mysql.reconnect= 1; + { + my_bool reconnect= 1; + mysql_options(&mysql, MYSQL_OPT_RECONNECT, &reconnect); + } #endif #ifdef HAVE_READLINE build_completion_hash(opt_rehash, 1); diff --git a/client/mysql_plugin.c b/client/mysql_plugin.c index ebf04c9a8c3..72fa9485c6c 100644 --- a/client/mysql_plugin.c +++ b/client/mysql_plugin.c @@ -20,6 +20,7 @@ #include #include #include +#include #define SHOW_VERSION "1.0.0" #define PRINT_VERSION do { printf("%s Ver %s Distrib %s\n", \ diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc index fe9db6ea93d..d186a4c7fcc 100644 --- a/client/mysqladmin.cc +++ b/client/mysqladmin.cc @@ -22,9 +22,10 @@ #include /* because of signal() */ #include #include -#include +#include #include #include +#include #define ADMIN_VERSION "9.1" #define MAX_MYSQL_VAR 512 @@ -440,7 +441,7 @@ int main(int argc,char *argv[]) didn't signal for us to die. Otherwise, signal failure. 
*/ - if (mysql.net.vio == 0) + if (mysql.net.pvio == 0) { if (option_wait && !interrupted) { @@ -521,7 +522,8 @@ static my_bool sql_connect(MYSQL *mysql, uint wait) if (mysql_real_connect(mysql,host,user,opt_password,NullS,tcp_port, unix_port, CLIENT_REMEMBER_OPTIONS)) { - mysql->reconnect= 1; + my_bool reconnect= 1; + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); if (info) { fputs("\n",stderr); @@ -1077,9 +1079,9 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv) } } if (old) - make_scrambled_password_323(crypted_pw, typed_password); + my_make_scrambled_password_323(crypted_pw, typed_password, strlen(typed_password)); else - make_scrambled_password(crypted_pw, typed_password); + my_make_scrambled_password(crypted_pw, typed_password, strlen(typed_password)); } else crypted_pw[0]=0; /* No password */ @@ -1187,7 +1189,9 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv) break; } case ADMIN_PING: - mysql->reconnect=0; /* We want to know of reconnects */ + { + my_bool reconnect= 0; + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); if (!mysql_ping(mysql)) { if (option_silent < 2) @@ -1197,7 +1201,8 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv) { if (mysql_errno(mysql) == CR_SERVER_GONE_ERROR) { - mysql->reconnect=1; + reconnect= 1; + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); if (!mysql_ping(mysql)) puts("connection was down, but mysqld is now alive"); } @@ -1208,8 +1213,10 @@ static int execute_commands(MYSQL *mysql,int argc, char **argv) return -1; } } - mysql->reconnect=1; /* Automatic reconnect is default */ + reconnect=1; /* Automatic reconnect is default */ + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); break; + } default: my_printf_error(0, "Unknown command: '%-.60s'", error_flags, argv[0]); return 1; diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index fa75c423d1b..1a184e2ffe8 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -52,14 +52,18 @@ 
#include +#define my_net_write ma_net_write +#define net_flush ma_net_flush +#define cli_safe_read mysql_net_read_packet +#define my_net_read ma_net_read +extern "C" unsigned char *mysql_net_store_length(unsigned char *packet, size_t length); +#define net_store_length mysql_net_store_length + Rpl_filter *binlog_filter= 0; #define BIN_LOG_HEADER_SIZE 4 #define PROBE_HEADER_LEN (EVENT_LEN_OFFSET+4) - -#define CLIENT_CAPABILITIES (CLIENT_LONG_PASSWORD | CLIENT_LONG_FLAG | CLIENT_LOCAL_FILES) - /* Needed for Rpl_filter */ CHARSET_INFO* system_charset_info= &my_charset_utf8_general_ci; @@ -1764,6 +1768,7 @@ static int parse_args(int *argc, char*** argv) */ static Exit_status safe_connect() { + my_bool reconnect= 1; /* Close any old connections to MySQL */ if (mysql) mysql_close(mysql); @@ -1809,7 +1814,7 @@ static Exit_status safe_connect() error("Failed on connect: %s", mysql_error(mysql)); return ERROR_STOP; } - mysql->reconnect= 1; + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); return OK_CONTINUE; } @@ -2843,6 +2848,8 @@ struct encryption_service_st encryption_handler= #include "my_decimal.h" #include "decimal.c" #include "my_decimal.cc" +#include "../sql-common/my_time.c" +#include "password.c" #include "log_event.cc" #include "log_event_old.cc" #include "rpl_utility.cc" diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c index 3a6f5462ced..c1ea002515b 100644 --- a/client/mysqlcheck.c +++ b/client/mysqlcheck.c @@ -1073,6 +1073,7 @@ static void print_result() static int dbConnect(char *host, char *user, char *passwd) { + my_bool reconnect= 1; DBUG_ENTER("dbConnect"); if (verbose > 1) { @@ -1111,7 +1112,7 @@ static int dbConnect(char *host, char *user, char *passwd) DBerror(&mysql_connection, "when trying to connect"); DBUG_RETURN(1); } - mysql_connection.reconnect= 1; + mysql_options(&mysql_connection, MYSQL_OPT_RECONNECT, &reconnect); DBUG_RETURN(0); } /* dbConnect */ diff --git a/client/mysqldump.c b/client/mysqldump.c index 60f244bf591..acb72a12bf3 
100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -1643,6 +1643,7 @@ static void maybe_exit(int error) static int connect_to_db(char *host, char *user,char *passwd) { char buff[20+FN_REFLEN]; + my_bool reconnect; DBUG_ENTER("connect_to_db"); verbose_msg("-- Connecting to %s...\n", host ? host : "localhost"); @@ -1697,7 +1698,8 @@ static int connect_to_db(char *host, char *user,char *passwd) As we're going to set SQL_MODE, it would be lost on reconnect, so we cannot reconnect. */ - mysql->reconnect= 0; + reconnect= 0; + mysql_options(&mysql_connection, MYSQL_OPT_RECONNECT, &reconnect); my_snprintf(buff, sizeof(buff), "/*!40100 SET @@SQL_MODE='%s' */", compatible_mode_normal_str); if (mysql_query_with_error_report(mysql, 0, buff)) diff --git a/client/mysqlimport.c b/client/mysqlimport.c index aee445d387d..688789ec436 100644 --- a/client/mysqlimport.c +++ b/client/mysqlimport.c @@ -420,6 +420,7 @@ static MYSQL *db_connect(char *host, char *database, char *user, char *passwd) { MYSQL *mysql; + my_bool reconnect; if (verbose) fprintf(stdout, "Connecting to %s\n", host ? 
host : "localhost"); if (opt_use_threads && !lock_tables) @@ -475,7 +476,8 @@ static MYSQL *db_connect(char *host, char *database, ignore_errors=0; /* NO RETURN FROM db_error */ db_error(mysql); } - mysql->reconnect= 0; + reconnect= 0; + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); if (verbose) fprintf(stdout, "Selecting database %s\n", database); if (mysql_select_db(mysql, database)) diff --git a/client/mysqlshow.c b/client/mysqlshow.c index eec4a8d3268..e3e30abc426 100644 --- a/client/mysqlshow.c +++ b/client/mysqlshow.c @@ -68,6 +68,7 @@ int main(int argc, char **argv) my_bool first_argument_uses_wildcards=0; char *wild; MYSQL mysql; + my_bool reconnect; static char **defaults_argv; MY_INIT(argv[0]); sf_leaking_memory=1; /* don't report memory leaks on early exits */ @@ -155,7 +156,8 @@ int main(int argc, char **argv) error= 1; goto error; } - mysql.reconnect= 1; + reconnect= 1; + mysql_options(&mysql, MYSQL_OPT_RECONNECT, &reconnect); switch (argc) { case 0: error=list_dbs(&mysql,wild); break; diff --git a/client/mysqltest.cc b/client/mysqltest.cc index b802642a02c..cae33fd5522 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -191,6 +191,8 @@ static char global_subst_from[200]; static char global_subst_to[200]; static char *global_subst= NULL; static MEM_ROOT require_file_root; +static const my_bool my_true= 1; +static const my_bool my_false= 0; /* Block stack */ enum block_cmd { @@ -5406,18 +5408,6 @@ static char *get_string(char **to_ptr, char **from_ptr, } -void set_reconnect(MYSQL* mysql, my_bool val) -{ - my_bool reconnect= val; - DBUG_ENTER("set_reconnect"); - DBUG_PRINT("info", ("val: %d", (int) val)); -#if MYSQL_VERSION_ID < 50000 - mysql->reconnect= reconnect; -#else - mysql_options(mysql, MYSQL_OPT_RECONNECT, (char *)&reconnect); -#endif - DBUG_VOID_RETURN; -} /** @@ -5502,11 +5492,7 @@ void do_close_connection(struct st_command *command) #ifndef EMBEDDED_LIBRARY if (command->type == Q_DIRTY_CLOSE) { - if (con->mysql->net.vio) - 
{ - vio_delete(con->mysql->net.vio); - con->mysql->net.vio = 0; - } + mariadb_cancel(con->mysql); } #endif /*!EMBEDDED_LIBRARY*/ if (con->stmt) @@ -8231,10 +8217,18 @@ end: revert_properties(); /* Close the statement if reconnect, need new prepare */ - if (mysql->reconnect) { - mysql_stmt_close(stmt); - cn->stmt= NULL; +#ifndef EMBEDDED_LIBRARY + my_bool reconnect; + mysql_get_option(mysql, MYSQL_OPT_RECONNECT, &reconnect); + if (reconnect) +#else + if (mysql->reconnect) +#endif + { + mysql_stmt_close(stmt); + cn->stmt= NULL; + } } DBUG_VOID_RETURN; @@ -8766,7 +8760,7 @@ static void dump_backtrace(void) #endif } fputs("Attempting backtrace...\n", stderr); - my_print_stacktrace(NULL, my_thread_stack_size); + my_print_stacktrace(NULL, (ulong)my_thread_stack_size); } #else @@ -9409,10 +9403,10 @@ int main(int argc, char **argv) non_blocking_api_enabled= 1; break; case Q_DISABLE_RECONNECT: - set_reconnect(cur_con->mysql, 0); + mysql_options(cur_con->mysql, MYSQL_OPT_RECONNECT, &my_false); break; case Q_ENABLE_RECONNECT: - set_reconnect(cur_con->mysql, 1); + mysql_options(cur_con->mysql, MYSQL_OPT_RECONNECT, &my_true); /* Close any open statements - no reconnect, need new prepare */ close_statements(); break; diff --git a/cmake/CPackRPM.cmake b/cmake/CPackRPM.cmake index 1d22387d098..3b57decc5bb 100644 --- a/cmake/CPackRPM.cmake +++ b/cmake/CPackRPM.cmake @@ -2,12 +2,28 @@ # Wrapper for CPackRPM.cmake # +macro(set_from_component WHAT) + set(orig_CPACK_RPM_PACKAGE_${WHAT} ${CPACK_RPM_PACKAGE_${WHAT}}) + if(CPACK_RPM_${CPACK_RPM_PACKAGE_COMPONENT}_PACKAGE_${WHAT}) + set(CPACK_RPM_PACKAGE_${WHAT} ${CPACK_RPM_${CPACK_RPM_PACKAGE_COMPONENT}_PACKAGE_${WHAT}}) + endif() +endmacro() +macro(restore WHAT) + set(CPACK_RPM_PACKAGE_${WHAT} ${orig_CPACK_RPM_PACKAGE_${WHAT}}) +endmacro() + +set_from_component(LICENSE) +set_from_component(VENDOR) + # load the original CPackRPM.cmake set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) unset(CMAKE_MODULE_PATH) include(CPackRPM) 
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH}) +restore(LICENSE) +restore(VENDOR) + # per-component cleanup foreach(_RPM_SPEC_HEADER URL REQUIRES SUGGESTS PROVIDES OBSOLETES PREFIX CONFLICTS AUTOPROV AUTOREQ AUTOREQPROV) unset(TMP_RPM_${_RPM_SPEC_HEADER}) diff --git a/cmake/FindJNI.cmake b/cmake/FindJNI.cmake new file mode 100644 index 00000000000..fb2f4801a70 --- /dev/null +++ b/cmake/FindJNI.cmake @@ -0,0 +1,8 @@ +if(DEFINED JAVA_AWT_LIBRARY) + return() +endif() + +set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) +unset(CMAKE_MODULE_PATH) +include(FindJNI) +set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH}) diff --git a/cmake/FindJava.cmake b/cmake/FindJava.cmake new file mode 100644 index 00000000000..95bbf8682cd --- /dev/null +++ b/cmake/FindJava.cmake @@ -0,0 +1,8 @@ +if(DEFINED Java_JAVA_EXECUTABLE) + return() +endif() + +set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) +unset(CMAKE_MODULE_PATH) +include(FindJava) +set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH}) diff --git a/cmake/compile_flags.cmake b/cmake/compile_flags.cmake new file mode 100644 index 00000000000..b39bf7b79d6 --- /dev/null +++ b/cmake/compile_flags.cmake @@ -0,0 +1,44 @@ +# Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +## ADD_COMPILE_FLAGS( COMPILE_FLAGS ) +MACRO(ADD_COMPILE_FLAGS) + SET(FILES "") + SET(FLAGS "") + SET(COMPILE_FLAGS_SEEN) + FOREACH(ARG ${ARGV}) + IF(ARG STREQUAL "COMPILE_FLAGS") + SET(COMPILE_FLAGS_SEEN 1) + ELSEIF(COMPILE_FLAGS_SEEN) + LIST(APPEND FLAGS ${ARG}) + ELSE() + LIST(APPEND FILES ${ARG}) + ENDIF() + ENDFOREACH() + FOREACH(FILE ${FILES}) + FOREACH(FLAG ${FLAGS}) + GET_SOURCE_FILE_PROPERTY(PROP ${FILE} COMPILE_FLAGS) + IF(NOT PROP) + SET(PROP ${FLAG}) + ELSE() + SET(PROP "${PROP} ${FLAG}") + ENDIF() + SET_SOURCE_FILES_PROPERTIES( + ${FILE} PROPERTIES COMPILE_FLAGS "${PROP}" + ) + ENDFOREACH() + ENDFOREACH() +ENDMACRO() diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake index 0e0a121dbb8..6e3cf6d7ce0 100644 --- a/cmake/cpack_rpm.cmake +++ b/cmake/cpack_rpm.cmake @@ -48,6 +48,20 @@ MariaDB bug reports should be submitted through https://jira.mariadb.org ") +SET(CPACK_RPM_shared_PACKAGE_VENDOR "MariaDB Corporation Ab") +SET(CPACK_RPM_shared_PACKAGE_LICENSE "LGPLv2.1") +SET(CPACK_RPM_shared_PACKAGE_SUMMARY "LGPL MariaDB client library") +SET(CPACK_RPM_shared_PACKAGE_DESCRIPTION " +This is LGPL MariaDB client library that can be used to connect to MySQL +or MariaDB. + +This code is based on the LGPL libmysql client library from MySQL 3.23 +and PHP's mysqlnd extension. 
+ +This product includes PHP software, freely available from + +") + SET(CPACK_RPM_SPEC_MORE_DEFINE " %define mysql_vendor ${CPACK_PACKAGE_VENDOR} %define mysqlversion ${MYSQL_NO_DASH_VERSION} @@ -108,15 +122,13 @@ ENDMACRO(SETA) SETA(CPACK_RPM_client_PACKAGE_OBSOLETES "mysql-client" - "MySQL-client" - "MySQL-OurDelta-client") + "MySQL-client") SETA(CPACK_RPM_client_PACKAGE_PROVIDES "MySQL-client" "mysql-client") SETA(CPACK_RPM_devel_PACKAGE_OBSOLETES - "MySQL-devel" - "MySQL-OurDelta-devel") + "MySQL-devel") SETA(CPACK_RPM_devel_PACKAGE_PROVIDES "MySQL-devel") @@ -125,8 +137,7 @@ SETA(CPACK_RPM_server_PACKAGE_OBSOLETES "MySQL" "mysql-server" "MySQL-server" - "MariaDB-Galera-server" - "MySQL-OurDelta-server") + "MariaDB-Galera-server") SETA(CPACK_RPM_server_PACKAGE_PROVIDES "MariaDB" "MySQL" @@ -134,22 +145,20 @@ SETA(CPACK_RPM_server_PACKAGE_PROVIDES "msqlormysql" "mysql-server") -SETA(CPACK_RPM_shared_PACKAGE_OBSOLETES +SETA(CPACK_RPM_compat_PACKAGE_OBSOLETES "mysql-shared" "MySQL-shared-standard" "MySQL-shared-pro" "MySQL-shared-pro-cert" "MySQL-shared-pro-gpl" "MySQL-shared-pro-gpl-cert" - "MySQL-shared" - "MySQL-OurDelta-shared") -SETA(CPACK_RPM_shared_PACKAGE_PROVIDES + "MySQL-shared") +SETA(CPACK_RPM_compat_PACKAGE_PROVIDES "MySQL-shared" "mysql-shared") SETA(CPACK_RPM_test_PACKAGE_OBSOLETES - "MySQL-test" - "MySQL-OurDelta-test") + "MySQL-test") SETA(CPACK_RPM_test_PACKAGE_PROVIDES "MySQL-test") @@ -169,6 +178,8 @@ SET(CPACK_RPM_server_POST_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/ SET(CPACK_RPM_server_POST_UNINSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/server-postun.sh) SET(CPACK_RPM_shared_POST_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/shared-post.sh) SET(CPACK_RPM_shared_POST_UNINSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/shared-post.sh) +SET(CPACK_RPM_compat_POST_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/shared-post.sh) +SET(CPACK_RPM_compat_POST_UNINSTALL_SCRIPT_FILE 
${CMAKE_SOURCE_DIR}/support-files/rpm/shared-post.sh) MACRO(ALTERNATIVE_NAME real alt) SET(ver "%{version}-%{release}") @@ -236,18 +247,32 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES # If we want to build build MariaDB-shared-compat, # extract compat libraries from MariaDB-shared-5.3 rpm -FILE(GLOB compat_rpm RELATIVE ${CMAKE_SOURCE_DIR} +FILE(GLOB compat53 RELATIVE ${CMAKE_SOURCE_DIR} "${CMAKE_SOURCE_DIR}/../MariaDB-shared-5.3.*.rpm") -IF (compat_rpm) - MESSAGE("Using ${compat_rpm} to build MariaDB-compat") - INSTALL(CODE "EXECUTE_PROCESS( - COMMAND rpm2cpio ${CMAKE_SOURCE_DIR}/${compat_rpm} - COMMAND cpio --extract --make-directories */libmysqlclient*.so.* - - WORKING_DIRECTORY \$ENV{DESTDIR}) - EXECUTE_PROCESS( - COMMAND chmod -R a+rX . - WORKING_DIRECTORY \$ENV{DESTDIR})" - COMPONENT Compat) +FILE(GLOB compat101 RELATIVE ${CMAKE_SOURCE_DIR} + "${CMAKE_SOURCE_DIR}/../MariaDB-shared-10.1.*.rpm") +IF(compat53 AND compat101) + FOREACH(compat_rpm "${compat53}" "${compat101}") + MESSAGE("Using ${compat_rpm} to build MariaDB-compat") + INSTALL(CODE "EXECUTE_PROCESS( + COMMAND rpm2cpio ${CMAKE_SOURCE_DIR}/${compat_rpm} + COMMAND cpio --extract --make-directories */libmysqlclient*.so.* - + WORKING_DIRECTORY \$ENV{DESTDIR}) + EXECUTE_PROCESS( + COMMAND chmod -R a+rX . 
+ WORKING_DIRECTORY \$ENV{DESTDIR})" + COMPONENT Compat) + ENDFOREACH() + + EXECUTE_PROCESS( + COMMAND rpm -q --provides -p "${CMAKE_SOURCE_DIR}/${compat101}" + COMMAND grep "=.*10\\.1" + ERROR_QUIET + OUTPUT_VARIABLE compat_provides) + + STRING(REPLACE "\n" " " compat_provides "${compat_provides}") + SETA(CPACK_RPM_compat_PACKAGE_PROVIDES "${compat_provides}") + SET(CPACK_COMPONENTS_ALL ${CPACK_COMPONENTS_ALL} Compat) # RHEL6/CentOS6 install Postfix by default, and it requires @@ -258,10 +283,7 @@ IF (compat_rpm) IF(RPM MATCHES "(rhel|centos)6") SET(CPACK_RPM_common_PACKAGE_REQUIRES "MariaDB-compat") ENDIF() -ENDIF(compat_rpm) - -SET(CPACK_RPM_compat_PACKAGE_PROVIDES "mysql-libs = 5.3.5") # exact version doesn't matter as long as it greater than 5.1 -SET(CPACK_RPM_compat_PACKAGE_OBSOLETES "mysql-libs < 5.3.5") +ENDIF() ENDIF(RPM) diff --git a/cmake/libutils.cmake b/cmake/libutils.cmake index bcba924dfa8..0965ec8cb83 100644 --- a/cmake/libutils.cmake +++ b/cmake/libutils.cmake @@ -58,13 +58,13 @@ IF(WIN32 OR CYGWIN OR APPLE OR WITH_PIC OR DISABLE_SHARED OR NOT CMAKE_SHARED_LI ENDIF() INCLUDE(CMakeParseArguments) -# CREATE_EXPORT_FILE (VAR target api_functions) +# CREATE_EXPORTS_FILE (VAR target api_functions) # Internal macro, used to create source file for shared libraries that # otherwise consists entirely of "convenience" libraries. On Windows, # also exports API functions as dllexport. 
On unix, creates a dummy file # that references all exports and this prevents linker from creating an # empty library(there are unportable alternatives, --whole-archive) -MACRO(CREATE_EXPORT_FILE VAR TARGET API_FUNCTIONS) +MACRO(CREATE_EXPORTS_FILE VAR TARGET API_FUNCTIONS) IF(WIN32) SET(DUMMY ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_dummy.c) SET(EXPORTS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_exports.def) @@ -255,7 +255,7 @@ MACRO(MERGE_LIBRARIES) ENDIF() ENDFOREACH() ENDIF() - CREATE_EXPORT_FILE(SRC ${TARGET} "${ARG_EXPORTS}") + CREATE_EXPORTS_FILE(SRC ${TARGET} "${ARG_EXPORTS}") IF(NOT ARG_NOINSTALL) ADD_VERSION_INFO(${TARGET} SHARED SRC) ENDIF() diff --git a/cmake/make_dist.cmake.in b/cmake/make_dist.cmake.in index f35d16834b6..3cc93d10fb8 100644 --- a/cmake/make_dist.cmake.in +++ b/cmake/make_dist.cmake.in @@ -43,12 +43,12 @@ IF(GIT_EXECUTABLE) MESSAGE(STATUS "Running git checkout-index") EXECUTE_PROCESS( COMMAND "${GIT_EXECUTABLE}" checkout-index --all --prefix=${PACKAGE_DIR}/ + COMMAND "${GIT_EXECUTABLE}" submodule foreach "${GIT_EXECUTABLE} checkout-index --all --prefix=${PACKAGE_DIR}/$path/" WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} RESULT_VARIABLE RESULT ) - IF(NOT RESULT EQUAL 0) - SET(GIT_EXECUTABLE) + SET(GIT_EXECUTABLE) ENDIF() ENDIF() diff --git a/cmake/mariadb_connector_c.cmake b/cmake/mariadb_connector_c.cmake new file mode 100644 index 00000000000..400fcda0c1a --- /dev/null +++ b/cmake/mariadb_connector_c.cmake @@ -0,0 +1,54 @@ +IF(NOT EXISTS ${CMAKE_SOURCE_DIR}/libmariadb/CMakeLists.txt AND GIT_EXECUTABLE) + EXECUTE_PROCESS(COMMAND "${GIT_EXECUTABLE}" submodule init + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}") + EXECUTE_PROCESS(COMMAND "${GIT_EXECUTABLE}" submodule update + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}") +ENDIF() +IF(NOT EXISTS ${CMAKE_SOURCE_DIR}/libmariadb/CMakeLists.txt) + MESSAGE(FATAL_ERROR "No MariaDB Connector/C! Run + git submodule init + git submodule update +Then restart the build. 
+") +ENDIF() + +SET(OPT CONC_) + +IF (CMAKE_BUILD_TYPE STREQUAL "Debug") + SET(CONC_WITH_RTC ON) +ENDIF() + +SET(CONC_WITH_SIGNCODE ${SIGNCODE}) +SET(SIGN_OPTIONS ${SIGNTOOL_PARAMETERS}) + +IF(TARGET zlib) + GET_PROPERTY(ZLIB_LIBRARY_LOCATION TARGET zlib PROPERTY LOCATION) +ELSE() + SET(ZLIB_LIBRARY_LOCATION ${ZLIB_LIBRARY}) +ENDIF() + +IF(SSL_DEFINES MATCHES "YASSL") + IF(WIN32) + SET(CONC_WITH_SSL "SCHANNEL") + ELSE() + SET(CONC_WITH_SSL "GNUTLS") # that's what debian wants, right? + ENDIF() +ELSE() + SET(CONC_WITH_SSL "OPENSSL") + SET(OPENSSL_FOUND TRUE) +ENDIF() + +SET(CONC_WITH_CURL OFF) +SET(CONC_WITH_MYSQLCOMPAT ON) + +IF (INSTALL_LAYOUT STREQUAL "RPM") + SET(CONC_INSTALL_LAYOUT "RPM") +ELSE() + SET(CONC_INSTALL_LAYOUT "DEFAULT") +ENDIF() + +SET(PLUGIN_INSTALL_DIR ${INSTALL_PLUGINDIR}) +SET(MARIADB_UNIX_ADDR ${MYSQL_UNIX_ADDR}) + +MESSAGE("== Configuring MariaDB Connector/C") +ADD_SUBDIRECTORY(libmariadb) diff --git a/cmake/misc.cmake b/cmake/misc.cmake index c87fc41e8e3..928c33fd8d2 100644 --- a/cmake/misc.cmake +++ b/cmake/misc.cmake @@ -1,6 +1,13 @@ -FUNCTION(MESSAGE_ONCE id out) - IF(NOT __msg1_${id} STREQUAL "${out}") +IF ("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" VERSION_LESS "2.8.7") + FUNCTION(MESSAGE_ONCE id out) MESSAGE(STATUS "${out}") - ENDIF() - SET(__msg1_${id} "${out}" CACHE INTERNAL "") -ENDFUNCTION() + ENDFUNCTION() +ELSE() + FUNCTION(MESSAGE_ONCE id out) + STRING(MD5 hash "${out}") + IF(NOT __msg1_${id} STREQUAL "${hash}") + MESSAGE(STATUS "${out}") + ENDIF() + SET(__msg1_${id} ${hash} CACHE INTERNAL "") + ENDFUNCTION() +ENDIF() diff --git a/cmake/mysql_version.cmake b/cmake/mysql_version.cmake index 1802b1a2678..5033c8b5213 100644 --- a/cmake/mysql_version.cmake +++ b/cmake/mysql_version.cmake @@ -17,7 +17,7 @@ # Global constants, only to be changed between major releases. 
# -SET(SHARED_LIB_MAJOR_VERSION "18") +SET(SHARED_LIB_MAJOR_VERSION "19") SET(PROTOCOL_VERSION "10") SET(DOT_FRM_VERSION "6") diff --git a/cmake/ssl.cmake b/cmake/ssl.cmake index 0173cf026f1..9f2c8797d2c 100644 --- a/cmake/ssl.cmake +++ b/cmake/ssl.cmake @@ -111,9 +111,9 @@ MACRO (MYSQL_CHECK_SSL) UNSET(OPENSSL_APPLINK_C) UNSET(OPENSSL_APPLINK_C CACHE) ENDIF() - IF (OPENSSL_LIBRARIES) - UNSET(OPENSSL_LIBRARIES) - UNSET(OPENSSL_LIBRARIES CACHE) + IF (OPENSSL_SSL_LIBRARY) + UNSET(OPENSSL_SSL_LIBRARY) + UNSET(OPENSSL_SSL_LIBRARY CACHE) ENDIF() ELSEIF(WITH_SSL STREQUAL "system" OR WITH_SSL STREQUAL "yes" OR @@ -149,13 +149,13 @@ MACRO (MYSQL_CHECK_SSL) IF (WITH_SSL_PATH) LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) ENDIF() - FIND_LIBRARY(OPENSSL_LIBRARIES + FIND_LIBRARY(OPENSSL_SSL_LIBRARY NAMES ssl ssleay32 ssleay32MD HINTS ${OPENSSL_ROOT_DIR}/lib) - FIND_LIBRARY(CRYPTO_LIBRARY + FIND_LIBRARY(OPENSSL_CRYPTO_LIBRARY NAMES crypto libeay32 HINTS ${OPENSSL_ROOT_DIR}/lib) - MARK_AS_ADVANCED(CRYPTO_LIBRARY OPENSSL_LIBRARIES OPENSSL_ROOT_DIR + MARK_AS_ADVANCED(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY OPENSSL_ROOT_DIR OPENSSL_INCLUDE_DIR) IF (WITH_SSL_PATH) LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) @@ -166,10 +166,10 @@ MACRO (MYSQL_CHECK_SSL) CHECK_SYMBOL_EXISTS(SHA512_DIGEST_LENGTH "openssl/sha.h" HAVE_SHA512_DIGEST_LENGTH) SET(CMAKE_REQUIRED_INCLUDES) - IF(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES AND - CRYPTO_LIBRARY AND HAVE_SHA512_DIGEST_LENGTH) + IF(OPENSSL_INCLUDE_DIR AND OPENSSL_SSL_LIBRARY AND + OPENSSL_CRYPTO_LIBRARY AND HAVE_SHA512_DIGEST_LENGTH) SET(SSL_SOURCES "") - SET(SSL_LIBRARIES ${OPENSSL_LIBRARIES} ${CRYPTO_LIBRARY}) + SET(SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) IF(CMAKE_SYSTEM_NAME MATCHES "SunOS") SET(SSL_LIBRARIES ${SSL_LIBRARIES} ${LIBSOCKET}) ENDIF() @@ -181,17 +181,16 @@ MACRO (MYSQL_CHECK_SSL) # #define OPENSSL_VERSION_NUMBER 0x1000103fL # Encoded as MNNFFPPS: major minor fix patch status FILE(STRINGS 
"${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" - OPENSSL_VERSION_NUMBER - REGEX "^#define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x[0-9].*" - ) + OPENSSL_VERSION_TEXT + REGEX "^# *define[\t ]+OPENSSL_VERSION_TEXT[\t ]+") STRING(REGEX REPLACE - "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9]).*$" "\\1" - OPENSSL_MAJOR_VERSION "${OPENSSL_VERSION_NUMBER}" - ) + "^.*OPENSSL_VERSION_TEXT[\t ]+\"OpenSSL ([-0-9a-z.]+).*$" "\\1" + OPENSSL_VERSION "${OPENSSL_VERSION_TEXT}") + MESSAGE_ONCE(OPENSSL_INCLUDE_DIR "OPENSSL_INCLUDE_DIR = ${OPENSSL_INCLUDE_DIR}") - MESSAGE_ONCE(OPENSSL_LIBRARIES "OPENSSL_LIBRARIES = ${OPENSSL_LIBRARIES}") - MESSAGE_ONCE(CRYPTO_LIBRARY "CRYPTO_LIBRARY = ${CRYPTO_LIBRARY}") - MESSAGE_ONCE(OPENSSL_MAJOR_VERSION "OPENSSL_MAJOR_VERSION = ${OPENSSL_MAJOR_VERSION}") + MESSAGE_ONCE(OPENSSL_SSL_LIBRARY "OPENSSL_SSL_LIBRARY = ${OPENSSL_SSL_LIBRARY}") + MESSAGE_ONCE(OPENSSL_CRYPTO_LIBRARY "OPENSSL_CRYPTO_LIBRARY = ${OPENSSL_CRYPTO_LIBRARY}") + MESSAGE_ONCE(OPENSSL_VERSION "OPENSSL_VERSION = ${OPENSSL_VERSION}") MESSAGE_ONCE(SSL_LIBRARIES "SSL_LIBRARIES = ${SSL_LIBRARIES}") SET(SSL_INCLUDE_DIRS ${OPENSSL_INCLUDE_DIR}) SET(SSL_INTERNAL_INCLUDE_DIRS "") @@ -225,8 +224,8 @@ ENDMACRO() # So we should be linking static versions of the libraries. 
MACRO (COPY_OPENSSL_DLLS target_name) IF (WIN32 AND WITH_SSL_PATH) - GET_FILENAME_COMPONENT(CRYPTO_NAME "${CRYPTO_LIBRARY}" NAME_WE) - GET_FILENAME_COMPONENT(OPENSSL_NAME "${OPENSSL_LIBRARIES}" NAME_WE) + GET_FILENAME_COMPONENT(CRYPTO_NAME "${OPENSSL_CRYPTO_LIBRARY}" NAME_WE) + GET_FILENAME_COMPONENT(OPENSSL_NAME "${OPENSSL_SSL_LIBRARY}" NAME_WE) FILE(GLOB HAVE_CRYPTO_DLL "${WITH_SSL_PATH}/bin/${CRYPTO_NAME}.dll") FILE(GLOB HAVE_OPENSSL_DLL "${WITH_SSL_PATH}/bin/${OPENSSL_NAME}.dll") IF (HAVE_CRYPTO_DLL AND HAVE_OPENSSL_DLL) diff --git a/configure.cmake b/configure.cmake index cc80a8a5e4e..8dbc3aa18b5 100644 --- a/configure.cmake +++ b/configure.cmake @@ -56,7 +56,7 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") # MySQL "canonical" GCC flags. At least -fno-rtti flag affects # ABI and cannot be simply removed. SET(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti") + "${CMAKE_CXX_FLAGS} -fno-rtti") IF (CMAKE_EXE_LINKER_FLAGS MATCHES " -static " OR CMAKE_EXE_LINKER_FLAGS MATCHES " -static$") diff --git a/dbug/CMakeLists.txt b/dbug/CMakeLists.txt index c40c70b684d..014efdeb9a3 100644 --- a/dbug/CMakeLists.txt +++ b/dbug/CMakeLists.txt @@ -58,13 +58,13 @@ IF(NOT WIN32 AND NOT CMAKE_GENERATOR MATCHES Xcode) ADD_CUSTOM_COMMAND(OUTPUT user.ps DEPENDS user.r ${OUTPUT_INC} ${SOURCE_INC} COMMAND ${GROFF} -mm ${CMAKE_CURRENT_SOURCE_DIR}/user.r > user.ps || touch user.ps) - ADD_CUSTOM_TARGET(ps ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/user.ps) + ADD_CUSTOM_TARGET(user.ps ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/user.ps) ENDIF(GROFF) IF(NROFF) ADD_CUSTOM_COMMAND(OUTPUT user.t DEPENDS user.r ${OUTPUT_INC} ${SOURCE_INC} COMMAND ${NROFF} -mm ${CMAKE_CURRENT_SOURCE_DIR}/user.r > user.t || touch user.t) - ADD_CUSTOM_TARGET(t ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/user.t) + ADD_CUSTOM_TARGET(user.t ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/user.t) ENDIF(NROFF) ENDIF() diff --git a/debian/libmariadbclient-dev.files b/debian/libmariadbclient-dev.files index 
a7595dceffd..77260e88d1c 100644 --- a/debian/libmariadbclient-dev.files +++ b/debian/libmariadbclient-dev.files @@ -1,7 +1,7 @@ usr/bin/mysql_config usr/include/mysql -usr/lib/libmysqlclient.a -usr/lib/libmysqlclient_r.a +usr/include/mariadb +usr/lib/mariadb/libmariadbclient.a usr/lib/libmysqlservices.a usr/share/aclocal/mysql.m4 usr/share/pkgconfig/mariadb.pc diff --git a/debian/libmariadbclient18.files b/debian/libmariadbclient18.files index 75020ecbd16..dfef3d26d0a 100644 --- a/debian/libmariadbclient18.files +++ b/debian/libmariadbclient18.files @@ -1,3 +1,3 @@ -usr/lib/libmysqlclient*.so.* +usr/lib/mariadb/libmariadb.so.* usr/lib/mysql/plugin/mysql_clear_password.so usr/lib/mysql/plugin/dialog.so diff --git a/debian/mariadb-server-10.2.files.in b/debian/mariadb-server-10.2.files.in index d562d285994..d4e8746f023 100644 --- a/debian/mariadb-server-10.2.files.in +++ b/debian/mariadb-server-10.2.files.in @@ -5,7 +5,6 @@ usr/lib/mysql/plugin/ha_archive.so usr/lib/mysql/plugin/ha_blackhole.so usr/lib/mysql/plugin/ha_federated.so usr/lib/mysql/plugin/ha_federatedx.so -usr/lib/mysql/plugin/ha_innodb.so usr/lib/mysql/plugin/ha_mroonga.so usr/lib/mysql/plugin/ha_sphinx.so usr/lib/mysql/plugin/handlersocket.so diff --git a/extra/CMakeLists.txt b/extra/CMakeLists.txt index 9c8d82d8537..ee696c156f9 100644 --- a/extra/CMakeLists.txt +++ b/extra/CMakeLists.txt @@ -77,7 +77,9 @@ IF(WITH_INNOBASE_STORAGE_ENGINE OR WITH_XTRADB_STORAGE_ENGINE) ../storage/innobase/buf/buf0checksum.cc ../storage/innobase/ut/ut0crc32.cc ../storage/innobase/ut/ut0ut.cc + ../storage/innobase/buf/buf0buf.cc ../storage/innobase/page/page0zip.cc + ../storage/innobase/os/os0file.cc ) diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index ae6ffdb582e..dd97564c8a5 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -1,6 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. - Copyright (c) 2014, 2015, MariaDB Corporation. 
+ Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,43 +25,36 @@ Published with a permission. */ +#include #include #include #include #include #include #include -#ifndef __WIN__ +#ifdef HAVE_UNISTD_H # include #endif #include #include -#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ +#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ /* Only parts of these files are included from the InnoDB codebase. The parts not included are excluded by #ifndef UNIV_INNOCHECKSUM. */ -#include "univ.i" /* include all of this */ - -#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) -#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) -#define FSEG_PAGE_DATA FIL_PAGE_DATA -#define MLOG_1BYTE (1) - -#include "ut0ut.h" -#include "ut0byte.h" -#include "mach0data.h" -#include "fsp0types.h" -#include "rem0rec.h" -#include "buf0checksum.h" /* buf_calc_page_*() */ -#include "fil0fil.h" /* FIL_* */ -#include "page0page.h" /* PAGE_* */ -#include "page0zip.h" /* page_zip_*() */ -#include "trx0undo.h" /* TRX_* */ -#include "fsp0fsp.h" /* fsp_flags_get_page_size() & - fsp_flags_get_zip_size() */ -#include "ut0crc32.h" /* ut_crc32_init() */ -#include "fsp0pagecompress.h" /* fil_get_compression_alg_name */ +#include "univ.i" /* include all of this */ +#include "page0size.h" /* page_size_t */ +#include "page0zip.h" /* page_zip_calc_checksum() */ +#include "page0page.h" /* PAGE_* */ +#include "trx0undo.h" /* TRX_UNDO_* */ +#include "fut0lst.h" /* FLST_NODE_SIZE */ +#include "buf0checksum.h" /* buf_calc_page_*() */ +#include "fil0fil.h" /* FIL_* */ +#include "os0file.h" +#include "fsp0fsp.h" /* fsp_flags_get_page_size() & + fsp_flags_get_zip_size() */ +#include "mach0data.h" /* mach_read_from_4() */ +#include "ut0crc32.h" /* ut_crc32_init() */ #ifdef UNIV_NONINL # include "fsp0fsp.ic" @@ -71,148 +63,851 @@ The parts not included are excluded 
by #ifndef UNIV_INNOCHECKSUM. */ #endif /* Global variables */ -static my_bool verbose; -static my_bool debug; -static my_bool skip_corrupt; -static my_bool just_count; -static ulong start_page; -static ulong end_page; -static ulong do_page; -static my_bool use_end_page; -static my_bool do_one_page; -static my_bool per_page_details; -static my_bool do_leaf; -static ulong n_merge; -ulong srv_page_size; /* replaces declaration in srv0srv.c */ -static ulong physical_page_size; /* Page size in bytes on disk. */ -static ulong logical_page_size; /* Page size when uncompressed. */ -static bool compressed= false; /* Is tablespace compressed */ +static bool verbose; +static bool just_count; +static uintmax_t start_page; +static uintmax_t end_page; +static uintmax_t do_page; +static bool use_end_page; +static bool do_one_page; +/* replaces declaration in srv0srv.c */ +ulong srv_page_size; +page_size_t univ_page_size(0, 0, false); +extern ulong srv_checksum_algorithm; +/* Current page number (0 based). */ +uintmax_t cur_page_num; +/* Skip the checksum verification. */ +static bool no_check; +/* Enabled for strict checksum verification. */ +bool strict_verify = 0; +/* Enabled for rewrite checksum. */ +static bool do_write; +/* Mismatches count allowed (0 by default). */ +static uintmax_t allow_mismatches; +static bool page_type_summary; +static bool page_type_dump; +/* Store filename for page-type-dump option. */ +char* page_dump_filename = 0; +/* skip the checksum verification & rewrite if page is doublewrite buffer. */ +static bool skip_page = 0; +const char *dbug_setting = "FALSE"; +char* log_filename = NULL; +/* User defined filename for logging. */ +FILE* log_file = NULL; +/* Enabled for log write option. 
*/ +static bool is_log_enabled = false; -int n_undo_state_active; -int n_undo_state_cached; -int n_undo_state_to_free; -int n_undo_state_to_purge; -int n_undo_state_prepared; -int n_undo_state_other; -int n_undo_insert, n_undo_update, n_undo_other; -int n_bad_checksum; -int n_fil_page_index; -int n_fil_page_undo_log; -int n_fil_page_inode; -int n_fil_page_ibuf_free_list; -int n_fil_page_allocated; -int n_fil_page_ibuf_bitmap; -int n_fil_page_type_sys; -int n_fil_page_type_trx_sys; -int n_fil_page_type_fsp_hdr; -int n_fil_page_type_allocated; -int n_fil_page_type_xdes; -int n_fil_page_type_blob; -int n_fil_page_type_zblob; -int n_fil_page_type_other; -int n_fil_page_type_page_compressed; -int n_fil_page_type_page_compressed_encrypted; +#ifndef _WIN32 +/* advisory lock for non-window system. */ +struct flock lk; +#endif /* _WIN32 */ -int n_fil_page_max_index_id; +/* Strict check algorithm name. */ +static ulong strict_check; +/* Rewrite checksum algorithm name. */ +static ulong write_check; -#define SIZE_RANGES_FOR_PAGE 10 -#define NUM_RETRIES 3 -#define DEFAULT_RETRY_DELAY 1000000 +/* Innodb page type. 
*/ +struct innodb_page_type { + int n_undo_state_active; + int n_undo_state_cached; + int n_undo_state_to_free; + int n_undo_state_to_purge; + int n_undo_state_prepared; + int n_undo_state_other; + int n_undo_insert; + int n_undo_update; + int n_undo_other; + int n_fil_page_index; + int n_fil_page_undo_log; + int n_fil_page_inode; + int n_fil_page_ibuf_free_list; + int n_fil_page_ibuf_bitmap; + int n_fil_page_type_sys; + int n_fil_page_type_trx_sys; + int n_fil_page_type_fsp_hdr; + int n_fil_page_type_allocated; + int n_fil_page_type_xdes; + int n_fil_page_type_blob; + int n_fil_page_type_zblob; + int n_fil_page_type_other; + int n_fil_page_type_zblob2; +} page_type; -struct per_page_stats { - ulint n_recs; - ulint data_size; - ulint left_page_no; - ulint right_page_no; - per_page_stats(ulint n, ulint data, ulint left, ulint right) : - n_recs(n), data_size(data), left_page_no(left), right_page_no(right) {} - per_page_stats() : n_recs(0), data_size(0), left_page_no(0), right_page_no(0) {} +/* Possible values for "--strict-check" for strictly verify checksum +and "--write" for rewrite checksum. */ +static const char *innochecksum_algorithms[] = { + "crc32", + "crc32", + "innodb", + "innodb", + "none", + "none", + NullS }; -struct per_index_stats { - unsigned long long pages; - unsigned long long leaf_pages; - ulint first_leaf_page; - ulint count; - ulint free_pages; - ulint max_data_size; - unsigned long long total_n_recs; - unsigned long long total_data_bytes; - - /*!< first element for empty pages, - last element for pages with more than logical_page_size */ - unsigned long long pages_in_size_range[SIZE_RANGES_FOR_PAGE+2]; - - std::map leaves; - - per_index_stats():pages(0), leaf_pages(0), first_leaf_page(0), - count(0), free_pages(0), max_data_size(0), total_n_recs(0), - total_data_bytes(0) - { - memset(pages_in_size_range, 0, sizeof(pages_in_size_range)); - } +/* Used to define an enumerate type of the "innochecksum algorithm". 
*/ +static TYPELIB innochecksum_algorithms_typelib = { + array_elements(innochecksum_algorithms)-1,"", + innochecksum_algorithms, NULL }; -std::map index_ids; - -bool encrypted = false; - -/* Get the page size of the filespace from the filespace header. */ +/** Get the page size of the filespace from the filespace header. +@param[in] buf buffer used to read the page. +@return page size */ static -my_bool +const page_size_t get_page_size( -/*==========*/ - FILE* f, /*!< in: file pointer, must be open - and set to start of file */ - byte* buf, /*!< in: buffer used to read the page */ - ulong* logical_page_size, /*!< out: Logical/Uncompressed page size */ - ulong* physical_page_size) /*!< out: Physical/Commpressed page size */ + byte* buf) { - ulong flags; + const ulint flags = mach_read_from_4(buf + FIL_PAGE_DATA + + FSP_SPACE_FLAGS); - int bytes= fread(buf, 1, UNIV_PAGE_SIZE_MIN, f); + const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); - if (ferror(f)) - { - perror("Error reading file header"); - return FALSE; - } + if (ssize == 0) { + srv_page_size = UNIV_PAGE_SIZE_ORIG; + } else { + srv_page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); + } - if (bytes != UNIV_PAGE_SIZE_MIN) - { - fprintf(stderr, "Error; Was not able to read the minimum page size "); - fprintf(stderr, "of %d bytes. Bytes read was %d\n", UNIV_PAGE_SIZE_MIN, bytes); - return FALSE; - } + univ_page_size.copy_from( + page_size_t(srv_page_size, srv_page_size, false)); - rewind(f); - - flags = mach_read_from_4(buf + FIL_PAGE_DATA + FSP_SPACE_FLAGS); - - /* srv_page_size is used by InnoDB code as UNIV_PAGE_SIZE */ - srv_page_size = *logical_page_size = fsp_flags_get_page_size(flags); - - /* fsp_flags_get_zip_size() will return zero if not compressed. 
*/ - *physical_page_size = fsp_flags_get_zip_size(flags); - if (*physical_page_size == 0) - { - *physical_page_size= *logical_page_size; - } - else - { - compressed= true; - } - - - return TRUE; + return(page_size_t(flags)); } - -/* command line argument to do page checks (that's it) */ -/* another argument to specify page ranges... seek to right spot and go from there */ - -static struct my_option innochecksum_options[] = +/** Decompress a page +@param[in,out] buf Page read from disk, uncompressed data will + also be copied to this page +@param[in, out] scratch Page to use for temporary decompress +@param[in] page_size scratch physical size +@return true if decompress succeeded */ +static +bool page_decompress( + byte* buf, + byte* scratch, + page_size_t page_size) { + dberr_t err=DB_SUCCESS; + + /* Set the dblwr recover flag to false. */ + /* JAN: TODO: Decompress + err = os_file_decompress_page( + false, buf, scratch, page_size.physical()); + */ + + return(err == DB_SUCCESS); +} + +#ifdef _WIN32 +/***********************************************//* + @param [in] error error no. from the getLastError(). + + @retval error message corresponding to error no. +*/ +static +char* +error_message( + int error) +{ + static char err_msg[1024] = {'\0'}; + FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)err_msg, sizeof(err_msg), NULL ); + + return (err_msg); +} +#endif /* _WIN32 */ + +/***********************************************//* + @param>>_______[in] name>_____name of file. + @retval file pointer; file pointer is NULL when error occured. +*/ + +FILE* +open_file( + const char* name) +{ + int fd; /* file descriptor. */ + FILE* fil_in; +#ifdef _WIN32 + HANDLE hFile; /* handle to open file. 
*/ + DWORD access; /* define access control */ + int flags = 0; /* define the mode for file + descriptor */ + + if (do_write) { + access = GENERIC_READ | GENERIC_WRITE; + flags = _O_RDWR | _O_BINARY; + } else { + access = GENERIC_READ; + flags = _O_RDONLY | _O_BINARY; + } + /* CreateFile() also provide advisory lock with the usage of + access and share mode of the file.*/ + hFile = CreateFile( + (LPCTSTR) name, access, 0L, NULL, + OPEN_EXISTING, NULL, NULL); + + if (hFile == INVALID_HANDLE_VALUE) { + /* print the error message. */ + fprintf(stderr, "Filename::%s %s\n", + error_message(GetLastError())); + + return (NULL); + } + + /* get the file descriptor. */ + fd= _open_osfhandle((intptr_t)hFile, flags); +#else /* _WIN32 */ + + int create_flag; + /* define the advisory lock and open file mode. */ + if (do_write) { + create_flag = O_RDWR; + lk.l_type = F_WRLCK; + } + else { + create_flag = O_RDONLY; + lk.l_type = F_RDLCK; + } + + fd = open(name, create_flag); + + lk.l_whence = SEEK_SET; + lk.l_start = lk.l_len = 0; + + if (fcntl(fd, F_SETLK, &lk) == -1) { + fprintf(stderr, "Error: Unable to lock file::" + " %s\n", name); + perror("fcntl"); + return (NULL); + } +#endif /* _WIN32 */ + + if (do_write) { + fil_in = fdopen(fd, "rb+"); + } else { + fil_in = fdopen(fd, "rb"); + } + + return (fil_in); +} + +/************************************************************//* + Read the content of file + + @param [in,out] buf read the file in buffer + @param [in] partial_page_read enable when to read the + remaining buffer for first page. + @param [in] physical_page_size Physical/Commpressed page size. + @param [in,out] fil_in file pointer created for the + tablespace. + @retval no. of bytes read. 
+*/ +ulong read_file( + byte* buf, + bool partial_page_read, + ulong physical_page_size, + FILE* fil_in) +{ + ulong bytes = 0; + + DBUG_ASSERT(physical_page_size >= UNIV_ZIP_SIZE_MIN); + + if (partial_page_read) { + buf += UNIV_ZIP_SIZE_MIN; + physical_page_size -= UNIV_ZIP_SIZE_MIN; + bytes = UNIV_ZIP_SIZE_MIN; + } + + bytes += ulong(fread(buf, 1, physical_page_size, fil_in)); + + return bytes; +} + +/** Check if page is corrupted or not. +@param[in] buf page frame +@param[in] page_size page size +@retval true if page is corrupted otherwise false. */ +static +bool +is_page_corrupted( + const byte* buf, + const page_size_t& page_size) +{ + + /* enable if page is corrupted. */ + bool is_corrupted; + /* use to store LSN values. */ + ulint logseq; + ulint logseqfield; + + if (!page_size.is_compressed()) { + /* check the stored log sequence numbers + for uncompressed tablespace. */ + logseq = mach_read_from_4(buf + FIL_PAGE_LSN + 4); + logseqfield = mach_read_from_4( + buf + page_size.logical() - + FIL_PAGE_END_LSN_OLD_CHKSUM + 4); + + if (is_log_enabled) { + fprintf(log_file, + "page::%lu log sequence number:first = %lu;" + " second = %lu\n", + cur_page_num, logseq, logseqfield); + if (logseq != logseqfield) { + fprintf(log_file, + "Fail; page %lu invalid (fails log " + "sequence number check)\n", + cur_page_num); + } + } + } + + is_corrupted = buf_page_is_corrupted( + true, buf, page_size, false, cur_page_num, strict_verify, + is_log_enabled, log_file); + + return(is_corrupted); +} + +/********************************************//* + Check if page is doublewrite buffer or not. + @param [in] page buffer page + + @retval true if page is doublewrite buffer otherwise false. +*/ +static +bool +is_page_doublewritebuffer( + const byte* page) +{ + if ((cur_page_num >= FSP_EXTENT_SIZE) + && (cur_page_num < FSP_EXTENT_SIZE * 3)) { + /* page is doublewrite buffer. 
*/ + return (true); + } + + return (false); +} + +/*******************************************************//* +Check if page is empty or not. + @param [in] page page to checked for empty. + @param [in] len size of page. + + @retval true if page is empty. + @retval false if page is not empty. +*/ +static +bool +is_page_empty( + const byte* page, + size_t len) +{ + while (len--) { + if (*page++) { + return (false); + } + } + return (true); +} + +/********************************************************************//** +Rewrite the checksum for the page. +@param [in/out] page page buffer +@param [in] physical_page_size page size in bytes on disk. +@param [in] iscompressed Is compressed/Uncompressed Page. + +@retval true : do rewrite +@retval false : skip the rewrite as checksum stored match with + calculated or page is doublwrite buffer. +*/ + +bool +update_checksum( + byte* page, + ulong physical_page_size, + bool iscompressed) +{ + ib_uint32_t checksum = 0; + byte stored1[4]; /* get FIL_PAGE_SPACE_OR_CHKSUM field checksum */ + byte stored2[4]; /* get FIL_PAGE_END_LSN_OLD_CHKSUM field checksum */ + + ut_ad(page); + /* If page is doublewrite buffer, skip the rewrite of checksum. 
*/ + if (skip_page) { + return (false); + } + + memcpy(stored1, page + FIL_PAGE_SPACE_OR_CHKSUM, 4); + memcpy(stored2, page + physical_page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM, 4); + + /* Check if page is empty, exclude the checksum field */ + if (is_page_empty(page + 4, physical_page_size - 12) + && is_page_empty(page + physical_page_size - 4, 4)) { + + memset(page + FIL_PAGE_SPACE_OR_CHKSUM, 0, 4); + memset(page + physical_page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 4); + + goto func_exit; + } + + if (iscompressed) { + /* page is compressed */ + checksum = page_zip_calc_checksum( + page, physical_page_size, + static_cast(write_check)); + + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + if (is_log_enabled) { + fprintf(log_file, "page::%lu; Updated checksum =" + " %u\n", cur_page_num, checksum); + } + + } else { + /* page is uncompressed. */ + + /* Store the new formula checksum */ + switch ((srv_checksum_algorithm_t) write_check) { + + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(page); + break; + + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) + buf_calc_page_new_checksum(page); + break; + + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning if new + enum is added and not handled here */ + } + + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + if (is_log_enabled) { + fprintf(log_file, "page::%lu; Updated checksum field1" + " = %u\n", cur_page_num, checksum); + } + + if (write_check == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB + || write_check == SRV_CHECKSUM_ALGORITHM_INNODB) { + checksum = (ib_uint32_t) + buf_calc_page_old_checksum(page); + } + + mach_write_to_4(page + physical_page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM,checksum); + + if (is_log_enabled) { + fprintf(log_file, "page::%lu ; 
Updated checksum " + "field2 = %u\n", cur_page_num, checksum); + } + + } + + func_exit: + /* The following code is to check the stored checksum with the + calculated checksum. If it matches, then return FALSE to skip + the rewrite of checksum, otherwise return TRUE. */ + if (iscompressed) { + if (!memcmp(stored1, page + FIL_PAGE_SPACE_OR_CHKSUM, 4)) { + return (false); + } + return (true); + } + + if (!memcmp(stored1, page + FIL_PAGE_SPACE_OR_CHKSUM, 4) + && !memcmp(stored2, page + physical_page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM, 4)) { + return (false); + + } + + return (true); +} + +/** + Write the content to the file +@param[in] filename name of the file. +@param[in,out] file file pointer where content + have to be written +@param[in] buf file buffer read +@param[in] compressed Enabled if tablespace is + compressed. +@param[in,out] pos current file position. +@param[in] page_size page size in bytes on disk. + +@retval true if successfully written +@retval false if a non-recoverable error occurred +*/ +static +bool +write_file( + const char* filename, + FILE* file, + byte* buf, + bool compressed, + fpos_t* pos, + ulong page_size) +{ + bool do_update; + + do_update = update_checksum(buf, page_size, compressed); + + if (file != stdin) { + if (do_update) { + /* Set the previous file pointer position + saved in pos to current file position. */ + if (0 != fsetpos(file, pos)) { + perror("fsetpos"); + return(false); + } + } else { + /* Store the current file position in pos */ + if (0 != fgetpos(file, pos)) { + perror("fgetpos"); + return(false); + } + return(true); + } + } + + if (page_size + != fwrite(buf, 1, page_size, file == stdin ? 
stdout : file)) { + fprintf(stderr, "Failed to write page %lu to %s: %s\n", + cur_page_num, filename, strerror(errno)); + + return(false); + } + if (file != stdin) { + fflush(file); + /* Store the current file position in pos */ + if (0 != fgetpos(file, pos)) { + perror("fgetpos"); + return(false); + } + } + + return(true); +} + +/* +Parse the page and collect/dump the information about page type +@param [in] page buffer page +@param [in] file file for diagnosis. +*/ +void +parse_page( + const byte* page, + FILE* file) +{ + unsigned long long id; + ulint undo_page_type; + char str[20]={'\0'}; + + /* Check whether page is doublewrite buffer. */ + if(skip_page) { + strcpy(str, "Double_write_buffer"); + } else { + strcpy(str, "-"); + } + + switch (mach_read_from_2(page + FIL_PAGE_TYPE)) { + + case FIL_PAGE_INDEX: + page_type.n_fil_page_index++; + id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID); + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tIndex page\t\t\t|" + "\tindex id=%llu,", cur_page_num, id); + + fprintf(file, + " page level=%lu, No. 
of records=%lu," + " garbage=%lu, %s\n", + page_header_get_field(page, PAGE_LEVEL), + page_header_get_field(page, PAGE_N_RECS), + page_header_get_field(page, PAGE_GARBAGE), str); + } + break; + + case FIL_PAGE_UNDO_LOG: + page_type.n_fil_page_undo_log++; + undo_page_type = mach_read_from_2(page + + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE); + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tUndo log page\t\t\t|", + cur_page_num); + } + if (undo_page_type == TRX_UNDO_INSERT) { + page_type.n_undo_insert++; + if (page_type_dump) { + fprintf(file, "\t%s", + "Insert Undo log page"); + } + + } else if (undo_page_type == TRX_UNDO_UPDATE) { + page_type.n_undo_update++; + if (page_type_dump) { + fprintf(file, "\t%s", + "Update undo log page"); + } + } + + undo_page_type = mach_read_from_2(page + TRX_UNDO_SEG_HDR + + TRX_UNDO_STATE); + switch (undo_page_type) { + case TRX_UNDO_ACTIVE: + page_type.n_undo_state_active++; + if (page_type_dump) { + fprintf(file, ", %s", "Undo log of " + "an active transaction"); + } + break; + + case TRX_UNDO_CACHED: + page_type.n_undo_state_cached++; + if (page_type_dump) { + fprintf(file, ", %s", "Page is " + "cached for quick reuse"); + } + break; + + case TRX_UNDO_TO_FREE: + page_type.n_undo_state_to_free++; + if (page_type_dump) { + fprintf(file, ", %s", "Insert undo " + "segment that can be freed"); + } + break; + + case TRX_UNDO_TO_PURGE: + page_type.n_undo_state_to_purge++; + if (page_type_dump) { + fprintf(file, ", %s", "Will be " + "freed in purge when all undo" + "data in it is removed"); + } + break; + + case TRX_UNDO_PREPARED: + page_type.n_undo_state_prepared++; + if (page_type_dump) { + fprintf(file, ", %s", "Undo log of " + "an prepared transaction"); + } + break; + + default: + page_type.n_undo_state_other++; + break; + } + if(page_type_dump) { + fprintf(file, ", %s\n", str); + } + break; + + case FIL_PAGE_INODE: + page_type.n_fil_page_inode++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tInode page\t\t\t|" + 
"\t%s\n",cur_page_num, str); + } + break; + + case FIL_PAGE_IBUF_FREE_LIST: + page_type.n_fil_page_ibuf_free_list++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tInsert buffer free list" + " page\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_ALLOCATED: + page_type.n_fil_page_type_allocated++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tFreshly allocated " + "page\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_IBUF_BITMAP: + page_type.n_fil_page_ibuf_bitmap++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tInsert Buffer " + "Bitmap\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_SYS: + page_type.n_fil_page_type_sys++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tSystem page\t\t\t|" + "\t%s\n",cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_TRX_SYS: + page_type.n_fil_page_type_trx_sys++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tTransaction system " + "page\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_FSP_HDR: + page_type.n_fil_page_type_fsp_hdr++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tFile Space " + "Header\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_XDES: + page_type.n_fil_page_type_xdes++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tExtent descriptor " + "page\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_BLOB: + page_type.n_fil_page_type_blob++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tBLOB page\t\t\t|\t%s\n", + cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_ZBLOB: + page_type.n_fil_page_type_zblob++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tCompressed BLOB " + "page\t\t|\t%s\n", cur_page_num, str); + } + break; + + case FIL_PAGE_TYPE_ZBLOB2: + page_type.n_fil_page_type_zblob2++; + if (page_type_dump) { + fprintf(file, "#::%lu \t\t|\t\tSubsequent Compressed " + "BLOB page\t|\t%s\n", 
cur_page_num, str); + } + break; + + default: + page_type.n_fil_page_type_other++; + break; + } +} +/** +@param [in/out] file_name name of the filename + +@retval FILE pointer if successfully created else NULL when error occured. +*/ +FILE* +create_file( + char* file_name) +{ + FILE* file = NULL; + +#ifndef _WIN32 + file = fopen(file_name, "wb"); + if (file == NULL) { + fprintf(stderr, "Failed to create file: %s: %s\n", + file_name, strerror(errno)); + return(NULL); + } +#else + HANDLE hFile; /* handle to open file. */ + int fd = 0; + hFile = CreateFile((LPCTSTR) file_name, + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_DELETE, + NULL, CREATE_NEW, NULL, NULL); + + if (hFile == INVALID_HANDLE_VALUE) { + /* print the error message. */ + fprintf(stderr, "Filename::%s %s\n", + file_name, + error_message(GetLastError())); + + return(NULL); + } + + /* get the file descriptor. */ + fd= _open_osfhandle((intptr_t)hFile, _O_RDWR | _O_BINARY); + file = fdopen(fd, "wb"); +#endif /* _WIN32 */ + + return(file); +} + +/* + Print the page type count of a tablespace. + @param [in] fil_out stream where the output goes. 
+*/ +void +print_summary( + FILE* fil_out) +{ + fprintf(fil_out, "\n================PAGE TYPE SUMMARY==============\n"); + fprintf(fil_out, "#PAGE_COUNT\tPAGE_TYPE"); + fprintf(fil_out, "\n===============================================\n"); + fprintf(fil_out, "%8d\tIndex page\n", + page_type.n_fil_page_index); + fprintf(fil_out, "%8d\tUndo log page\n", + page_type.n_fil_page_undo_log); + fprintf(fil_out, "%8d\tInode page\n", + page_type.n_fil_page_inode); + fprintf(fil_out, "%8d\tInsert buffer free list page\n", + page_type.n_fil_page_ibuf_free_list); + fprintf(fil_out, "%8d\tFreshly allocated page\n", + page_type.n_fil_page_type_allocated); + fprintf(fil_out, "%8d\tInsert buffer bitmap\n", + page_type.n_fil_page_ibuf_bitmap); + fprintf(fil_out, "%8d\tSystem page\n", + page_type.n_fil_page_type_sys); + fprintf(fil_out, "%8d\tTransaction system page\n", + page_type.n_fil_page_type_trx_sys); + fprintf(fil_out, "%8d\tFile Space Header\n", + page_type.n_fil_page_type_fsp_hdr); + fprintf(fil_out, "%8d\tExtent descriptor page\n", + page_type.n_fil_page_type_xdes); + fprintf(fil_out, "%8d\tBLOB page\n", + page_type.n_fil_page_type_blob); + fprintf(fil_out, "%8d\tCompressed BLOB page\n", + page_type.n_fil_page_type_zblob); + fprintf(fil_out, "%8d\tOther type of page", + page_type.n_fil_page_type_other); + fprintf(fil_out, "\n===============================================\n"); + fprintf(fil_out, "Additional information:\n"); + fprintf(fil_out, "Undo page type: %d insert, %d update, %d other\n", + page_type.n_undo_insert, + page_type.n_undo_update, + page_type.n_undo_other); + fprintf(fil_out, "Undo page state: %d active, %d cached, %d to_free, %d" + " to_purge, %d prepared, %d other\n", + page_type.n_undo_state_active, + page_type.n_undo_state_cached, + page_type.n_undo_state_to_free, + page_type.n_undo_state_to_purge, + page_type.n_undo_state_prepared, + page_type.n_undo_state_other); +} + +/* command line argument for innochecksum tool. 
*/ +static struct my_option innochecksum_options[] = { {"help", '?', "Displays this help and exits.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"info", 'I', "Synonym for --help.", @@ -221,779 +916,599 @@ static struct my_option innochecksum_options[] = 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose", 'v', "Verbose (prints progress every 5 seconds).", &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"debug", 'd', "Debug mode (prints checksums for each page, implies verbose).", - &debug, &debug, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"skip_corrupt", 'u', "Skip corrupt pages.", - &skip_corrupt, &skip_corrupt, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"count", 'c', "Print the count of pages in the file.", +#ifndef DBUG_OFF + {"debug", '#', "Output debug log. See " REFMAN "dbug-package.html", + &dbug_setting, &dbug_setting, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif /* !DBUG_OFF */ + {"count", 'c', "Print the count of pages in the file and exits.", &just_count, &just_count, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"start_page", 's', "Start on this page number (0 based).", - &start_page, &start_page, 0, GET_ULONG, REQUIRED_ARG, - 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + &start_page, &start_page, 0, GET_ULL, REQUIRED_ARG, + 0, 0, ULLONG_MAX, 0, 1, 0}, {"end_page", 'e', "End at this page number (0 based).", - &end_page, &end_page, 0, GET_ULONG, REQUIRED_ARG, - 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + &end_page, &end_page, 0, GET_ULL, REQUIRED_ARG, + 0, 0, ULLONG_MAX, 0, 1, 0}, {"page", 'p', "Check only this page (0 based).", - &do_page, &do_page, 0, GET_ULONG, REQUIRED_ARG, - 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, - {"per_page_details", 'i', "Print out per-page detail information.", - &per_page_details, &per_page_details, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0} - , - {"leaf", 'l', "Examine leaf index pages", - &do_leaf, &do_leaf, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"merge", 'm', "leaf page count 
if merge given number of consecutive pages", - &n_merge, &n_merge, 0, GET_ULONG, REQUIRED_ARG, - 0, 0, (longlong)10L, 0, 1, 0}, + &do_page, &do_page, 0, GET_ULL, REQUIRED_ARG, + 0, 0, ULLONG_MAX, 0, 1, 0}, + {"strict-check", 'C', "Specify the strict checksum algorithm by the user.", + &strict_check, &strict_check, &innochecksum_algorithms_typelib, + GET_ENUM, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"no-check", 'n', "Ignore the checksum verification.", + &no_check, &no_check, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"allow-mismatches", 'a', "Maximum checksum mismatch allowed.", + &allow_mismatches, &allow_mismatches, 0, + GET_ULL, REQUIRED_ARG, 0, 0, ULLONG_MAX, 0, 1, 0}, + {"write", 'w', "Rewrite the checksum algorithm by the user.", + &write_check, &write_check, &innochecksum_algorithms_typelib, + GET_ENUM, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"page-type-summary", 'S', "Display a count of each page type " + "in a tablespace.", &page_type_summary, &page_type_summary, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"page-type-dump", 'D', "Dump the page type info for each page in a " + "tablespace.", &page_dump_filename, &page_dump_filename, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"log", 'l', "log output.", + &log_filename, &log_filename, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; +/* Print out the Innodb version and machine information. 
*/ static void print_version(void) { - printf("%s Ver %s, for %s (%s)\n", - my_progname, INNODB_VERSION_STR, - SYSTEM_TYPE, MACHINE_TYPE); +#ifdef DBUG_OFF + printf("%s Ver %s, for %s (%s)\n", + my_progname, INNODB_VERSION_STR, + SYSTEM_TYPE, MACHINE_TYPE); +#else + printf("%s-debug Ver %s, for %s (%s)\n", + my_progname, INNODB_VERSION_STR, + SYSTEM_TYPE, MACHINE_TYPE); +#endif /* DBUG_OFF */ } static void usage(void) { - print_version(); - puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); - printf("InnoDB offline file checksum utility.\n"); - printf("Usage: %s [-c] [-s ] [-e ] [-p ] [-v] [-d] \n", my_progname); - my_print_help(innochecksum_options); - my_print_variables(innochecksum_options); + print_version(); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); + printf("InnoDB offline file checksum utility.\n"); + printf("Usage: %s [-c] [-s ] [-e ] " + "[-p ] [-v] [-a ] [-n] " + "[-C ] [-w ] [-S] [-D ] " + "[-l ] \n", my_progname); + printf("See " REFMAN "innochecksum.html for usage hints.\n"); + my_print_help(innochecksum_options); + my_print_variables(innochecksum_options); } extern "C" my_bool innochecksum_get_one_option( -/*========================*/ - int optid, - const struct my_option *opt __attribute__((unused)), - char *argument __attribute__((unused))) + int optid, + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) { - switch (optid) { - case 'd': - verbose=1; /* debug implies verbose... */ - break; - case 'e': - use_end_page= 1; - break; - case 'p': - end_page= start_page= do_page; - use_end_page= 1; - do_one_page= 1; - break; - case 'V': - print_version(); - exit(0); - break; - case 'I': - case '?': - usage(); - exit(0); - break; - } - return 0; -} + switch (optid) { +#ifndef DBUG_OFF + case '#': + dbug_setting = argument + ? 
argument + : IF_WIN("d:O,innochecksum.trace", + "d:o,/tmp/innochecksum.trace"); + DBUG_PUSH(dbug_setting); + break; +#endif /* !DBUG_OFF */ + case 'e': + use_end_page = true; + break; + case 'p': + end_page = start_page = do_page; + use_end_page = true; + do_one_page = true; + break; + case 'V': + print_version(); + exit(EXIT_SUCCESS); + break; + case 'C': + strict_verify = true; + switch ((srv_checksum_algorithm_t) strict_check) { -static int get_options( -/*===================*/ - int *argc, - char ***argv) -{ - int ho_error; + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + case SRV_CHECKSUM_ALGORITHM_CRC32: + srv_checksum_algorithm = + SRV_CHECKSUM_ALGORITHM_STRICT_CRC32; + break; - if ((ho_error=handle_options(argc, argv, innochecksum_options, innochecksum_get_one_option))) - exit(ho_error); + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + case SRV_CHECKSUM_ALGORITHM_INNODB: + srv_checksum_algorithm = + SRV_CHECKSUM_ALGORITHM_STRICT_INNODB; + break; - /* The next arg must be the filename */ - if (!*argc) - { - usage(); - return 1; - } - return 0; -} /* get_options */ - -/*********************************************************************//** -Gets the file page type. -@return type; NOTE that if the type has not been written to page, the -return value not defined */ -ulint -fil_page_get_type( -/*==============*/ - uchar* page) /*!< in: file page */ -{ - return(mach_read_from_2(page + FIL_PAGE_TYPE)); -} - -/**************************************************************//** -Gets the index id field of a page. -@return index id */ -ib_uint64_t -btr_page_get_index_id( -/*==================*/ - uchar* page) /*!< in: index page */ -{ - return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)); -} - -/********************************************************//** -Gets the next index page number. 
-@return next page number */ -ulint -btr_page_get_next( -/*==============*/ - const page_t* page) /*!< in: index page */ -{ - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/********************************************************//** -Gets the previous index page number. -@return prev page number */ -ulint -btr_page_get_prev( -/*==============*/ - const page_t* page) /*!< in: index page */ -{ - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -void -parse_page( -/*=======*/ - uchar* page, /* in: buffer page */ - uchar* xdes) /* in: extend descriptor page */ -{ - ib_uint64_t id; - ulint x; - ulint n_recs; - ulint page_no; - ulint left_page_no; - ulint right_page_no; - ulint data_bytes; - int is_leaf; - int size_range_id; - - switch (fil_page_get_type(page)) { - case FIL_PAGE_INDEX: - n_fil_page_index++; - id = btr_page_get_index_id(page); - n_recs = page_get_n_recs(page); - page_no = page_get_page_no(page); - left_page_no = btr_page_get_prev(page); - right_page_no = btr_page_get_next(page); - data_bytes = page_get_data_size(page); - is_leaf = page_is_leaf(page); - size_range_id = (data_bytes * SIZE_RANGES_FOR_PAGE - + logical_page_size - 1) / - logical_page_size; - if (size_range_id > SIZE_RANGES_FOR_PAGE + 1) { - /* data_bytes is bigger than logical_page_size */ - size_range_id = SIZE_RANGES_FOR_PAGE + 1; - } - if (per_page_details) { - printf("index %lu page %lu leaf %u n_recs %lu data_bytes %lu" - "\n", (ulong) id, (ulong) page_no, is_leaf, n_recs, data_bytes); - } - /* update per-index statistics */ - { - if (index_ids.count(id) == 0) { - index_ids[id] = per_index_stats(); - } - std::map::iterator it; - it = index_ids.find(id); - per_index_stats &index = (it->second); - uchar* des = xdes + XDES_ARR_OFFSET - + XDES_SIZE * ((page_no & (physical_page_size - 1)) - / FSP_EXTENT_SIZE); - if (xdes_get_bit(des, XDES_FREE_BIT, - page_no % FSP_EXTENT_SIZE)) { - index.free_pages++; - return; - } - index.pages++; - if (is_leaf) { - index.leaf_pages++; - if 
(data_bytes > index.max_data_size) { - index.max_data_size = data_bytes; - } - struct per_page_stats pp(n_recs, data_bytes, - left_page_no, right_page_no); - - index.leaves[page_no] = pp; - - if (left_page_no == ULINT32_UNDEFINED) { - index.first_leaf_page = page_no; - index.count++; - } - } - index.total_n_recs += n_recs; - index.total_data_bytes += data_bytes; - index.pages_in_size_range[size_range_id] ++; - } - - break; - case FIL_PAGE_UNDO_LOG: - if (per_page_details) { - printf("FIL_PAGE_UNDO_LOG\n"); - } - n_fil_page_undo_log++; - x = mach_read_from_2(page + TRX_UNDO_PAGE_HDR + - TRX_UNDO_PAGE_TYPE); - if (x == TRX_UNDO_INSERT) - n_undo_insert++; - else if (x == TRX_UNDO_UPDATE) - n_undo_update++; - else - n_undo_other++; - - x = mach_read_from_2(page + TRX_UNDO_SEG_HDR + TRX_UNDO_STATE); - switch (x) { - case TRX_UNDO_ACTIVE: n_undo_state_active++; break; - case TRX_UNDO_CACHED: n_undo_state_cached++; break; - case TRX_UNDO_TO_FREE: n_undo_state_to_free++; break; - case TRX_UNDO_TO_PURGE: n_undo_state_to_purge++; break; - case TRX_UNDO_PREPARED: n_undo_state_prepared++; break; - default: n_undo_state_other++; break; - } - break; - case FIL_PAGE_INODE: - if (per_page_details) { - printf("FIL_PAGE_INODE\n"); - } - n_fil_page_inode++; - break; - case FIL_PAGE_IBUF_FREE_LIST: - if (per_page_details) { - printf("FIL_PAGE_IBUF_FREE_LIST\n"); - } - n_fil_page_ibuf_free_list++; - break; - case FIL_PAGE_TYPE_ALLOCATED: - if (per_page_details) { - printf("FIL_PAGE_TYPE_ALLOCATED\n"); - } - n_fil_page_type_allocated++; - break; - case FIL_PAGE_IBUF_BITMAP: - if (per_page_details) { - printf("FIL_PAGE_IBUF_BITMAP\n"); - } - n_fil_page_ibuf_bitmap++; - break; - case FIL_PAGE_TYPE_SYS: - if (per_page_details) { - printf("FIL_PAGE_TYPE_SYS\n"); - } - n_fil_page_type_sys++; - break; - case FIL_PAGE_TYPE_TRX_SYS: - if (per_page_details) { - printf("FIL_PAGE_TYPE_TRX_SYS\n"); - } - n_fil_page_type_trx_sys++; - break; - case FIL_PAGE_TYPE_FSP_HDR: - if (per_page_details) { - 
printf("FIL_PAGE_TYPE_FSP_HDR\n"); - } - memcpy(xdes, page, physical_page_size); - n_fil_page_type_fsp_hdr++; - break; - case FIL_PAGE_TYPE_XDES: - if (per_page_details) { - printf("FIL_PAGE_TYPE_XDES\n"); - } - memcpy(xdes, page, physical_page_size); - n_fil_page_type_xdes++; - break; - case FIL_PAGE_TYPE_BLOB: - if (per_page_details) { - printf("FIL_PAGE_TYPE_BLOB\n"); - } - n_fil_page_type_blob++; - break; - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - if (per_page_details) { - printf("FIL_PAGE_TYPE_ZBLOB/2\n"); - } - n_fil_page_type_zblob++; - break; - case FIL_PAGE_PAGE_COMPRESSED: - if (per_page_details) { - printf("FIL_PAGE_PAGE_COMPRESSED\n"); - } - n_fil_page_type_page_compressed++; - break; - case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: - if (per_page_details) { - printf("FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED\n"); - } - n_fil_page_type_page_compressed_encrypted++; - break; - default: - if (per_page_details) { - printf("FIL_PAGE_TYPE_OTHER\n"); - } - n_fil_page_type_other++; - } -} - -void print_index_leaf_stats(unsigned long long id, const per_index_stats& index) -{ - ulint page_no = index.first_leaf_page; - std::map::const_iterator it_page = index.leaves.find(page_no); - printf("\nindex: %llu leaf page stats: n_pages = %llu\n", - id, index.leaf_pages); - printf("page_no\tdata_size\tn_recs\n"); - while (it_page != index.leaves.end()) { - const per_page_stats& stat = it_page->second; - printf("%lu\t%lu\t%lu\n", it_page->first, stat.data_size, stat.n_recs); - page_no = stat.right_page_no; - it_page = index.leaves.find(page_no); - } -} - -void defrag_analysis(unsigned long long id, const per_index_stats& index) -{ - // TODO: make it work for compressed pages too - std::map::const_iterator it = index.leaves.find(index.first_leaf_page); - ulint n_pages = 0; - ulint n_leaf_pages = 0; - while (it != index.leaves.end()) { - ulint data_size_total = 0; - for (ulong i = 0; i < n_merge; i++) { - const per_page_stats& stat = it->second; - n_leaf_pages ++; - 
data_size_total += stat.data_size; - it = index.leaves.find(stat.right_page_no); - if (it == index.leaves.end()) { - break; - } - } - if (index.max_data_size) { - n_pages += data_size_total / index.max_data_size; - if (data_size_total % index.max_data_size != 0) { - n_pages += 1; - } - } - } - if (index.leaf_pages) - printf("count = %lu free = %lu\n", index.count, index.free_pages); - printf("%llu\t\t%llu\t\t%lu\t\t%lu\t\t%lu\t\t%.2f\t%lu\n", - id, index.leaf_pages, n_leaf_pages, n_merge, n_pages, - 1.0 - (double)n_pages / (double)n_leaf_pages, index.max_data_size); -} - -void print_leaf_stats() -{ - printf("\n**************************************************\n"); - printf("index_id\t#leaf_pages\t#actual_leaf_pages\tn_merge\t" - "#leaf_after_merge\tdefrag\n"); - for (std::map::const_iterator it = index_ids.begin(); it != index_ids.end(); it++) { - const per_index_stats& index = it->second; - if (verbose) { - print_index_leaf_stats(it->first, index); - } - if (n_merge) { - defrag_analysis(it->first, index); - } - } -} - -void -print_stats() -/*========*/ -{ - unsigned long long i; - - printf("%d\tbad checksum\n", n_bad_checksum); - printf("%d\tFIL_PAGE_INDEX\n", n_fil_page_index); - printf("%d\tFIL_PAGE_UNDO_LOG\n", n_fil_page_undo_log); - printf("%d\tFIL_PAGE_INODE\n", n_fil_page_inode); - printf("%d\tFIL_PAGE_IBUF_FREE_LIST\n", n_fil_page_ibuf_free_list); - printf("%d\tFIL_PAGE_TYPE_ALLOCATED\n", n_fil_page_type_allocated); - printf("%d\tFIL_PAGE_IBUF_BITMAP\n", n_fil_page_ibuf_bitmap); - printf("%d\tFIL_PAGE_TYPE_SYS\n", n_fil_page_type_sys); - printf("%d\tFIL_PAGE_TYPE_TRX_SYS\n", n_fil_page_type_trx_sys); - printf("%d\tFIL_PAGE_TYPE_FSP_HDR\n", n_fil_page_type_fsp_hdr); - printf("%d\tFIL_PAGE_TYPE_XDES\n", n_fil_page_type_xdes); - printf("%d\tFIL_PAGE_TYPE_BLOB\n", n_fil_page_type_blob); - printf("%d\tFIL_PAGE_TYPE_ZBLOB\n", n_fil_page_type_zblob); - printf("%d\tFIL_PAGE_PAGE_COMPRESSED\n", n_fil_page_type_page_compressed); - 
printf("%d\tFIL_PAGE_PAGE_COMPRESSED_ENCRYPTED\n", n_fil_page_type_page_compressed_encrypted); - printf("%d\tother\n", n_fil_page_type_other); - printf("%d\tmax index_id\n", n_fil_page_max_index_id); - printf("undo type: %d insert, %d update, %d other\n", - n_undo_insert, n_undo_update, n_undo_other); - printf("undo state: %d active, %d cached, %d to_free, %d to_purge," - " %d prepared, %d other\n", n_undo_state_active, - n_undo_state_cached, n_undo_state_to_free, - n_undo_state_to_purge, n_undo_state_prepared, - n_undo_state_other); - - printf("index_id\t#pages\t\t#leaf_pages\t#recs_per_page" - "\t#bytes_per_page\n"); - for (std::map::const_iterator it = index_ids.begin(); it != index_ids.end(); it++) { - const per_index_stats& index = it->second; - printf("%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", - it->first, index.pages, index.leaf_pages, - index.total_n_recs / index.pages, - index.total_data_bytes / index.pages); - } - printf("\n"); - printf("index_id\tpage_data_bytes_histgram(empty,...,oversized)\n"); - for (std::map::const_iterator it = index_ids.begin(); it != index_ids.end(); it++) { - printf("%lld\t", it->first); - const per_index_stats& index = it->second; - for (i = 0; i < SIZE_RANGES_FOR_PAGE+2; i++) { - printf("\t%lld", index.pages_in_size_range[i]); - } - printf("\n"); - } - if (do_leaf) { - print_leaf_stats(); - } -} - -int main(int argc, char **argv) -{ - FILE* f; /* our input file */ - char* filename; /* our input filename. 
*/ - unsigned char *big_buf= 0, *buf; - unsigned char *big_xdes= 0, *xdes; - ulong bytes; /* bytes read count */ - ulint ct; /* current page number (0 based) */ - time_t now; /* current time */ - time_t lastt; /* last time */ - ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; - /* ulints for checksum storage */ - struct stat st; /* for stat, if you couldn't guess */ - unsigned long long int size; /* size of file (has to be 64 bits) */ - ulint pages; /* number of pages in file */ - off_t offset= 0; - int fd; - - printf("InnoDB offline file checksum utility.\n"); - - ut_crc32_init(); - - MY_INIT(argv[0]); - - if (get_options(&argc,&argv)) - exit(1); - - if (verbose) - my_print_variables(innochecksum_options); - - /* The file name is not optional */ - filename = *argv; - if (*filename == '\0') - { - fprintf(stderr, "Error; File name missing\n"); - goto error_out; - } - - /* stat the file to get size and page count */ - if (stat(filename, &st)) - { - fprintf(stderr, "Error; %s cannot be found\n", filename); - goto error_out; - } - size= st.st_size; - - /* Open the file for reading */ - f= fopen(filename, "rb"); - if (f == NULL) - { - fprintf(stderr, "Error; %s cannot be opened", filename); - perror(" "); - goto error_out; - } - - big_buf = (unsigned char *)malloc(2 * UNIV_PAGE_SIZE_MAX); - if (big_buf == NULL) - { - fprintf(stderr, "Error; failed to allocate memory\n"); - perror(""); - goto error_f; - } - - /* Make sure the page is aligned */ - buf = (unsigned char*)ut_align_down(big_buf - + UNIV_PAGE_SIZE_MAX, UNIV_PAGE_SIZE_MAX); - - big_xdes = (unsigned char *)malloc(2 * UNIV_PAGE_SIZE_MAX); - if (big_xdes == NULL) - { - fprintf(stderr, "Error; failed to allocate memory\n"); - perror(""); - goto error_big_buf; - } - - /* Make sure the page is aligned */ - xdes = (unsigned char*)ut_align_down(big_xdes - + UNIV_PAGE_SIZE_MAX, UNIV_PAGE_SIZE_MAX); - - - if (!get_page_size(f, buf, &logical_page_size, &physical_page_size)) - goto error; - - if 
(compressed) - { - printf("Table is compressed\n"); - printf("Key block size is %lu\n", physical_page_size); - } - else - { - printf("Table is uncompressed\n"); - printf("Page size is %lu\n", physical_page_size); - } - - pages= (ulint) (size / physical_page_size); - - if (just_count) - { - if (verbose) - printf("Number of pages: "); - printf("%lu\n", pages); - goto ok; - } - else if (verbose) - { - printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); - if (do_one_page) - printf("InnoChecksum; checking page %lu\n", do_page); - else - printf("InnoChecksum; checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1)); - } - -#ifdef UNIV_LINUX - if (posix_fadvise(fileno(f), 0, 0, POSIX_FADV_SEQUENTIAL) || - posix_fadvise(fileno(f), 0, 0, POSIX_FADV_NOREUSE)) - { - perror("posix_fadvise failed"); - } -#endif - - /* seek to the necessary position */ - if (start_page) - { - fd= fileno(f); - if (!fd) - { - perror("Error; Unable to obtain file descriptor number"); - goto error; - } - - offset= (off_t)start_page * (off_t)physical_page_size; - - if (lseek(fd, offset, SEEK_SET) != offset) - { - perror("Error; Unable to seek to necessary offset"); - goto error; - } - } - - /* main checksumming loop */ - ct= start_page; - lastt= 0; - while (!feof(f)) - { - int page_ok = 1; - - bytes= fread(buf, 1, physical_page_size, f); - - if (!bytes && feof(f)) - goto ok; - - if (ferror(f)) - { - fprintf(stderr, "Error reading %lu bytes", physical_page_size); - perror(" "); - goto error; - } - - ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE); - ulint key_version = mach_read_from_4(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - if (key_version && page_type != FIL_PAGE_PAGE_COMPRESSED) { - encrypted = true; - } else { - encrypted = false; - } - - ulint comp_method = 0; - - if (encrypted) { - comp_method = mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE); - } else { - comp_method = 
mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - } - - ulint comp_size = mach_read_from_2(buf+FIL_PAGE_DATA); - ib_uint32_t encryption_checksum = mach_read_from_4(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); - - - if (page_type == FIL_PAGE_PAGE_COMPRESSED) { - /* Page compressed tables do not have any checksum */ - if (debug) - fprintf(stderr, "Page %lu page compressed with method %s real_size %lu\n", ct, - fil_get_compression_alg_name(comp_method), comp_size); - page_ok = 1; - } else if (compressed) { - /* compressed pages */ - ulint crccsum = page_zip_calc_checksum(buf, physical_page_size, SRV_CHECKSUM_ALGORITHM_CRC32); - ulint icsum = page_zip_calc_checksum(buf, physical_page_size, SRV_CHECKSUM_ALGORITHM_INNODB); - - if (debug) { - if (key_version != 0) { - fprintf(stderr, - "Page %lu encrypted key_version %lu calculated = %lu; crc32 = %lu; recorded = %u\n", - ct, key_version, icsum, crccsum, encryption_checksum); - } - } - - if (encrypted) { - if (encryption_checksum != 0 && crccsum != encryption_checksum && icsum != encryption_checksum) { - if (debug) - fprintf(stderr, "page %lu: compressed: calculated = %lu; crc32 = %lu; recorded = %u\n", - ct, icsum, crccsum, encryption_checksum); - fprintf(stderr, "Fail; page %lu invalid (fails compressed page checksum).\n", ct); - } - } else { - if (!page_zip_verify_checksum(buf, physical_page_size)) { - fprintf(stderr, "Fail; page %lu invalid (fails compressed page checksum).\n", ct); - if (!skip_corrupt) - goto error; - page_ok = 0; - } + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + case SRV_CHECKSUM_ALGORITHM_NONE: + srv_checksum_algorithm = + SRV_CHECKSUM_ALGORITHM_STRICT_NONE; + break; + default: + return(true); + } + break; + case 'n': + no_check = true; + break; + case 'a': + case 'S': + break; + case 'w': + do_write = true; + break; + case 'D': + page_type_dump = true; + break; + case 'l': + is_log_enabled = true; + break; + case 'I': + case '?': + usage(); + exit(EXIT_SUCCESS); + break; } - } else 
{ - if (key_version != 0) { - /* Encrypted page */ - if (debug) { - if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - fprintf(stderr, - "Page %lu page compressed with method %s real_size %lu and encrypted key_version %lu checksum %u\n", - ct, fil_get_compression_alg_name(comp_method), comp_size, key_version, encryption_checksum); - } else { - fprintf(stderr, - "Page %lu encrypted key_version %lu checksum %u\n", - ct, key_version, encryption_checksum); - } - } - } - /* Page compressed tables do not contain FIL tailer */ - if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && page_type != FIL_PAGE_PAGE_COMPRESSED) { - /* check the "stored log sequence numbers" */ - logseq= mach_read_from_4(buf + FIL_PAGE_LSN + 4); - logseqfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); - if (debug) - printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); - if (logseq != logseqfield) - { - fprintf(stderr, "Fail; page %lu invalid (fails log sequence number check)\n", ct); - if (!skip_corrupt) - goto error; - page_ok = 0; - } - - /* check old method of checksumming */ - oldcsum= buf_calc_page_old_checksum(buf); - oldcsumfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM); - if (debug) - printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); - if (oldcsumfield != mach_read_from_4(buf + FIL_PAGE_LSN) && oldcsumfield != oldcsum) - { - fprintf(stderr, "Fail; page %lu invalid (fails old style checksum)\n", ct); - if (!skip_corrupt) - goto error; - page_ok = 0; - } - } - - /* now check the new method */ - csum= buf_calc_page_new_checksum(buf); - crc32= buf_calc_page_crc32(buf); - csumfield= mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM); - - if (key_version) - csumfield = encryption_checksum; - - if (debug) - printf("page %lu: new style: calculated = %lu; crc32 = %lu; recorded = %lu\n", - ct, csum, crc32, csumfield); - if (csumfield != 0 && 
crc32 != csumfield && csum != csumfield) - { - fprintf(stderr, "Fail; page %lu invalid (fails innodb and crc32 checksum)\n", ct); - if (!skip_corrupt) - goto error; - page_ok = 0; - } - } - /* end if this was the last page we were supposed to check */ - if (use_end_page && (ct >= end_page)) - goto ok; - - if (per_page_details) - { - printf("page %ld ", ct); - } - - /* do counter increase and progress printing */ - ct++; - - if (!page_ok) - { - if (per_page_details) - { - printf("BAD_CHECKSUM\n"); - } - n_bad_checksum++; - continue; - } - - /* Can't parse compressed or/and encrypted pages */ - if (page_type != FIL_PAGE_PAGE_COMPRESSED && !encrypted) { - parse_page(buf, xdes); - } - - if (verbose) - { - if (ct % 64 == 0) - { - now= time(0); - if (!lastt) lastt= now; - if (now - lastt >= 1) - { - printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); - lastt= now; - } - } - } - } - -ok: - if (!just_count) - print_stats(); - free(big_xdes); - free(big_buf); - fclose(f); - my_end(0); - exit(0); - -error: - free(big_xdes); -error_big_buf: - free(big_buf); -error_f: - fclose(f); -error_out: - my_end(0); - exit(1); + return(false); +} + +static +bool +get_options( + int *argc, + char ***argv) +{ + if (handle_options(argc, argv, innochecksum_options, + innochecksum_get_one_option)) + exit(true); + + /* The next arg must be the filename */ + if (!*argc) { + usage(); + return (true); + } + + return (false); +} + +int main( + int argc, + char **argv) +{ + /* our input file. */ + FILE* fil_in = NULL; + /* our input filename. */ + char* filename; + /* Buffer to store pages read. */ + byte* buf = NULL; + /* bytes read count */ + ulong bytes; + /* Buffer to decompress page.*/ + byte* tbuf = NULL; + /* current time */ + time_t now; + /* last time */ + time_t lastt; + /* stat, to get file size. 
*/ +#ifdef _WIN32 + struct _stat64 st; +#else + struct stat st; +#endif /* _WIN32 */ + + /* size of file (has to be 64 bits) */ + unsigned long long int size = 0; + /* number of pages in file */ + ulint pages; + + off_t offset = 0; + /* count the no. of page corrupted. */ + ulint mismatch_count = 0; + /* Variable to ack the page is corrupted or not. */ + bool is_corrupted = false; + + bool partial_page_read = false; + /* Enabled when read from stdin is done. */ + bool read_from_stdin = false; + FILE* fil_page_type = NULL; + fpos_t pos; + + /* Use to check the space id of given file. If space_id is zero, + then check whether page is doublewrite buffer.*/ + ulint space_id = 0UL; + /* enable when space_id of given file is zero. */ + bool is_system_tablespace = false; + + ut_crc32_init(); + MY_INIT(argv[0]); + DBUG_ENTER("main"); + DBUG_PROCESS(argv[0]); + + if (get_options(&argc,&argv)) { + DBUG_RETURN(1); + } + + if (strict_verify && no_check) { + fprintf(stderr, "Error: --strict-check option cannot be used " + "together with --no-check option.\n"); + DBUG_RETURN(1); + } + + if (no_check && !do_write) { + fprintf(stderr, "Error: --no-check must be associated with " + "--write option.\n"); + DBUG_RETURN(1); + } + + if (page_type_dump) { + fil_page_type = create_file(page_dump_filename); + if (!fil_page_type) { + DBUG_RETURN(1); + } + } + + if (is_log_enabled) { + log_file = create_file(log_filename); + if (!log_file) { + DBUG_RETURN(1); + } + fprintf(log_file, "InnoDB File Checksum Utility.\n"); + } + + if (verbose) { + /* JAN: TODO: MySQL 5.7 + my_print_variables_ex(innochecksum_options, stderr); + */ + my_print_variables(innochecksum_options); + } + + + buf = (byte*) malloc(UNIV_PAGE_SIZE_MAX * 2); + tbuf = buf + UNIV_PAGE_SIZE_MAX; + + /* The file name is not optional. */ + for (int i = 0; i < argc; ++i) { + /* Reset parameters for each file. 
*/ + filename = argv[i]; + memset(&page_type, 0, sizeof(innodb_page_type)); + is_corrupted = false; + partial_page_read = false; + skip_page = false; + + if (is_log_enabled) { + fprintf(log_file, "Filename = %s\n", filename); + } + + if (*filename == '-') { + /* read from stdin. */ + fil_in = stdin; + read_from_stdin = true; + + } + + /* stat the file to get size and page count. */ + if (!read_from_stdin && +#ifdef _WIN32 + _stat64(filename, &st)) { +#else + stat(filename, &st)) { +#endif /* _WIN32 */ + fprintf(stderr, "Error: %s cannot be found\n", + filename); + + DBUG_RETURN(1); + } + + if (!read_from_stdin) { + size = st.st_size; + fil_in = open_file(filename); + /*If fil_in is NULL, terminate as some error encountered */ + if(fil_in == NULL) { + DBUG_RETURN(1); + } + /* Save the current file pointer in pos variable.*/ + if (0 != fgetpos(fil_in, &pos)) { + perror("fgetpos"); + DBUG_RETURN(1); + } + } + + /* Testing for lock mechanism. The innochecksum + acquire lock on given file. So other tools accessing the same + file for processsing must fail. */ +#ifdef _WIN32 + DBUG_EXECUTE_IF("innochecksum_cause_mysqld_crash", + ut_ad(page_dump_filename); + while((_access( page_dump_filename, 0)) == 0) { + sleep(1); + } + DBUG_RETURN(0); ); +#else + DBUG_EXECUTE_IF("innochecksum_cause_mysqld_crash", + ut_ad(page_dump_filename); + struct stat status_buf; + while(stat(page_dump_filename, &status_buf) == 0) { + sleep(1); + } + DBUG_RETURN(0); ); +#endif /* _WIN32 */ + + /* Read the minimum page size. */ + bytes = ulong(fread(buf, 1, UNIV_ZIP_SIZE_MIN, fil_in)); + partial_page_read = true; + + if (bytes != UNIV_ZIP_SIZE_MIN) { + fprintf(stderr, "Error: Was not able to read the " + "minimum page size "); + fprintf(stderr, "of %d bytes. Bytes read was %lu\n", + UNIV_ZIP_SIZE_MIN, bytes); + + free(buf); + DBUG_RETURN(1); + } + + /* enable variable is_system_tablespace when space_id of given + file is zero. 
Use to skip the checksum verification and rewrite + for doublewrite pages. */ + is_system_tablespace = (!memcmp(&space_id, buf + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4)) + ? true : false; + + const page_size_t& page_size = get_page_size(buf); + + pages = (ulint) (size / page_size.physical()); + + if (just_count) { + if (read_from_stdin) { + fprintf(stderr, "Number of pages:%lu\n", pages); + } else { + printf("Number of pages:%lu\n", pages); + } + continue; + } else if (verbose && !read_from_stdin) { + if (is_log_enabled) { + fprintf(log_file, "file %s = %llu bytes " + "(%lu pages)\n", filename, size, pages); + if (do_one_page) { + fprintf(log_file, "Innochecksum: " + "checking page %lu \n", + do_page); + } + } + } else { + if (is_log_enabled) { + fprintf(log_file, "Innochecksum: checking " + "pages in range %lu to %lu\n", + start_page, use_end_page ? + end_page : (pages - 1)); + } + } + + /* seek to the necessary position */ + if (start_page) { + if (!read_from_stdin) { + /* If read is not from stdin, we can use + fseeko() to position the file pointer to + the desired page. */ + partial_page_read = false; + + offset = (off_t) start_page + * (off_t) page_size.physical(); +#ifdef _WIN32 + if (_fseeki64(fil_in, offset, SEEK_SET)) { +#else + if (fseeko(fil_in, offset, SEEK_SET)) { +#endif /* _WIN32 */ + perror("Error: Unable to seek to " + "necessary offset"); + + free(buf); + DBUG_RETURN(1); + } + /* Save the current file pointer in + pos variable. */ + if (0 != fgetpos(fil_in, &pos)) { + perror("fgetpos"); + + free(buf); + DBUG_RETURN(1); + } + } else { + + ulong count = 0; + + while (!feof(fil_in)) { + if (start_page == count) { + break; + } + /* We read a part of page to find the + minimum page size. We cannot reset + the file pointer to the beginning of + the page if we are reading from stdin + (fseeko() on stdin doesn't work). So + read only the remaining part of page, + if partial_page_read is enable. 
*/ + bytes = read_file(buf, + partial_page_read, + static_cast( + page_size.physical()), + fil_in); + + partial_page_read = false; + count++; + + if (!bytes || feof(fil_in)) { + fprintf(stderr, "Error: Unable " + "to seek to necessary " + "offset"); + + free(buf); + DBUG_RETURN(1); + } + } + } + } + + if (page_type_dump) { + fprintf(fil_page_type, + "\n\nFilename::%s\n", filename); + fprintf(fil_page_type, + "========================================" + "======================================\n"); + fprintf(fil_page_type, + "\tPAGE_NO\t\t|\t\tPAGE_TYPE\t\t" + "\t|\tEXTRA INFO\n"); + fprintf(fil_page_type, + "========================================" + "======================================\n"); + } + + /* main checksumming loop */ + cur_page_num = start_page; + lastt = 0; + while (!feof(fil_in)) { + + bytes = read_file(buf, partial_page_read, + static_cast( + page_size.physical()), fil_in); + partial_page_read = false; + + if (!bytes && feof(fil_in)) { + break; + } + + if (ferror(fil_in)) { + fprintf(stderr, "Error reading %lu bytes", + page_size.physical()); + perror(" "); + + free(buf); + DBUG_RETURN(1); + } + + if (bytes != page_size.physical()) { + fprintf(stderr, "Error: bytes read (%lu) " + "doesn't match page size (%lu)\n", + bytes, page_size.physical()); + free(buf); + DBUG_RETURN(1); + } + + if (is_system_tablespace) { + /* enable when page is double write buffer.*/ + skip_page = is_page_doublewritebuffer(buf); + } else { + skip_page = false; + + if (!page_decompress(buf, tbuf, page_size)) { + + fprintf(stderr, + "Page decompress failed"); + + free(buf); + DBUG_RETURN(1); + } + } + + /* If no-check is enabled, skip the + checksum verification.*/ + if (!no_check) { + /* Checksum verification */ + if (!skip_page) { + is_corrupted = is_page_corrupted( + buf, page_size); + + if (is_corrupted) { + fprintf(stderr, "Fail: page " + "%lu invalid\n", + cur_page_num); + + mismatch_count++; + + if(mismatch_count > allow_mismatches) { + fprintf(stderr, + "Exceeded the 
" + "maximum allowed " + "checksum mismatch " + "count::%lu\n", + allow_mismatches); + + free(buf); + DBUG_RETURN(1); + } + } + } + } + + /* Rewrite checksum */ + if (do_write + && !write_file(filename, fil_in, buf, + page_size.is_compressed(), &pos, + static_cast(page_size.physical()))) { + + free(buf); + DBUG_RETURN(1); + } + + /* end if this was the last page we were supposed to check */ + if (use_end_page && (cur_page_num >= end_page)) { + break; + } + + if (page_type_summary || page_type_dump) { + parse_page(buf, fil_page_type); + } + + /* do counter increase and progress printing */ + cur_page_num++; + if (verbose && !read_from_stdin) { + if ((cur_page_num % 64) == 0) { + now = time(0); + if (!lastt) { + lastt= now; + } + if (now - lastt >= 1 + && is_log_enabled) { + fprintf(log_file, "page %lu " + "okay: %.3f%% done\n", + (cur_page_num - 1), + (float) cur_page_num / pages * 100); + lastt = now; + } + } + } + } + + if (!read_from_stdin) { + /* flcose() will flush the data and release the lock if + any acquired. */ + fclose(fil_in); + } + + /* Enabled for page type summary. 
*/ + if (page_type_summary) { + if (!read_from_stdin) { + fprintf(stdout, "\nFile::%s",filename); + print_summary(stdout); + } else { + print_summary(stderr); + } + } + } + + if (is_log_enabled) { + fclose(log_file); + } + + free(buf); + DBUG_RETURN(0); } diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index d0c4768e882..8ebd72b9c58 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -25,6 +25,7 @@ SET(HEADERS mysql.h mysql_com.h mysql_com_server.h + pack.h my_byteorder.h byte_order_generic.h byte_order_generic_x86.h diff --git a/include/base64.h b/include/base64.h index 9a843b5088e..cb5ac5e0b5e 100644 --- a/include/base64.h +++ b/include/base64.h @@ -22,34 +22,34 @@ extern "C" { #endif /* - Calculate how much memory needed for dst of base64_encode() + Calculate how much memory needed for dst of my_base64_encode() */ -int base64_needed_encoded_length(int length_of_data); +int my_base64_needed_encoded_length(int length_of_data); /* - Maximum length base64_encode_needed_length() can accept with no overflow. + Maximum length my_base64_encode_needed_length() can accept with no overflow. */ -int base64_encode_max_arg_length(void); +int my_base64_encode_max_arg_length(void); /* - Calculate how much memory needed for dst of base64_decode() + Calculate how much memory needed for dst of my_base64_decode() */ -int base64_needed_decoded_length(int length_of_encoded_data); +int my_base64_needed_decoded_length(int length_of_encoded_data); /* - Maximum length base64_decode_needed_length() can accept with no overflow. + Maximum length my_base64_decode_needed_length() can accept with no overflow. 
*/ -int base64_decode_max_arg_length(); +int my_base64_decode_max_arg_length(); /* Encode data as a base64 string */ -int base64_encode(const void *src, size_t src_len, char *dst); +int my_base64_encode(const void *src, size_t src_len, char *dst); /* Decode a base64 string into data */ -int base64_decode(const char *src, size_t src_len, +int my_base64_decode(const char *src, size_t src_len, void *dst, const char **end_ptr, int flags); /* Allow multuple chunks 'AAA= AA== AA==', binlog uses this */ diff --git a/include/dur_prop.h b/include/dur_prop.h new file mode 100644 index 00000000000..558ce5acc01 --- /dev/null +++ b/include/dur_prop.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef _my_dur_prop_h +#define _my_dur_prop_h + +enum durability_properties +{ + /* + Preserves the durability properties defined by the engine + */ + HA_REGULAR_DURABILITY= 0, + /* + Ignore the durability properties defined by the engine and + write only in-memory entries. 
+ */ + HA_IGNORE_DURABILITY= 1 +}; + +#endif /* _my_dur_prop_h */ diff --git a/include/my_base.h b/include/my_base.h index 1317639c528..f5842685f9d 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -497,13 +497,19 @@ enum ha_base_keytype { #define HA_ERR_DISK_FULL 189 #define HA_ERR_INCOMPATIBLE_DEFINITION 190 #define HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE 191 /* Too many words in a phrase */ -#define HA_ERR_DECRYPTION_FAILED 192 /* Table encrypted but - decypt failed */ -#define HA_ERR_LAST 192 /* Copy of last error nr */ +#define HA_ERR_DECRYPTION_FAILED 192 /* Table encrypted but decypt failed */ +#define HA_ERR_FK_DEPTH_EXCEEDED 193 /* FK cascade depth exceeded */ +#define HA_ERR_TABLESPACE_MISSING 194 /* Missing Tablespace */ +#define HA_ERR_LAST 194 /* Copy of last error nr * */ /* Number of different errors */ #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) +/* aliases */ +#define HA_ERR_TABLE_CORRUPT HA_ERR_WRONG_IN_RECORD +#define HA_ERR_QUERY_INTERRUPTED HA_ERR_ABORTED_BY_USER +#define HA_ERR_NOT_ALLOWED_COMMAND HA_ERR_WRONG_COMMAND + /* Other constants */ #define HA_NAMELEN 64 /* Max length of saved filename */ @@ -632,17 +638,4 @@ C_MODE_START typedef void (* invalidator_by_filename)(const char * filename); C_MODE_END - -enum durability_properties -{ - /* - Preserves the durability properties defined by the engine */ - HA_REGULAR_DURABILITY= 0, - /* - Ignore the durability properties defined by the engine and - write only in-memory entries. 
- */ - HA_IGNORE_DURABILITY= 1 -}; - #endif /* _my_base_h */ diff --git a/include/my_global.h b/include/my_global.h index 1cf3f217549..0f24570c2f7 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -817,6 +817,7 @@ static inline bool isfinite(double x) { return std::isfinite(x); } #ifndef HAVE_ISNAN #define isnan(x) ((x) != (x)) #endif +#define my_isnan(x) isnan(x) #ifdef HAVE_ISINF #define my_isinf(X) isinf(X) @@ -1250,4 +1251,13 @@ static inline double rint(double x) #endif #endif +#define FLOATING_POINT_DECIMALS 31 + +/* Keep client compatible with earlier versions */ +#ifdef MYSQL_SERVER +#define NOT_FIXED_DEC DECIMAL_NOT_SPECIFIED +#else +#define NOT_FIXED_DEC FLOATING_POINT_DECIMALS +#endif + #endif /* my_global_h */ diff --git a/include/my_handler_errors.h b/include/my_handler_errors.h index 5af6a359348..bdea4f71eaf 100644 --- a/include/my_handler_errors.h +++ b/include/my_handler_errors.h @@ -23,16 +23,18 @@ static const char *handler_error_messages[]= { + /* 120 */ "Didn't find key on read or update", "Duplicate key on write or update", "Internal (unspecified) error in handler", "Someone has changed the row since it was read (while the table was locked to prevent it)", "Wrong index given to function", "Undefined handler error 125", - "Index file is crashed", - "Record file is crashed", + "Index is corrupted", + "Table file is corrupted", "Out of memory in engine", "Undefined handler error 129", + /* 130 */ "Incorrect file format", "Command not supported by database", "Old database file", @@ -43,6 +45,7 @@ static const char *handler_error_messages[]= "No more records (read after end of file)", "Unsupported extension used for table", "Too big row", + /* 140 */ "Wrong create options", "Duplicate unique key or constraint on write or update", "Unknown character set used in table", @@ -53,6 +56,7 @@ static const char *handler_error_messages[]= "Lock table is full; Restart program with a larger lock table", "Updates are not allowed under a read only 
transactions", "Lock deadlock; Retry transaction", + /* 150 */ "Foreign key constraint is incorrectly formed", "Cannot add a child row", "Cannot delete a parent row", @@ -63,6 +67,7 @@ static const char *handler_error_messages[]= "Could not connect to storage engine", "Unexpected null pointer found when using spatial index", "The table changed in storage engine", + /* 160 */ "There's no partition in table for the given value", "Row-based binary logging of row failed", "Index needed in foreign key constraint", @@ -73,6 +78,7 @@ static const char *handler_error_messages[]= "Failed to set row auto increment value", "Unknown (generic) error from engine", "Record was not update. Original values was same as new values", + /* 170 */ "It is not possible to log this statement", "The event was corrupt, leading to illegal data being read", "The table is of a new format not supported by this version", @@ -83,6 +89,7 @@ static const char *handler_error_messages[]= "Too many active concurrent transactions", "Record not matching the given partition set", "Index column length exceeds limit", + /* 180 */ "Index corrupted", "Undo record too big", "Invalid InnoDB FTS Doc ID", @@ -93,9 +100,12 @@ static const char *handler_error_messages[]= "Row is not visible by the current transaction", "Operation was interrupted by end user (probably kill command?)", "Disk full", + /* 190 */ "Incompatible key or row definition between the MariaDB .frm file and the information in the storage engine. You have to dump and restore the table to fix this", "Too many words in a FTS phrase or proximity search", - "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match." + "Table encrypted but decryption failed. 
This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.", + "Foreign key cascade delete/update exceeds max depth", + "Tablespace is missing for table" }; #endif /* MYSYS_MY_HANDLER_ERRORS_INCLUDED */ diff --git a/include/my_pthread.h b/include/my_pthread.h index 0d0e5d51bac..bf61a024390 100644 --- a/include/my_pthread.h +++ b/include/my_pthread.h @@ -266,7 +266,7 @@ struct tm *gmtime_r(const time_t *clock, struct tm *res); #undef pthread_detach_this_thread #define pthread_detach_this_thread() { pthread_t tmp=pthread_self() ; pthread_detach(&tmp); } #else /* HAVE_PTHREAD_ATTR_CREATE && !HAVE_SIGWAIT */ -#define HAVE_PTHREAD_KILL +#define HAVE_PTHREAD_KILL 1 #endif #endif /* defined(__WIN__) */ diff --git a/include/my_sys.h b/include/my_sys.h index 7b7158573b4..25554701a8c 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -1019,6 +1019,7 @@ extern void add_compiled_collation(struct charset_info_st *cs); extern size_t escape_string_for_mysql(CHARSET_INFO *charset_info, char *to, size_t to_length, const char *from, size_t length); +extern char *get_tty_password(const char *opt_message); #ifdef __WIN__ #define BACKSLASH_MBTAIL /* File system character set */ diff --git a/include/my_time.h b/include/my_time.h index a5fe7858797..8dc1f09ba0f 100644 --- a/include/my_time.h +++ b/include/my_time.h @@ -171,6 +171,10 @@ static inline my_bool validate_timestamp_range(const MYSQL_TIME *t) return TRUE; } +/* Can't include mysqld_error.h, it needs mysys to build, thus hardcode 2 error values here. 
*/ +#define ER_WARN_DATA_OUT_OF_RANGE 1264 +#define ER_WARN_INVALID_TIMESTAMP 1299 + my_time_t my_system_gmt_sec(const MYSQL_TIME *t, long *my_timezone, uint *error_code); diff --git a/include/my_valgrind.h b/include/my_valgrind.h index 0662f5dce71..9ceb49c1094 100644 --- a/include/my_valgrind.h +++ b/include/my_valgrind.h @@ -41,3 +41,5 @@ #define TRASH_FREE(A,B) TRASH_FILL(A,B,0x8F) #define TRASH(A,B) TRASH_FREE(A,B) +# define DBUG_ASSERT_DEFINED(x) \ + DBUG_ASSERT(!VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))) diff --git a/include/mysql/psi/psi_base.h b/include/mysql/psi/psi_base.h new file mode 100644 index 00000000000..10593c4dab4 --- /dev/null +++ b/include/mysql/psi/psi_base.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MYSQL_PSI_BASE_H +#define MYSQL_PSI_BASE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @file mysql/psi/psi_base.h + Performance schema instrumentation interface. + + @defgroup Instrumentation_interface Instrumentation Interface + @ingroup Performance_schema + @{ +*/ + +#define PSI_INSTRUMENT_ME 0 + +#define PSI_NOT_INSTRUMENTED 0 + +/** + Global flag. + This flag indicate that an instrumentation point is a global variable, + or a singleton. +*/ +#define PSI_FLAG_GLOBAL (1 << 0) + +/** + Mutable flag. 
+ This flag indicate that an instrumentation point is a general placeholder, + that can mutate into a more specific instrumentation point. +*/ +#define PSI_FLAG_MUTABLE (1 << 1) + +#define PSI_FLAG_THREAD (1 << 2) + +/** + Stage progress flag. + This flag apply to the stage instruments only. + It indicates the instrumentation provides progress data. +*/ +#define PSI_FLAG_STAGE_PROGRESS (1 << 3) + +/** + Shared Exclusive flag. + Indicates that rwlock support the shared exclusive state. +*/ +#define PSI_RWLOCK_FLAG_SX (1 << 4) + +/** + Transferable flag. + This flag indicate that an instrumented object can + be created by a thread and destroyed by another thread. +*/ +#define PSI_FLAG_TRANSFER (1 << 5) + +#ifdef HAVE_PSI_INTERFACE + +/** + @def PSI_VERSION_1 + Performance Schema Interface number for version 1. + This version is supported. +*/ +#define PSI_VERSION_1 1 + +/** + @def PSI_VERSION_2 + Performance Schema Interface number for version 2. + This version is not implemented, it's a placeholder. +*/ +#define PSI_VERSION_2 2 + +/** + @def PSI_CURRENT_VERSION + Performance Schema Interface number for the most recent version. + The most current version is @c PSI_VERSION_1 +*/ +#define PSI_CURRENT_VERSION 1 + +/** + @def USE_PSI_1 + Define USE_PSI_1 to use the interface version 1. +*/ + +/** + @def USE_PSI_2 + Define USE_PSI_2 to use the interface version 2. +*/ + +/** + @def HAVE_PSI_1 + Define HAVE_PSI_1 if the interface version 1 needs to be compiled in. +*/ + +/** + @def HAVE_PSI_2 + Define HAVE_PSI_2 if the interface version 2 needs to be compiled in. +*/ + +#ifndef USE_PSI_2 +#ifndef USE_PSI_1 +#define USE_PSI_1 +#endif +#endif + +#ifdef USE_PSI_1 +#define HAVE_PSI_1 +#endif + +#ifdef USE_PSI_2 +#define HAVE_PSI_2 +#endif + +/* + Allow to override PSI_XXX_CALL at compile time + with more efficient implementations, if available. + If nothing better is available, + make a dynamic call using the PSI_server function pointer. 
+*/ + +#define PSI_DYNAMIC_CALL(M) PSI_server->M + +#endif /* HAVE_PSI_INTERFACE */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif /* MYSQL_PSI_BASE_H */ + diff --git a/include/mysql/psi/psi_memory.h b/include/mysql/psi/psi_memory.h new file mode 100644 index 00000000000..725b3ed77d0 --- /dev/null +++ b/include/mysql/psi/psi_memory.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MYSQL_PSI_MEMORY_H +#define MYSQL_PSI_MEMORY_H + +#include "psi_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @file mysql/psi/psi_memory.h + Performance schema instrumentation interface. + + @defgroup Instrumentation_interface Instrumentation Interface + @ingroup Performance_schema + @{ +*/ + +#ifdef HAVE_PSI_INTERFACE +#ifndef DISABLE_ALL_PSI +#ifndef DISABLE_PSI_MEMORY +#define HAVE_PSI_MEMORY_INTERFACE +#endif /* DISABLE_PSI_MEMORY */ +#endif /* DISABLE_ALL_PSI */ +#endif /* HAVE_PSI_INTERFACE */ + +struct PSI_thread; + +/** + Instrumented memory key. + To instrument memory, a memory key must be obtained using @c register_memory. + Using a zero key always disable the instrumentation. 
+*/ +typedef unsigned int PSI_memory_key; + +#ifdef HAVE_PSI_1 + +/** + @defgroup Group_PSI_v1 Application Binary Interface, version 1 + @ingroup Instrumentation_interface + @{ +*/ + +/** + Memory instrument information. + @since PSI_VERSION_1 + This structure is used to register instrumented memory. +*/ +struct PSI_memory_info_v1 +{ + /** Pointer to the key assigned to the registered memory. */ + PSI_memory_key *m_key; + /** The name of the memory instrument to register. */ + const char *m_name; + /** + The flags of the socket instrument to register. + @sa PSI_FLAG_GLOBAL + */ + int m_flags; +}; +typedef struct PSI_memory_info_v1 PSI_memory_info_v1; + +/** + Memory registration API. + @param category a category name (typically a plugin name) + @param info an array of memory info to register + @param count the size of the info array +*/ +typedef void (*register_memory_v1_t) + (const char *category, struct PSI_memory_info_v1 *info, int count); + +/** + Instrument memory allocation. + @param key the memory instrument key + @param size the size of memory allocated + @param[out] owner the memory owner + @return the effective memory instrument key +*/ +typedef PSI_memory_key (*memory_alloc_v1_t) + (PSI_memory_key key, size_t size, struct PSI_thread ** owner); + +/** + Instrument memory re allocation. + @param key the memory instrument key + @param old_size the size of memory previously allocated + @param new_size the size of memory re allocated + @param[in, out] owner the memory owner + @return the effective memory instrument key +*/ +typedef PSI_memory_key (*memory_realloc_v1_t) + (PSI_memory_key key, size_t old_size, size_t new_size, struct PSI_thread ** owner); + +/** + Instrument memory claim. 
+ @param key the memory instrument key + @param size the size of memory allocated + @param[in, out] owner the memory owner + @return the effective memory instrument key +*/ +typedef PSI_memory_key (*memory_claim_v1_t) + (PSI_memory_key key, size_t size, struct PSI_thread ** owner); + +/** + Instrument memory free. + @param key the memory instrument key + @param size the size of memory allocated + @param owner the memory owner +*/ +typedef void (*memory_free_v1_t) + (PSI_memory_key key, size_t size, struct PSI_thread * owner); + +/** @} (end of group Group_PSI_v1) */ + +#endif /* HAVE_PSI_1 */ + +#ifdef HAVE_PSI_2 +struct PSI_memory_info_v2 +{ + int placeholder; +}; + +#endif /* HAVE_PSI_2 */ + +#ifdef USE_PSI_1 +typedef struct PSI_memory_info_v1 PSI_memory_info; +#endif + +#ifdef USE_PSI_2 +typedef struct PSI_memory_info_v2 PSI_memory_info; +#endif + +/** @} (end of group Instrumentation_interface) */ + +#ifdef __cplusplus +} +#endif + + +#endif /* MYSQL_PSI_MEMORY_H */ + diff --git a/include/mysql_com.h b/include/mysql_com.h index 96b872ace37..461800f3ce7 100644 --- a/include/mysql_com.h +++ b/include/mysql_com.h @@ -680,12 +680,7 @@ my_bool my_thread_init(void); void my_thread_end(void); #ifdef MY_GLOBAL_INCLUDED -ulong STDCALL net_field_length(uchar **packet); -my_ulonglong net_field_length_ll(uchar **packet); -my_ulonglong safe_net_field_length_ll(uchar **packet, size_t packet_len); -uchar *net_store_length(uchar *pkg, ulonglong length); -uchar *safe_net_store_length(uchar *pkg, size_t pkg_len, ulonglong length); -unsigned int net_length_size(ulonglong num); +#include "pack.h" #endif #ifdef __cplusplus @@ -702,12 +697,5 @@ unsigned int net_length_size(ulonglong num); decimals */ -#define FLOATING_POINT_DECIMALS 31 -/* Keep client compatible with earlier versions */ -#ifdef MYSQL_SERVER -#define NOT_FIXED_DEC DECIMAL_NOT_SPECIFIED -#else -#define NOT_FIXED_DEC FLOATING_POINT_DECIMALS -#endif #endif diff --git a/include/pack.h b/include/pack.h new file mode 
100644 index 00000000000..f991e72326b --- /dev/null +++ b/include/pack.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifdef __cplusplus +extern "C" { +#endif + +ulong net_field_length(uchar **packet); +my_ulonglong net_field_length_ll(uchar **packet); +my_ulonglong safe_net_field_length_ll(uchar **packet, size_t packet_len); +uchar *net_store_length(uchar *pkg, ulonglong length); +uchar *safe_net_store_length(uchar *pkg, size_t pkg_len, ulonglong length); +unsigned int net_length_size(ulonglong num); + +#ifdef __cplusplus +} +#endif diff --git a/include/sql_common.h b/include/sql_common.h index 39b8ce18517..49616f6d56c 100644 --- a/include/sql_common.h +++ b/include/sql_common.h @@ -77,9 +77,13 @@ typedef struct st_mysql_methods #endif } MYSQL_METHODS; +#ifdef LIBMARIADB +#define simple_command(mysql, command, arg, length, skip_check) ma_simple_command(mysql, command, (char *)arg, length, skip_check, NULL) +#else #define simple_command(mysql, command, arg, length, skip_check) \ (*(mysql)->methods->advanced_command)(mysql, command, 0, \ 0, arg, length, skip_check, NULL) +#endif #define stmt_command(mysql, command, arg, length, stmt) \ (*(mysql)->methods->advanced_command)(mysql, command, 0, \ 0, arg, length, 1, stmt) @@ -109,8 +113,9 @@ void set_mysql_extended_error(MYSQL *mysql, int errcode, const char *sqlstate, 
const char *format, ...); /* client side of the pluggable authentication */ +struct st_vio; struct st_plugin_vio_info; -void mpvio_info(Vio *vio, struct st_plugin_vio_info *info); +void mpvio_info(struct st_vio *vio, struct st_plugin_vio_info *info); int run_plugin_auth(MYSQL *mysql, char *data, uint data_len, const char *data_plugin, const char *db); int mysql_client_plugin_init(); diff --git a/libmariadb b/libmariadb new file mode 160000 index 00000000000..63bdcec6437 --- /dev/null +++ b/libmariadb @@ -0,0 +1 @@ +Subproject commit 63bdcec64372006b872273d5249068ab044b3f72 diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt deleted file mode 100644 index 5eb89c19f45..00000000000 --- a/libmysql/CMakeLists.txt +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright (c) 2006, 2013, Oracle and/or its affiliates. -# Copyright (c) 2009, 2013, SkySQL Ab. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -INCLUDE_DIRECTORIES( - ${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/libmysql - ${PCRE_INCLUDES} - ${CMAKE_SOURCE_DIR}/strings - ${SSL_INCLUDE_DIRS} - ${SSL_INTERNAL_INCLUDE_DIRS} - ${ZLIB_INCLUDE_DIR}) -ADD_DEFINITIONS(${SSL_DEFINES}) - -SET(CLIENT_API_FUNCTIONS_5_1 -get_tty_password -mysql_thread_end -mysql_thread_init -myodbc_remove_escape -mysql_affected_rows -mysql_autocommit -mysql_stmt_bind_param -mysql_stmt_bind_result -mysql_change_user -mysql_character_set_name -mysql_close -mysql_commit -mysql_data_seek -mysql_debug -mysql_dump_debug_info -mysql_eof -mysql_errno -mysql_error -mysql_escape_string -mysql_hex_string -mysql_stmt_execute -mysql_stmt_fetch -mysql_stmt_fetch_column -mysql_fetch_field -mysql_fetch_field_direct -mysql_fetch_fields -mysql_fetch_lengths -mysql_fetch_row -mysql_field_count -mysql_field_seek -mysql_field_tell -mysql_free_result -mysql_get_parameters -mysql_get_client_info -mysql_get_host_info -mysql_get_proto_info -mysql_get_server_info -mysql_get_client_version -mysql_get_ssl_cipher -mysql_info -mysql_init -mysql_insert_id -mysql_kill -mysql_set_server_option -mysql_list_dbs -mysql_list_fields -mysql_list_processes -mysql_list_tables -mysql_more_results -mysql_next_result -mysql_num_fields -mysql_num_rows -mysql_options -mysql_stmt_param_count -mysql_stmt_param_metadata -mysql_ping -mysql_stmt_result_metadata -mysql_query -mysql_read_query_result -mysql_real_connect -mysql_real_escape_string -mysql_real_query -mysql_refresh -mysql_rollback -mysql_row_seek -mysql_row_tell -mysql_select_db -mysql_stmt_send_long_data -mysql_send_query -mysql_shutdown -mysql_ssl_set -mysql_stat -mysql_stmt_affected_rows -mysql_stmt_close -mysql_stmt_reset -mysql_stmt_data_seek -mysql_stmt_errno -mysql_stmt_error -mysql_stmt_free_result 
-mysql_stmt_num_rows -mysql_stmt_row_seek -mysql_stmt_row_tell -mysql_stmt_store_result -mysql_store_result -mysql_thread_id -mysql_thread_safe -mysql_use_result -mysql_warning_count -mysql_stmt_sqlstate -mysql_sqlstate -mysql_get_server_version -mysql_stmt_prepare -mysql_stmt_init -mysql_stmt_insert_id -mysql_stmt_attr_get -mysql_stmt_attr_set -mysql_stmt_field_count -mysql_set_local_infile_default -mysql_set_local_infile_handler -mysql_embedded -mysql_server_init -mysql_server_end -mysql_set_character_set -mysql_get_character_set_info -# These are documented in Paul DuBois' MySQL book, -# so we treat them as part of the de-facto API. -handle_options -load_defaults -free_defaults -my_print_help -) - -SET(CLIENT_API_FUNCTIONS_5_5 -my_progname -mysql_stmt_next_result -# Charsets -my_charset_bin -my_charset_latin1 -my_charset_utf8_general_ci -# Client plugins -mysql_client_find_plugin -mysql_client_register_plugin -mysql_load_plugin -mysql_load_plugin_v -mysql_plugin_options -# Async API -mysql_get_timeout_value -mysql_get_timeout_value_ms -mysql_get_socket -mysql_autocommit_cont -mysql_autocommit_start -mysql_change_user_cont -mysql_change_user_start -mysql_close_cont -mysql_close_start -mysql_commit_cont -mysql_commit_start -mysql_dump_debug_info_cont -mysql_dump_debug_info_start -mysql_fetch_row_cont -mysql_fetch_row_start -mysql_free_result_cont -mysql_free_result_start -mysql_kill_cont -mysql_kill_start -mysql_list_dbs_cont -mysql_list_dbs_start -mysql_list_fields_cont -mysql_list_fields_start -mysql_list_processes_cont -mysql_list_processes_start -mysql_list_tables_cont -mysql_list_tables_start -mysql_next_result_cont -mysql_next_result_start -mysql_ping_cont -mysql_ping_start -mysql_query_cont -mysql_query_start -mysql_read_query_result_cont -mysql_read_query_result_start -mysql_real_connect_cont -mysql_real_connect_start -mysql_real_query_cont -mysql_real_query_start -mysql_refresh_cont -mysql_refresh_start -mysql_rollback_cont -mysql_rollback_start 
-mysql_select_db_cont -mysql_select_db_start -mysql_send_query_cont -mysql_send_query_start -mysql_set_character_set_cont -mysql_set_character_set_start -mysql_set_server_option_cont -mysql_set_server_option_start -mysql_shutdown_cont -mysql_shutdown_start -mysql_stat_cont -mysql_stat_start -mysql_stmt_close_cont -mysql_stmt_close_start -mysql_stmt_execute_cont -mysql_stmt_execute_start -mysql_stmt_fetch_cont -mysql_stmt_fetch_start -mysql_stmt_free_result_cont -mysql_stmt_free_result_start -mysql_stmt_next_result_cont -mysql_stmt_next_result_start -mysql_stmt_prepare_cont -mysql_stmt_prepare_start -mysql_stmt_reset_cont -mysql_stmt_reset_start -mysql_stmt_send_long_data_cont -mysql_stmt_send_long_data_start -mysql_stmt_store_result_cont -mysql_stmt_store_result_start -mysql_store_result_cont -mysql_store_result_start -#dynamic columns api -dynamic_column_create -dynamic_column_create_many -dynamic_column_update -dynamic_column_update_many -dynamic_column_exists -dynamic_column_list -dynamic_column_get -dynamic_column_prepare_decimal -mariadb_dyncol_create_many_num -mariadb_dyncol_create_many_named -mariadb_dyncol_update_many_num -mariadb_dyncol_update_many_named -mariadb_dyncol_exists_num -mariadb_dyncol_exists_named -mariadb_dyncol_free -mariadb_dyncol_list_num -mariadb_dyncol_list_named -mariadb_dyncol_get_num -mariadb_dyncol_get_named -mariadb_dyncol_has_names -mariadb_dyncol_check -mariadb_dyncol_json -mariadb_dyncol_val_str -mariadb_dyncol_val_long -mariadb_dyncol_val_double -mariadb_dyncol_unpack -mariadb_dyncol_unpack_free -mariadb_dyncol_column_cmp_named -mariadb_dyncol_column_count -mariadb_dyncol_prepare_decimal -# -mariadb_deinitialize_ssl -# low-level API to MySQL protocol -mysql_net_read_packet -mysql_net_field_length -# Added in MariaDB-10.0 to stay compatible with MySQL-5.6, yuck! 
-mysql_options4 -) - -SET(CLIENT_API_FUNCTIONS - ${CLIENT_API_FUNCTIONS_5_1} - ${CLIENT_API_FUNCTIONS_5_5} - CACHE INTERNAL - "Client functions" -) - -IF(CMAKE_SYSTEM_NAME MATCHES "Linux") - IF (NOT DISABLE_LIBMYSQLCLIENT_SYMBOL_VERSIONING) - - INCLUDE (CheckCSourceCompiles) - FILE(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.ld" - "VERSION {\nlibmysqlclient_18 {\nglobal: *;\n};\n}\n") - SET(CMAKE_REQUIRED_LIBRARIES "-Wl,src.ld") - CHECK_C_SOURCE_COMPILES("int main() { return 0; }" - SUPPORTS_VERSION_IN_LINK_SCRIPT) - SET(CMAKE_REQUIRED_LIBRARIES) - - IF (NOT SUPPORTS_VERSION_IN_LINK_SCRIPT) - # https://sourceware.org/bugzilla/show_bug.cgi?id=16895 - MESSAGE(SEND_ERROR "Your current linker does not support VERSION " - "command in linker scripts like a GNU ld or any compatible linker " - "should. Perhaps you're using gold? Either switch to GNU ld compatible " - "linker or run cmake with -DDISABLE_LIBMYSQLCLIENT_SYMBOL_VERSIONING=TRUE " - "to be able to complete the build") - ENDIF (NOT SUPPORTS_VERSION_IN_LINK_SCRIPT) - - # When building RPM, or DEB package on Debian, use ELF symbol versioning - # for compatibility with distribution packages, so client shared library can - # painlessly replace the one supplied by the distribution. - - # Also list of exported symbols in distributions may differ from what is - # considered official API. Define CLIENT_API_5_1_EXTRA for the set of - # symbols, that required to be exported on different platforms. - - # Fedora & Co declared following functions as part of API - SET(CLIENT_API_5_1_EXTRA - # why does Fedora export these? 
- _fini - _init - my_init - - # mysql-connector-odbc requires these - mysql_default_charset_info - mysql_get_charset - mysql_get_charset_by_csname - mysql_net_realloc - - # PHP's mysqli.so requires this (via the ER() macro) - mysql_client_errors - - # Also export the non-renamed variants - # (in case someone wants to rebuild mysqli-php or something similar) - # See MDEV-4127 - default_charset_info - get_charset - get_charset_by_csname - net_realloc - client_errors - - # pure-ftpd requires this - my_make_scrambled_password - - # hydra requires this - scramble - - # ODB requires this: https://bugzilla.redhat.com/show_bug.cgi?id=846602 - THR_KEY_mysys - - # DBD::mysql requires this - is_prefix - ) - - - # And even more so on Debian - SET(CLIENT_API_5_5_EXTRA - # libmyodbc. Argh! - alloc_dynamic - alloc_root - delete_dynamic - dynstr_append - dynstr_append_mem - dynstr_append_os_quoted - dynstr_free - dynstr_realloc - free_root - get_dynamic - init_dynamic_array2 - init_dynamic_string - int2str - list_add - list_delete - my_end - my_free - my_malloc - my_memdup - my_realloc - my_strdup - set_dynamic - strdup_root - strend - strfill - strmake - strmake_root - strxmov - - # pam_mysql.so - make_scrambled_password - make_scrambled_password_323 - ) - - # Generate version script. - # Create semicolon separated lists of functions to export from - # Since RPM packages use separate versioning for 5.1 API - # and 5.5 API (libmysqlclient_16 vs libmysqlclient_18), - # we need 2 lists. - SET (VERSION_HEADER -"VERSION { - libmysqlclient_18 { - global:") - SET (VERSION_FOOTER -" local: - *; - }; - - libmysqlclient_16 { - /* empty here. 
aliases are added above */ - }; -} -") - - SET (CLIENT_API_5_1_LIST) - SET (CLIENT_API_5_1_ALIASES) - FOREACH (f ${CLIENT_API_FUNCTIONS_5_1} ${CLIENT_API_5_1_EXTRA}) - SET(CLIENT_API_5_1_LIST "${CLIENT_API_5_1_LIST}\t${f};\n") - SET(CLIENT_API_5_1_ALIASES "${CLIENT_API_5_1_ALIASES}\"${f}@libmysqlclient_16\" = ${f};\n") - ENDFOREACH() - - SET (CLIENT_API_5_5_LIST) - FOREACH (f ${CLIENT_API_FUNCTIONS_5_5} ${CLIENT_API_5_5_EXTRA}) - SET(CLIENT_API_5_5_LIST "${CLIENT_API_5_5_LIST}\t${f};\n") - ENDFOREACH() - - ELSE (NOT DISABLE_LIBMYSQLCLIENT_SYMBOL_VERSIONING) - SET (CLIENT_API_5_1_ALIASES "/* Versioning disabled per user request. MDEV-5982 */") - ENDIF (NOT DISABLE_LIBMYSQLCLIENT_SYMBOL_VERSIONING) - - # Linker script to version symbols in Fedora- and Debian- compatible way, MDEV-5529 - SET(VERSION_SCRIPT_TEMPLATE ${CMAKE_CURRENT_SOURCE_DIR}/libmysql_versions.ld.in) - - CONFIGURE_FILE( - ${VERSION_SCRIPT_TEMPLATE} - ${CMAKE_CURRENT_BINARY_DIR}/libmysql_versions.ld - @ONLY - ) - SET(VERSION_SCRIPT_LINK_FLAGS - "-Wl,${CMAKE_CURRENT_BINARY_DIR}/libmysql_versions.ld") - -ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") - - -SET(CLIENT_SOURCES - get_password.c - libmysql.c - errmsg.c - ../sql-common/client.c - ../sql-common/mysql_async.c - ../sql-common/my_time.c - ../sql-common/client_plugin.c - ../sql/net_serv.cc - ../sql-common/pack.c - ../sql/password.c - ${CLIENT_SOURCES_EXTRA} -) -ADD_CONVENIENCE_LIBRARY(clientlib ${CLIENT_SOURCES}) -DTRACE_INSTRUMENT(clientlib) -ADD_DEPENDENCIES(clientlib GenError) - -SET(LIBS clientlib dbug strings vio mysys mysys_ssl ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBDL} ${CRC32_VPMSUM_LIBRARY}) - -# Merge several convenience libraries into one big mysqlclient -# and link them together into shared library. 
-MERGE_LIBRARIES(mysqlclient STATIC ${LIBS} COMPONENT Development) - -# Visual Studio users need debug static library for debug projects -IF(MSVC) - INSTALL_DEBUG_TARGET(mysqlclient DESTINATION ${INSTALL_LIBDIR}/debug) - INSTALL_DEBUG_TARGET(clientlib DESTINATION ${INSTALL_LIBDIR}/debug) -ENDIF() - -IF(UNIX) - MACRO(GET_VERSIONED_LIBNAME LIBNAME EXTENSION VERSION OUTNAME) - SET(DOT_VERSION ".${VERSION}") - IF(DOT_VERSION STREQUAL ".") - SET(DOT_VERSION "") - ENDIF() - IF(APPLE) - SET(${OUTNAME} ${LIBNAME}${DOT_VERSION}${EXTENSION}) - ELSE() - SET(${OUTNAME} ${LIBNAME}${EXTENSION}${DOT_VERSION}) - ENDIF() - ENDMACRO() - INSTALL_SYMLINK(${CMAKE_STATIC_LIBRARY_PREFIX}mysqlclient_r.a mysqlclient ${INSTALL_LIBDIR} Development) -ENDIF() - -IF(NOT DISABLE_SHARED) - MERGE_LIBRARIES(libmysql SHARED ${LIBS} - EXPORTS ${CLIENT_API_FUNCTIONS} ${CLIENT_API_5_1_EXTRA} ${CLIENT_API_5_5_EXTRA} - COMPONENT SharedLibraries) - IF(UNIX) - # libtool compatability - IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" OR APPLE) - SET(OS_SHARED_LIB_VERSION "${SHARED_LIB_MAJOR_VERSION}") - ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP-UX") - SET(OS_SHARED_LIB_VERSION "${SHARED_LIB_MAJOR_VERSION}.0") - ELSE() - SET(OS_SHARED_LIB_VERSION "${SHARED_LIB_MAJOR_VERSION}.0.0") - ENDIF() - # Name of shared library is mysqlclient on Unix - SET_TARGET_PROPERTIES(libmysql PROPERTIES - OUTPUT_NAME mysqlclient - VERSION "${OS_SHARED_LIB_VERSION}" - SOVERSION "${SHARED_LIB_MAJOR_VERSION}") - IF(LINK_FLAG_NO_UNDEFINED OR VERSION_SCRIPT_LINK_FLAGS) - GET_TARGET_PROPERTY(libmysql_link_flags libmysql LINK_FLAGS) - IF(NOT libmysql_link_flags) - # Avoid libmysql_link_flags-NOTFOUND - SET(libmysql_link_flags) - ENDIF() - SET_TARGET_PROPERTIES(libmysql PROPERTIES LINK_FLAGS - "${libmysql_link_flags} ${LINK_FLAG_NO_UNDEFINED} ${VERSION_SCRIPT_LINK_FLAGS}") - ENDIF() - # clean direct output needs to be set several targets have the same name - #(mysqlclient in this case) - SET_TARGET_PROPERTIES(mysqlclient PROPERTIES 
CLEAN_DIRECT_OUTPUT 1) - SET_TARGET_PROPERTIES(libmysql PROPERTIES CLEAN_DIRECT_OUTPUT 1) - - # Install links to libmysqlclient.so (client_r) - GET_VERSIONED_LIBNAME( - "${CMAKE_SHARED_LIBRARY_PREFIX}mysqlclient_r" - "${CMAKE_SHARED_LIBRARY_SUFFIX}" - "" - linkname) - INSTALL_SYMLINK(${linkname} libmysql ${INSTALL_LIBDIR} SharedLibraries) - SET(OS_SHARED_LIB_SYMLINKS "${SHARED_LIB_MAJOR_VERSION}" "${OS_SHARED_LIB_VERSION}") - LIST(REMOVE_DUPLICATES OS_SHARED_LIB_SYMLINKS) - FOREACH(ver ${OS_SHARED_LIB_SYMLINKS}) - GET_VERSIONED_LIBNAME( - "${CMAKE_SHARED_LIBRARY_PREFIX}mysqlclient_r" - "${CMAKE_SHARED_LIBRARY_SUFFIX}" - "${ver}" - linkname) - INSTALL_SYMLINK(${linkname} libmysql ${INSTALL_LIBDIR} SharedLibraries) - ENDFOREACH() - ENDIF() -ENDIF() diff --git a/libmysql/get_password.c b/libmysql/get_password.c deleted file mode 100644 index a113306ed57..00000000000 --- a/libmysql/get_password.c +++ /dev/null @@ -1,227 +0,0 @@ -/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation. - - There are special exceptions to the terms and conditions of the GPL as it - is applied to this software. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -/* -** Ask for a password from tty -** This is an own file to avoid conflicts with curses -*/ -#include -#include -#include "mysql.h" -#include -#include - -#ifdef HAVE_GETPASS -#ifdef HAVE_PWD_H -#include -#endif /* HAVE_PWD_H */ -#else /* ! HAVE_GETPASS */ -#if !defined(__WIN__) -#include -#ifdef HAVE_TERMIOS_H /* For tty-password */ -#include -#define TERMIO struct termios -#else -#ifdef HAVE_TERMIO_H /* For tty-password */ -#include -#define TERMIO struct termio -#else -#include -#define TERMIO struct sgttyb -#endif -#endif -#ifdef alpha_linux_port -#include -#include -#endif -#else -#include -#endif /* __WIN__ */ -#endif /* HAVE_GETPASS */ - -#ifdef HAVE_GETPASSPHRASE /* For Solaris */ -#define getpass(A) getpassphrase(A) -#endif - -#if defined(__WIN__) -/* were just going to fake it here and get input from the keyboard */ -void get_tty_password_buff(const char *opt_message, char *to, size_t length) -{ - HANDLE consoleinput; - DWORD oldstate; - char *pos=to,*end=to+length-1; - int i=0; - - consoleinput= CreateFile("CONIN$", GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ , - NULL, OPEN_EXISTING, 0, NULL); - if (consoleinput == NULL || consoleinput == INVALID_HANDLE_VALUE) - { - /* This is a GUI application or service without console input, bail out. */ - *to= 0; - return; - } - _cputs(opt_message ? opt_message : "Enter password: "); - - /* - Switch to raw mode (no line input, no echo input). - Allow Ctrl-C handler with ENABLE_PROCESSED_INPUT. 
- */ - GetConsoleMode(consoleinput, &oldstate); - SetConsoleMode(consoleinput, ENABLE_PROCESSED_INPUT); - for (;;) - { - char tmp; - DWORD chars_read; - if (!ReadConsole(consoleinput, &tmp, 1, &chars_read, NULL)) - break; - if (chars_read == 0) - break; - if (tmp == '\b' || tmp == 127) - { - if (pos != to) - { - _cputs("\b \b"); - pos--; - continue; - } - } - if (tmp == '\n' || tmp == '\r') - break; - if (iscntrl(tmp) || pos == end) - continue; - _cputs("*"); - *(pos++) = tmp; - } - /* Reset console mode after password input. */ - SetConsoleMode(consoleinput, oldstate); - CloseHandle(consoleinput); - *pos=0; - _cputs("\n"); -} - -#else - -#ifndef HAVE_GETPASS -/* - Can't use fgets, because readline will get confused - length is max number of chars in to, not counting \0 - to will not include the eol characters. -*/ - -static void get_password(char *to,uint length,int fd, my_bool echo) -{ - char *pos=to,*end=to+length; - - for (;;) - { - char tmp; - if (my_read(fd,&tmp,1,MYF(0)) != 1) - break; - if (tmp == '\b' || (int) tmp == 127) - { - if (pos != to) - { - if (echo) - { - fputs("\b \b",stdout); - fflush(stdout); - } - pos--; - continue; - } - } - if (tmp == '\n' || tmp == '\r' || tmp == 3) - break; - if (iscntrl(tmp) || pos == end) - continue; - if (echo) - { - fputc('*',stdout); - fflush(stdout); - } - *(pos++) = tmp; - } - *pos=0; - return; -} -#endif /* ! HAVE_GETPASS */ - - -void get_tty_password_buff(const char *opt_message, char *buff, size_t buflen) -{ -#ifdef HAVE_GETPASS - char *passbuff; -#else /* ! HAVE_GETPASS */ - TERMIO org,tmp; -#endif /* HAVE_GETPASS */ - -#ifdef HAVE_GETPASS - passbuff = getpass(opt_message ? opt_message : "Enter password: "); - - /* copy the password to buff and clear original (static) buffer */ - strncpy(buff, passbuff, buflen - 1); -#ifdef _PASSWORD_LEN - memset(passbuff, 0, _PASSWORD_LEN); -#endif -#else - if (isatty(fileno(stdout))) - { - fputs(opt_message ? 
opt_message : "Enter password: ",stdout); - fflush(stdout); - } -#if defined(HAVE_TERMIOS_H) - tcgetattr(fileno(stdin), &org); - tmp = org; - tmp.c_lflag &= ~(ECHO | ISIG | ICANON); - tmp.c_cc[VMIN] = 1; - tmp.c_cc[VTIME] = 0; - tcsetattr(fileno(stdin), TCSADRAIN, &tmp); - get_password(buff, buflen, fileno(stdin), isatty(fileno(stdout))); - tcsetattr(fileno(stdin), TCSADRAIN, &org); -#elif defined(HAVE_TERMIO_H) - ioctl(fileno(stdin), (int) TCGETA, &org); - tmp=org; - tmp.c_lflag &= ~(ECHO | ISIG | ICANON); - tmp.c_cc[VMIN] = 1; - tmp.c_cc[VTIME]= 0; - ioctl(fileno(stdin),(int) TCSETA, &tmp); - get_password(buff,buflen-1,fileno(stdin),isatty(fileno(stdout))); - ioctl(fileno(stdin),(int) TCSETA, &org); -#else - gtty(fileno(stdin), &org); - tmp=org; - tmp.sg_flags &= ~ECHO; - tmp.sg_flags |= RAW; - stty(fileno(stdin), &tmp); - get_password(buff,buflen-1,fileno(stdin),isatty(fileno(stdout))); - stty(fileno(stdin), &org); -#endif - if (isatty(fileno(stdout))) - fputc('\n',stdout); -#endif /* HAVE_GETPASS */ -} -#endif /*__WIN__*/ - -#ifndef MYSQL_DYNAMIC_PLUGIN -char *get_tty_password(const char *opt_message) -{ - char buff[80]; - get_tty_password_buff(opt_message, buff, sizeof(buff)); - return my_strdup(buff, MYF(MY_FAE)); -} -#endif diff --git a/libmysql/libmysql.def b/libmysql/libmysql.def deleted file mode 100644 index 5a6bee4919f..00000000000 --- a/libmysql/libmysql.def +++ /dev/null @@ -1,107 +0,0 @@ -LIBRARY LIBMYSQL -VERSION 6.0 -EXPORTS - load_defaults - mysql_thread_end - mysql_thread_init - myodbc_remove_escape - mysql_affected_rows - mysql_autocommit - mysql_stmt_bind_param - mysql_stmt_bind_result - mysql_change_user - mysql_character_set_name - mysql_close - mysql_commit - mysql_data_seek - mysql_debug - mysql_dump_debug_info - mysql_eof - mysql_errno - mysql_error - mysql_escape_string - mysql_hex_string - mysql_stmt_execute - mysql_stmt_fetch - mysql_stmt_fetch_column - mysql_fetch_field - mysql_fetch_field_direct - mysql_fetch_fields - 
mysql_fetch_lengths - mysql_fetch_row - mysql_field_count - mysql_field_seek - mysql_field_tell - mysql_free_result - mysql_get_client_info - mysql_get_host_info - mysql_get_proto_info - mysql_get_server_info - mysql_get_client_version - mysql_get_ssl_cipher - mysql_info - mysql_init - mysql_insert_id - mysql_kill - mysql_set_server_option - mysql_list_dbs - mysql_list_fields - mysql_list_processes - mysql_list_tables - mysql_more_results - mysql_next_result - mysql_num_fields - mysql_num_rows - mysql_options - mysql_stmt_param_count - mysql_stmt_param_metadata - mysql_ping - mysql_stmt_result_metadata - mysql_query - mysql_read_query_result - mysql_real_connect - mysql_real_escape_string - mysql_real_query - mysql_refresh - mysql_rollback - mysql_row_seek - mysql_row_tell - mysql_select_db - mysql_stmt_send_long_data - mysql_send_query - mysql_shutdown - mysql_ssl_set - mysql_stat - mysql_stmt_affected_rows - mysql_stmt_close - mysql_stmt_reset - mysql_stmt_data_seek - mysql_stmt_errno - mysql_stmt_error - mysql_stmt_free_result - mysql_stmt_num_rows - mysql_stmt_row_seek - mysql_stmt_row_tell - mysql_stmt_store_result - mysql_store_result - mysql_thread_id - mysql_thread_safe - mysql_use_result - mysql_warning_count - mysql_stmt_sqlstate - mysql_sqlstate - mysql_get_server_version - mysql_stmt_prepare - mysql_stmt_init - mysql_stmt_insert_id - mysql_stmt_attr_get - mysql_stmt_attr_set - mysql_stmt_field_count - mysql_set_local_infile_default - mysql_set_local_infile_handler - mysql_embedded - mysql_server_init - mysql_server_end - mysql_set_character_set - mysql_get_character_set_info - mysql_get_server_name diff --git a/libmysql/libmysql_versions.ld.in b/libmysql/libmysql_versions.ld.in deleted file mode 100644 index 0cf5b45cc18..00000000000 --- a/libmysql/libmysql_versions.ld.in +++ /dev/null @@ -1,33 +0,0 @@ -/* - This version script is heavily inspired by Fedora's and Mageia's version - scripts for MySQL client shared library. 
- But it was modified to support Debian-compatible versioning too. - - In RedHat universe, symbols from old libmysqlclient.so.16 - keep their libmysqlclient_16 version. New symbols added in - libmysqlclient.so.18 get the new libmysqlclient_18 version. - - In Debian all symbols in libmysqlclient.so.18 have libmysqlclient_18 version, - including symbols that existed in libmysqlclient.so.16 - - We solve this by putting all symbols into libmysqlclient_18 version node, - but creating aliases for old symbols in the libmysqlclient_16 version node. -*/ - -@CLIENT_API_5_1_ALIASES@ - -/* - On Fedora the following symbols are exported, but renamed into a mysql_ - namespace. We export them as aliases, but keep original symbols too. See - MDEV-4127. -*/ -mysql_default_charset_info = default_charset_info; -mysql_get_charset = get_charset; -mysql_get_charset_by_csname = get_charset_by_csname; -mysql_net_realloc = net_realloc; -mysql_client_errors = client_errors; - -@VERSION_HEADER@ -@CLIENT_API_5_1_LIST@ -@CLIENT_API_5_5_LIST@ -@VERSION_FOOTER@ diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index 4518329a3dd..b64348cdd70 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -18,7 +18,6 @@ ADD_DEFINITIONS(-DMYSQL_SERVER -DEMBEDDED_LIBRARY INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include -${CMAKE_SOURCE_DIR}/libmysql ${CMAKE_SOURCE_DIR}/libmysqld ${CMAKE_SOURCE_DIR}/sql ${CMAKE_BINARY_DIR}/sql @@ -37,8 +36,8 @@ ${CMAKE_BINARY_DIR}/sql/lex_hash.h SET_SOURCE_FILES_PROPERTIES(${GEN_SOURCES} PROPERTIES GENERATED TRUE) SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc - ../libmysql/libmysql.c ../libmysql/errmsg.c ../client/get_password.c - ../sql-common/client.c ../sql-common/my_time.c + libmysql.c ../sql-common/errmsg.c + ../sql-common/client.c ../sql-common/my_user.c ../sql-common/pack.c ../sql-common/client_plugin.c ../sql-common/mysql_async.c ../sql/password.c ../sql/discover.cc ../sql/derror.cc @@ -170,6 +169,254 @@ IF(UNIX) 
${CMAKE_STATIC_LIBRARY_PREFIX}mysqld-debug) ENDIF() +SET(CLIENT_API_FUNCTIONS_5_1 +get_tty_password +mysql_thread_end +mysql_thread_init +myodbc_remove_escape +mysql_affected_rows +mysql_autocommit +mysql_stmt_bind_param +mysql_stmt_bind_result +mysql_change_user +mysql_character_set_name +mysql_close +mysql_commit +mysql_data_seek +mysql_debug +mysql_dump_debug_info +mysql_eof +mysql_errno +mysql_error +mysql_escape_string +mysql_hex_string +mysql_stmt_execute +mysql_stmt_fetch +mysql_stmt_fetch_column +mysql_fetch_field +mysql_fetch_field_direct +mysql_fetch_fields +mysql_fetch_lengths +mysql_fetch_row +mysql_field_count +mysql_field_seek +mysql_field_tell +mysql_free_result +mysql_get_parameters +mysql_get_client_info +mysql_get_host_info +mysql_get_proto_info +mysql_get_server_info +mysql_get_client_version +mysql_get_ssl_cipher +mysql_info +mysql_init +mysql_insert_id +mysql_kill +mysql_set_server_option +mysql_list_dbs +mysql_list_fields +mysql_list_processes +mysql_list_tables +mysql_more_results +mysql_next_result +mysql_num_fields +mysql_num_rows +mysql_options +mysql_stmt_param_count +mysql_stmt_param_metadata +mysql_ping +mysql_stmt_result_metadata +mysql_query +mysql_read_query_result +mysql_real_connect +mysql_real_escape_string +mysql_real_query +mysql_refresh +mysql_rollback +mysql_row_seek +mysql_row_tell +mysql_select_db +mysql_stmt_send_long_data +mysql_send_query +mysql_shutdown +mysql_ssl_set +mysql_stat +mysql_stmt_affected_rows +mysql_stmt_close +mysql_stmt_reset +mysql_stmt_data_seek +mysql_stmt_errno +mysql_stmt_error +mysql_stmt_free_result +mysql_stmt_num_rows +mysql_stmt_row_seek +mysql_stmt_row_tell +mysql_stmt_store_result +mysql_store_result +mysql_thread_id +mysql_thread_safe +mysql_use_result +mysql_warning_count +mysql_stmt_sqlstate +mysql_sqlstate +mysql_get_server_version +mysql_stmt_prepare +mysql_stmt_init +mysql_stmt_insert_id +mysql_stmt_attr_get +mysql_stmt_attr_set +mysql_stmt_field_count +mysql_set_local_infile_default 
+mysql_set_local_infile_handler +mysql_embedded +mysql_server_init +mysql_server_end +mysql_set_character_set +mysql_get_character_set_info +# These are documented in Paul DuBois' MySQL book, +# so we treat them as part of the de-facto API. +handle_options +load_defaults +free_defaults +my_print_help +) + +SET(CLIENT_API_FUNCTIONS_5_5 +my_progname +mysql_stmt_next_result +# Charsets +my_charset_bin +my_charset_latin1 +my_charset_utf8_general_ci +# Client plugins +mysql_client_find_plugin +mysql_client_register_plugin +mysql_load_plugin +mysql_load_plugin_v +mysql_plugin_options +# Async API +mysql_get_timeout_value +mysql_get_timeout_value_ms +mysql_get_socket +mysql_autocommit_cont +mysql_autocommit_start +mysql_change_user_cont +mysql_change_user_start +mysql_close_cont +mysql_close_start +mysql_commit_cont +mysql_commit_start +mysql_dump_debug_info_cont +mysql_dump_debug_info_start +mysql_fetch_row_cont +mysql_fetch_row_start +mysql_free_result_cont +mysql_free_result_start +mysql_kill_cont +mysql_kill_start +mysql_list_dbs_cont +mysql_list_dbs_start +mysql_list_fields_cont +mysql_list_fields_start +mysql_list_processes_cont +mysql_list_processes_start +mysql_list_tables_cont +mysql_list_tables_start +mysql_next_result_cont +mysql_next_result_start +mysql_ping_cont +mysql_ping_start +mysql_query_cont +mysql_query_start +mysql_read_query_result_cont +mysql_read_query_result_start +mysql_real_connect_cont +mysql_real_connect_start +mysql_real_query_cont +mysql_real_query_start +mysql_refresh_cont +mysql_refresh_start +mysql_rollback_cont +mysql_rollback_start +mysql_select_db_cont +mysql_select_db_start +mysql_send_query_cont +mysql_send_query_start +mysql_set_character_set_cont +mysql_set_character_set_start +mysql_set_server_option_cont +mysql_set_server_option_start +mysql_shutdown_cont +mysql_shutdown_start +mysql_stat_cont +mysql_stat_start +mysql_stmt_close_cont +mysql_stmt_close_start +mysql_stmt_execute_cont +mysql_stmt_execute_start +mysql_stmt_fetch_cont 
+mysql_stmt_fetch_start +mysql_stmt_free_result_cont +mysql_stmt_free_result_start +mysql_stmt_next_result_cont +mysql_stmt_next_result_start +mysql_stmt_prepare_cont +mysql_stmt_prepare_start +mysql_stmt_reset_cont +mysql_stmt_reset_start +mysql_stmt_send_long_data_cont +mysql_stmt_send_long_data_start +mysql_stmt_store_result_cont +mysql_stmt_store_result_start +mysql_store_result_cont +mysql_store_result_start +#dynamic columns api +dynamic_column_create +dynamic_column_create_many +dynamic_column_update +dynamic_column_update_many +dynamic_column_exists +dynamic_column_list +dynamic_column_get +dynamic_column_prepare_decimal +mariadb_dyncol_create_many_num +mariadb_dyncol_create_many_named +mariadb_dyncol_update_many_num +mariadb_dyncol_update_many_named +mariadb_dyncol_exists_num +mariadb_dyncol_exists_named +mariadb_dyncol_free +mariadb_dyncol_list_num +mariadb_dyncol_list_named +mariadb_dyncol_get_num +mariadb_dyncol_get_named +mariadb_dyncol_has_names +mariadb_dyncol_check +mariadb_dyncol_json +mariadb_dyncol_val_str +mariadb_dyncol_val_long +mariadb_dyncol_val_double +mariadb_dyncol_unpack +mariadb_dyncol_unpack_free +mariadb_dyncol_column_cmp_named +mariadb_dyncol_column_count +mariadb_dyncol_prepare_decimal +# +mariadb_deinitialize_ssl +# low-level API to MySQL protocol +mysql_net_read_packet +mysql_net_field_length +# Added in MariaDB-10.0 to stay compatible with MySQL-5.6, yuck! +mysql_options4 +) + +SET(CLIENT_API_FUNCTIONS + ${CLIENT_API_FUNCTIONS_5_1} + ${CLIENT_API_FUNCTIONS_5_5} +) + + # List of exported functions in embedded (client api except client plugin or # async (*_start/*_cont functions) diff --git a/libmysql/client_settings.h b/libmysqld/client_settings.h similarity index 99% rename from libmysql/client_settings.h rename to libmysqld/client_settings.h index 2577870bfa3..14be8b61f70 100644 --- a/libmysql/client_settings.h +++ b/libmysqld/client_settings.h @@ -1,14 +1,14 @@ /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. 
All rights reserved. - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libmysql/libmysql.c b/libmysqld/libmysql.c similarity index 99% rename from libmysql/libmysql.c rename to libmysqld/libmysql.c index de50acde2f4..379a2d15fa0 100644 --- a/libmysql/libmysql.c +++ b/libmysqld/libmysql.c @@ -122,9 +122,7 @@ typedef struct st_mysql_stmt_extension 1 could not initialize environment (out of memory or thread keys) */ -int STDCALL mysql_server_init(int argc __attribute__((unused)), - char **argv __attribute__((unused)), - char **groups __attribute__((unused))) +int STDCALL mysql_server_init(int argc, char **argv, char **groups) { int result= 0; if (!mysql_client_init) diff --git a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test index 1ef8544884d..eb0e5e21d62 100644 --- a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test +++ b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test @@ -61,9 +61,8 @@ connection slave; # '2003' CR_CONN_HOST_ERROR # '2002' CR_CONNECTION_ERROR # '2006' CR_SERVER_GONE_ERROR -# '1040' ER_CON_COUNT_ERROR # '1053' ER_SERVER_SHUTDOWN -let $slave_io_errno= 1040, 1053, 2002, 2003, 2006, 2013; +let $slave_io_errno= 1053, 2002, 2003, 2006, 2013; --let $slave_io_error_is_nonfatal= 1 source include/wait_for_slave_io_error.inc; diff --git 
a/mysql-test/include/expect_crash.inc b/mysql-test/include/expect_crash.inc new file mode 100644 index 00000000000..af8b0908104 --- /dev/null +++ b/mysql-test/include/expect_crash.inc @@ -0,0 +1,5 @@ +--let $_server_id= `SELECT @@server_id` +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect + +# There should be a debug crash after using this .inc file +--exec echo "wait" > $_expect_file_name diff --git a/mysql-test/include/have_innodb_32k.inc b/mysql-test/include/have_innodb_32k.inc index 76a8d2d59a3..4f7f5454e87 100644 --- a/mysql-test/include/have_innodb_32k.inc +++ b/mysql-test/include/have_innodb_32k.inc @@ -1,6 +1,4 @@ if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value = 32768`) { - - --skip Test requires InnoDB with 32k Page size. - -} \ No newline at end of file + --skip Test requires InnoDB with 32k page size. +} diff --git a/mysql-test/include/have_innodb_4k.inc b/mysql-test/include/have_innodb_4k.inc new file mode 100644 index 00000000000..f51b8bf66b9 --- /dev/null +++ b/mysql-test/include/have_innodb_4k.inc @@ -0,0 +1,6 @@ +--disable_warnings +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value = 4096`) +{ + --skip Test requires InnoDB with 4k Page size. +} +--enable_warnings diff --git a/mysql-test/include/have_innodb_64k.inc b/mysql-test/include/have_innodb_64k.inc index bcb76c4f54e..6f0fadc25ac 100644 --- a/mysql-test/include/have_innodb_64k.inc +++ b/mysql-test/include/have_innodb_64k.inc @@ -1,4 +1,4 @@ if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value = 65536`) { - --skip Test requires InnoDB with 64k Page size. + --skip Test requires InnoDB with 64k page size. 
} diff --git a/mysql-test/include/have_innodb_8k.inc b/mysql-test/include/have_innodb_8k.inc new file mode 100644 index 00000000000..125a4e4d6af --- /dev/null +++ b/mysql-test/include/have_innodb_8k.inc @@ -0,0 +1,6 @@ +--disable_warnings +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value = 8192`) +{ + --skip Test requires InnoDB with 8k Page size. +} +--enable_warnings diff --git a/mysql-test/include/have_xtradb.combinations b/mysql-test/include/have_xtradb.combinations index 0419dc91171..3454f83cb4d 100644 --- a/mysql-test/include/have_xtradb.combinations +++ b/mysql-test/include/have_xtradb.combinations @@ -6,8 +6,8 @@ innodb-cmpmem innodb-trx innodb-sys-indexes -[xtradb] -innodb -innodb-cmpmem -innodb-trx -innodb-sys-indexes +#[xtradb] +#innodb +#innodb-cmpmem +#innodb-trx +#innodb-sys-indexes diff --git a/mysql-test/include/mix1.inc b/mysql-test/include/mix1.inc index bfe1567691d..7eae4235baa 100644 --- a/mysql-test/include/mix1.inc +++ b/mysql-test/include/mix1.inc @@ -624,6 +624,11 @@ DROP TABLE t1,t2,t3; # Test bug when trying to drop data file which no InnoDB directory entry # +--disable_query_log +call mtr.add_suppression("InnoDB: Table .*bug29807.*"); +call mtr.add_suppression("InnoDB: Cannot open table test/bug29807 from"); +--enable_query_log + create table t1 (a int) engine=innodb; let $MYSQLD_DATADIR= `select @@datadir`; copy_file $MYSQLD_DATADIR/test/t1.frm $MYSQLD_DATADIR/test/bug29807.frm; @@ -631,10 +636,6 @@ copy_file $MYSQLD_DATADIR/test/t1.frm $MYSQLD_DATADIR/test/bug29807.frm; select * from bug29807; drop table t1; drop table bug29807; ---disable_query_log -call mtr.add_suppression("InnoDB: Error: table .test...bug29807. 
does not exist in the InnoDB internal"); -call mtr.add_suppression("InnoDB: Cannot open table test/bug29807 from"); ---enable_query_log # diff --git a/mysql-test/include/mtr_check.sql b/mysql-test/include/mtr_check.sql index 6b25c75276c..d47e7d322b5 100644 --- a/mysql-test/include/mtr_check.sql +++ b/mysql-test/include/mtr_check.sql @@ -32,6 +32,7 @@ BEGIN AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_USE_NATIVE_AIO' + AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' ORDER BY variable_name; diff --git a/mysql-test/include/wait_for_slave_io_error.inc b/mysql-test/include/wait_for_slave_io_error.inc index 4177bf4b858..96844106fa9 100644 --- a/mysql-test/include/wait_for_slave_io_error.inc +++ b/mysql-test/include/wait_for_slave_io_error.inc @@ -18,7 +18,7 @@ # $slave_io_errno # The expected IO error numbers. This can be either a single # number, or a comma-separated list of numbers. Examples: -# --let $slave_io_errno= 1040, 1053, 2002, 2003, 2006, 2013 +# --let $slave_io_errno= 1053, 2002, 2003, 2006, 2013 # --let $slave_io_errno= 1045 # (After BUG#41956 has been fixed, this will be required to be # symbolic names instead of numbers.) 
diff --git a/mysql-test/include/wait_until_connected_again.inc b/mysql-test/include/wait_until_connected_again.inc index 6f64ef45440..bdd99af2fc1 100644 --- a/mysql-test/include/wait_until_connected_again.inc +++ b/mysql-test/include/wait_until_connected_again.inc @@ -11,10 +11,7 @@ let $counter= 5000; let $mysql_errno= 9999; while ($mysql_errno) { - # Strangely enough, the server might return "Too many connections" - # while being shutdown, thus 1040 is an "allowed" error - # See BUG#36228 - --error 0,1040,1053,2002,2003,2005,2006,2013,1927 + --error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013 show status; dec $counter; diff --git a/mysql-test/include/wait_until_disconnected.inc b/mysql-test/include/wait_until_disconnected.inc index 658bf0be739..15bc6474995 100644 --- a/mysql-test/include/wait_until_disconnected.inc +++ b/mysql-test/include/wait_until_disconnected.inc @@ -9,10 +9,7 @@ let $counter= 600; let $mysql_errno= 0; while (!$mysql_errno) { - # Strangely enough, the server might return "Too many connections" - # while being shutdown, thus 1040 is an "allowed" error. - # See BUG#36228. 
- --error 0,1040,1053,2002,2003,2005,2006,2013,1927 + --error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013 show status; dec $counter; diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 60c46f71ada..bbe98c599ec 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2138,35 +2138,6 @@ sub have_maria_support () { return defined $maria_var; } -# -# Set environment to be used by childs of this process for -# things that are constant during the whole lifetime of mysql-test-run -# - -sub find_plugin($$) -{ - my ($plugin, $location) = @_; - my $plugin_filename; - - if (IS_WINDOWS) - { - $plugin_filename = $plugin.".dll"; - } - else - { - $plugin_filename = $plugin.".so"; - } - - my $lib_plugin= - mtr_file_exists(vs_config_dirs($location,$plugin_filename), - "$basedir/lib/plugin/".$plugin_filename, - "$basedir/lib64/plugin/".$plugin_filename, - "$basedir/$location/.libs/".$plugin_filename, - "$basedir/lib/mysql/plugin/".$plugin_filename, - "$basedir/lib64/mysql/plugin/".$plugin_filename, - ); - return $lib_plugin; -} sub environment_setup { @@ -2588,6 +2559,7 @@ sub setup_vardir() { { for (<$bindir/storage/*$opt_vs_config/*.dll>, <$bindir/plugin/*$opt_vs_config/*.dll>, + <$bindir/libmariadb/plugins/*$opt_vs_config/*.dll>, <$bindir/sql$opt_vs_config/*.dll>) { my $pname=basename($_); @@ -2605,12 +2577,9 @@ sub setup_vardir() { unlink "$plugindir/symlink_test"; } - for (<../storage/*/.libs/*.so>, - <../plugin/*/.libs/*.so>, - <../plugin/*/*/.libs/*.so>, - <../sql/.libs/*.so>, - <$bindir/storage/*/*.so>, + for (<$bindir/storage/*/*.so>, <$bindir/plugin/*/*.so>, + <$bindir/libmariadb/plugins/*/*.so>, <$bindir/sql/*.so>) { my $pname=basename($_); @@ -2632,6 +2601,8 @@ sub setup_vardir() { # hm, what paths work for debs and for rpms ? 
for (<$bindir/lib64/mysql/plugin/*.so>, <$bindir/lib/mysql/plugin/*.so>, + <$bindir/lib64/mariadb/plugin/*.so>, + <$bindir/lib/mariadb/plugin/*.so>, <$bindir/lib/plugin/*.so>, # bintar <$bindir/lib/plugin/*.dll>) { @@ -4395,7 +4366,6 @@ sub extract_warning_lines ($$) { qr/Slave SQL thread retried transaction/, qr/Slave \(additional info\)/, qr/Incorrect information in file/, - qr/Incorrect key file for table .*crashed.*/, qr/Slave I\/O: Get master SERVER_ID failed with error:.*/, qr/Slave I\/O: Get master clock failed with error:.*/, qr/Slave I\/O: Get master COLLATION_SERVER failed with error:.*/, @@ -4451,7 +4421,14 @@ sub extract_warning_lines ($$) { qr|nnoDB: fix the corruption by dumping, dropping, and reimporting|, qr|InnoDB: the corrupt table. You can use CHECK|, qr|InnoDB: TABLE to scan your table for corruption|, - qr/InnoDB: See also */ + qr/InnoDB: See also */, + qr/InnoDB: Cannot open .*ib_buffer_pool.* for reading: No such file or directory*/, + qr/InnoDB: Upgrading redo log:*/, + qr|InnoDB: Starting to delete and rewrite log files.|, + qr/InnoDB: New log files created, LSN=*/, + qr|InnoDB: Creating foreign key constraint system tables.|, + qr/InnoDB: Table .*mysql.*innodb_table_stats.* not found./, + qr/InnoDB: User stopword table .* does not exist./ ); diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result index de93fab4c9e..522d8ab45f9 100644 --- a/mysql-test/r/alter_table.result +++ b/mysql-test/r/alter_table.result @@ -1860,8 +1860,8 @@ ALTER TABLE tm1 MODIFY COLUMN c INT NULL; affected rows: 2 info: Records: 2 Duplicates: 0 Warnings: 0 ALTER TABLE ti1 MODIFY COLUMN h VARCHAR(30); -affected rows: 2 -info: Records: 2 Duplicates: 0 Warnings: 0 +affected rows: 0 +info: Records: 0 Duplicates: 0 Warnings: 0 ALTER TABLE tm1 MODIFY COLUMN h VARCHAR(30); affected rows: 2 info: Records: 2 Duplicates: 0 Warnings: 0 diff --git a/mysql-test/r/cte_recursive.result b/mysql-test/r/cte_recursive.result index b37a32a8ae7..1aa469029dd 100644 
--- a/mysql-test/r/cte_recursive.result +++ b/mysql-test/r/cte_recursive.result @@ -1556,6 +1556,54 @@ EXPLAIN } } } +create table my_ancestors +with recursive +ancestor_ids (id) +as +( +select father from folks where name = 'Me' + union +select mother from folks where name = 'Me' + union +select father from folks, ancestor_ids a where folks.id = a.id +union +select mother from folks, ancestor_ids a where folks.id = a.id +) +select p.* from folks as p, ancestor_ids as a where p.id = a.id; +select * from my_ancestors; +id name dob father mother +20 Dad 1970-02-02 10 9 +30 Mom 1975-03-03 8 7 +10 Grandpa Bill 1940-04-05 NULL NULL +9 Grandma Ann 1941-10-15 NULL NULL +7 Grandma Sally 1943-08-23 NULL 6 +8 Grandpa Ben 1940-10-21 NULL NULL +6 Grandgrandma Martha 1923-05-17 NULL NULL +delete from my_ancestors; +insert into my_ancestors +with recursive +ancestor_ids (id) +as +( +select father from folks where name = 'Me' + union +select mother from folks where name = 'Me' + union +select father from folks, ancestor_ids a where folks.id = a.id +union +select mother from folks, ancestor_ids a where folks.id = a.id +) +select p.* from folks as p, ancestor_ids as a where p.id = a.id; +select * from my_ancestors; +id name dob father mother +20 Dad 1970-02-02 10 9 +30 Mom 1975-03-03 8 7 +10 Grandpa Bill 1940-04-05 NULL NULL +9 Grandma Ann 1941-10-15 NULL NULL +7 Grandma Sally 1943-08-23 NULL 6 +8 Grandpa Ben 1940-10-21 NULL NULL +6 Grandgrandma Martha 1923-05-17 NULL NULL +drop table my_ancestors; drop table folks; # # MDEV-10372: [bb-10.2-mdev9864 tree] EXPLAIN with recursive CTE enters endless recursion diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result index 77c74fbc041..9d8f8e7a26c 100644 --- a/mysql-test/r/group_min_max_innodb.result +++ b/mysql-test/r/group_min_max_innodb.result @@ -194,7 +194,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ) GROUP BY c1,i1; id select_type 
table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range k2 k2 5 NULL 59 Using where; Using index +1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index SELECT c1, i1, max(i2) FROM t2 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ) GROUP BY c1,i1; @@ -205,7 +205,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )) GROUP BY c1,i1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range k2 k2 5 NULL 58 Using where; Using index +1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index SELECT c1, i1, max(i2) FROM t2 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )) GROUP BY c1,i1; diff --git a/mysql-test/r/index_intersect_innodb.result b/mysql-test/r/index_intersect_innodb.result index 9c3a501111d..15244098170 100644 --- a/mysql-test/r/index_intersect_innodb.result +++ b/mysql-test/r/index_intersect_innodb.result @@ -485,7 +485,7 @@ SELECT * FROM City WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000 AND Country BETWEEN 'S' AND 'Z' ; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where SELECT * FROM City USE INDEX () WHERE ID BETWEEN 501 AND 1000 AND Population > 700000 AND Country LIKE 'C%'; ID Name Country Population @@ -745,7 +745,7 @@ SELECT * FROM City WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000 AND Country BETWEEN 'S' AND 'Z'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where +1 SIMPLE City index_merge 
PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where SELECT * FROM City WHERE Name LIKE 'C%' AND Population > 1000000; ID Name Country Population diff --git a/mysql-test/r/innodb_icp.result b/mysql-test/r/innodb_icp.result index bc1c24276ed..8a519d6415d 100644 --- a/mysql-test/r/innodb_icp.result +++ b/mysql-test/r/innodb_icp.result @@ -409,7 +409,7 @@ WHERE (pk BETWEEN 4 AND 5 OR pk < 2) AND c1 < 240 ORDER BY c1 LIMIT 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range PRIMARY,k1 k1 5 NULL 3 Using where; Using index +1 SIMPLE t1 range PRIMARY,k1 k1 5 NULL 4 Using where; Using index DROP TABLE t1; # # diff --git a/mysql-test/r/merge_debug.result b/mysql-test/r/merge_debug.result index d6ca17f0659..51905e52d24 100644 --- a/mysql-test/r/merge_debug.result +++ b/mysql-test/r/merge_debug.result @@ -1,6 +1,7 @@ set @default_storage_engine= @@global.storage_engine; set global storage_engine=myisam; set session storage_engine=myisam; +call mtr.add_suppression("Index for table .*crashed' is corrupt; try to repair it"); drop table if exists crashed,t2,t3,t4; SET @orig_debug=@@debug; CREATE TABLE crashed (c1 INT); diff --git a/mysql-test/r/mysqlbinlog_row_minimal.result b/mysql-test/r/mysqlbinlog_row_minimal.result index 84c0e668981..2737d61eca4 100644 --- a/mysql-test/r/mysqlbinlog_row_minimal.result +++ b/mysql-test/r/mysqlbinlog_row_minimal.result @@ -54,7 +54,7 @@ CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT, f2 INT, f3 INT, f4 INT, f5 MEDIUMIN BEGIN /*!*/; # at 809 -# server id 1 end_log_pos 865 CRC32 XXX Table_map: `test`.`t1` mapped to number 30 +# server id 1 end_log_pos 865 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 865 # server id 1 end_log_pos 934 CRC32 XXX Write_rows: table id 30 flags: STMT_END_F ### INSERT INTO `test`.`t1` @@ -79,7 +79,7 @@ COMMIT BEGIN /*!*/; # at 1049 -# server id 1 end_log_pos 1105 CRC32 XXX Table_map: `test`.`t1` 
mapped to number 30 +# server id 1 end_log_pos 1105 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1105 # server id 1 end_log_pos 1173 CRC32 XXX Write_rows: table id 30 flags: STMT_END_F ### INSERT INTO `test`.`t1` @@ -104,7 +104,7 @@ COMMIT BEGIN /*!*/; # at 1288 -# server id 1 end_log_pos 1344 CRC32 XXX Table_map: `test`.`t1` mapped to number 30 +# server id 1 end_log_pos 1344 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1344 # server id 1 end_log_pos 1411 CRC32 XXX Write_rows: table id 30 flags: STMT_END_F ### INSERT INTO `test`.`t1` @@ -129,7 +129,7 @@ COMMIT BEGIN /*!*/; # at 1526 -# server id 1 end_log_pos 1582 CRC32 XXX Table_map: `test`.`t1` mapped to number 30 +# server id 1 end_log_pos 1582 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1582 # server id 1 end_log_pos 1652 CRC32 XXX Write_rows: table id 30 flags: STMT_END_F ### INSERT INTO `test`.`t1` @@ -154,7 +154,7 @@ COMMIT BEGIN /*!*/; # at 1767 -# server id 1 end_log_pos 1823 CRC32 XXX Table_map: `test`.`t2` mapped to number 31 +# server id 1 end_log_pos 1823 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 1823 # server id 1 end_log_pos 1990 CRC32 XXX Write_rows: table id 31 flags: STMT_END_F ### INSERT INTO `test`.`t2` @@ -212,7 +212,7 @@ COMMIT BEGIN /*!*/; # at 2105 -# server id 1 end_log_pos 2161 CRC32 XXX Table_map: `test`.`t2` mapped to number 31 +# server id 1 end_log_pos 2161 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 2161 # server id 1 end_log_pos 2235 CRC32 XXX Update_rows: table id 31 flags: STMT_END_F ### UPDATE `test`.`t2` @@ -244,7 +244,7 @@ COMMIT BEGIN /*!*/; # at 2350 -# server id 1 end_log_pos 2406 CRC32 XXX Table_map: `test`.`t1` mapped to number 30 +# server id 1 end_log_pos 2406 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 2406 # server id 1 end_log_pos 2460 CRC32 XXX Delete_rows: table id 30 flags: STMT_END_F ### DELETE FROM `test`.`t1` @@ -270,7 +270,7 @@ COMMIT BEGIN /*!*/; # at 2575 -# server id 1 
end_log_pos 2631 CRC32 XXX Table_map: `test`.`t2` mapped to number 31 +# server id 1 end_log_pos 2631 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 2631 # server id 1 end_log_pos 2685 CRC32 XXX Delete_rows: table id 31 flags: STMT_END_F ### DELETE FROM `test`.`t2` diff --git a/mysql-test/r/mysqld--help,win.rdiff b/mysql-test/r/mysqld--help,win.rdiff index 12c9588bb3f..4d66e8bad60 100644 --- a/mysql-test/r/mysqld--help,win.rdiff +++ b/mysql-test/r/mysqld--help,win.rdiff @@ -1,6 +1,6 @@ ---- mysqld--help.result -+++ mysqld--help,win.reject -@@ -321,7 +321,6 @@ +--- mysqld--help.result 2016-09-21 13:50:58.682767100 +0000 ++++ mysqld--help,win.reject 2016-09-21 13:57:57.494626000 +0000 +@@ -318,7 +318,6 @@ The number of segments in a key cache -L, --language=name Client error messages in given language. May be given as a full path. Deprecated. Use --lc-messages-dir instead. @@ -8,7 +8,7 @@ --lc-messages=name Set the language used for the error messages. -L, --lc-messages-dir=name Directory where error messages are -@@ -520,6 +519,7 @@ +@@ -517,6 +516,7 @@ Use MySQL-5.6 (instead of MariaDB-5.3) format for TIME, DATETIME, TIMESTAMP columns. (Defaults to on; use --skip-mysql56-temporal-format to disable.) @@ -16,7 +16,7 @@ --net-buffer-length=# Buffer length for TCP/IP and socket communication --net-read-timeout=# -@@ -927,6 +927,9 @@ +@@ -924,6 +924,9 @@ characteristics (isolation level, read only/read write,snapshot - but not any work done / data modified within the transaction). @@ -26,7 +26,7 @@ --show-slave-auth-info Show user and password in SHOW SLAVE HOSTS on this master. -@@ -1039,6 +1042,10 @@ +@@ -1036,6 +1039,10 @@ Log slow queries to given log file. Defaults logging to 'hostname'-slow.log. 
Must be enabled to activate other slow log options @@ -37,7 +37,7 @@ --socket=name Socket file to use for connection --sort-buffer-size=# Each thread that needs to do a sort allocates a buffer of -@@ -1057,6 +1064,7 @@ +@@ -1054,6 +1061,7 @@ NO_ENGINE_SUBSTITUTION, PAD_CHAR_TO_FULL_LENGTH --stack-trace Print a symbolic stack trace on failure (Defaults to on; use --skip-stack-trace to disable.) @@ -45,35 +45,19 @@ --standards-compliant-cte Allow only standards compiant CTE (Defaults to on; use --skip-standards-compliant-cte to disable.) -@@ -1099,25 +1107,11 @@ - --thread-cache-size=# - How many threads we should keep in a cache for reuse. - These are freed after 5 minutes of idle time -- --thread-pool-idle-timeout=# -- Timeout in seconds for an idle thread in the thread -- pool.Worker thread will be shut down after timeout +@@ -1102,6 +1110,11 @@ --thread-pool-max-threads=# Maximum allowed number of worker threads in the thread pool -- --thread-pool-oversubscribe=# -- How many additional active worker threads in a group are -- allowed. -- --thread-pool-size=# -- Number of thread groups in the pool. This parameter is -- roughly equivalent to maximum number of concurrently -- executing threads (threads in a waiting state do not -- count as executing). -- --thread-pool-stall-limit=# -- Maximum query execution time in milliseconds,before an -- executing non-yielding thread is considered stalled.If a -- worker thread is stalled, additional worker thread may be -- created to handle remaining clients. + --thread-pool-min-threads=# + Minimum number of threads in the thread pool. - --thread-stack=# The stack size for each thread - --time-format=name The TIME format (ignored) - --timed-mutexes Specify whether to time mutexes. Deprecated, has no -@@ -1126,8 +1120,8 @@ ++ --thread-pool-mode=name ++ Chose implementation of the threadpool. One of: windows, ++ generic + --thread-pool-oversubscribe=# + How many additional active worker threads in a group are + allowed. 
+@@ -1132,8 +1145,8 @@ size, MySQL will automatically convert it to an on-disk MyISAM or Aria table -t, --tmpdir=name Path for temporary files. Several paths may be specified, @@ -84,7 +68,7 @@ --transaction-alloc-block-size=# Allocation block size for transactions to be stored in binary log -@@ -1252,7 +1246,6 @@ +@@ -1257,7 +1270,6 @@ key-cache-division-limit 100 key-cache-file-hash-size 512 key-cache-segments 0 @@ -92,7 +76,7 @@ lc-messages en_US lc-messages-dir MYSQL_SHAREDIR/ lc-time-names en_US -@@ -1319,6 +1312,7 @@ +@@ -1324,6 +1336,7 @@ myisam-stats-method NULLS_UNEQUAL myisam-use-mmap FALSE mysql56-temporal-format TRUE @@ -100,7 +84,7 @@ net-buffer-length 16384 net-read-timeout 30 net-retry-count 10 -@@ -1419,6 +1413,8 @@ +@@ -1424,6 +1437,8 @@ session-track-state-change FALSE session-track-system-variables session-track-transaction-info OFF @@ -109,7 +93,7 @@ show-slave-auth-info FALSE silent-startup FALSE skip-grant-tables TRUE -@@ -1443,6 +1439,7 @@ +@@ -1448,6 +1463,7 @@ slave-type-conversions slow-launch-time 2 slow-query-log FALSE @@ -117,7 +101,7 @@ sort-buffer-size 2097152 sql-mode NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION stack-trace TRUE -@@ -1456,16 +1453,14 @@ +@@ -1461,14 +1477,16 @@ sync-relay-log 10000 sync-relay-log-info 10000 sysdate-is-now FALSE @@ -129,11 +113,10 @@ table-open-cache-instances 8 tc-heuristic-recover OFF thread-cache-size 151 --thread-pool-idle-timeout 60 + thread-pool-idle-timeout 60 thread-pool-max-threads 1000 --thread-pool-oversubscribe 3 --thread-pool-stall-limit 500 +thread-pool-min-threads 1 - thread-stack 297984 - time-format %H:%i:%s - timed-mutexes FALSE ++thread-pool-mode windows + thread-pool-oversubscribe 3 + thread-pool-prio-kickup-timer 1000 + thread-pool-priority auto diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index 25aa8990998..3ca74f24e54 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -108,9 +108,6 @@ The following options 
may be given as the first argument: --bulk-insert-buffer-size=# Size of tree cache used in bulk insert optimisation. Note that this is a limit per thread! - --changed-page-bitmaps[=name] - Enable or disable CHANGED_PAGE_BITMAPS plugin. One of: - ON, OFF, FORCE (don't start if the plugin fails to load). --character-set-client-handshake Don't ignore client side character set value sent during handshake. @@ -1108,6 +1105,15 @@ The following options may be given as the first argument: --thread-pool-oversubscribe=# How many additional active worker threads in a group are allowed. + --thread-pool-prio-kickup-timer=# + The number of milliseconds before a dequeued low-priority + statement is moved to the high-priority queue + --thread-pool-priority=name + Threadpool priority. High priority connections usually + start executing earlier than low priority.If priority set + to 'auto', the the actual priority(low or high) is + determined based on whether or not connection is inside + transaction. --thread-pool-size=# Number of thread groups in the pool. 
This parameter is roughly equivalent to maximum number of concurrently @@ -1182,7 +1188,6 @@ binlog-row-event-max-size 1024 binlog-row-image FULL binlog-stmt-cache-size 32768 bulk-insert-buffer-size 8388608 -changed-page-bitmaps ON character-set-client-handshake TRUE character-set-filesystem binary character-sets-dir MYSQL_CHARSETSDIR/ @@ -1465,6 +1470,8 @@ thread-cache-size 151 thread-pool-idle-timeout 60 thread-pool-max-threads 1000 thread-pool-oversubscribe 3 +thread-pool-prio-kickup-timer 1000 +thread-pool-priority auto thread-pool-stall-limit 500 thread-stack 297984 time-format %H:%i:%s diff --git a/mysql-test/r/openssl_1.result b/mysql-test/r/openssl_1.result index 712fbf725da..f28c399ce74 100644 --- a/mysql-test/r/openssl_1.result +++ b/mysql-test/r/openssl_1.result @@ -4,25 +4,25 @@ drop table if exists t1; create table t1(f1 int); insert into t1 values (5); grant select on test.* to ssl_user1@localhost require SSL; -grant select on test.* to ssl_user2@localhost require cipher "DHE-RSA-AES256-SHA"; -grant select on test.* to ssl_user3@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client"; -grant select on test.* to ssl_user4@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client" ISSUER "/CN=cacert/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB"; -grant select on test.* to ssl_user5@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT "xxx"; +grant select on test.* to ssl_user2@localhost require cipher "AES256-SHA"; +grant select on test.* to ssl_user3@localhost require cipher "AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client"; +grant select on test.* to ssl_user4@localhost require cipher "AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client" ISSUER "/CN=cacert/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB"; +grant select on test.* to ssl_user5@localhost require cipher "AES256-SHA" AND SUBJECT "xxx"; flush privileges; 
-connect con1,localhost,ssl_user1,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA; +connect con1,localhost,ssl_user1,,,,,SSL-CIPHER=AES256-SHA; connect(localhost,ssl_user2,,test,MASTER_PORT,MASTER_SOCKET); -connect con2,localhost,ssl_user2,,,,,SSL-CIPHER=AES256-SHA; +connect con2,localhost,ssl_user2,,,,,SSL-CIPHER=AES128-SHA; ERROR 28000: Access denied for user 'ssl_user2'@'localhost' (using password: NO) -connect con2,localhost,ssl_user2,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA; -connect con3,localhost,ssl_user3,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA; -connect con4,localhost,ssl_user4,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA; +connect con2,localhost,ssl_user2,,,,,SSL-CIPHER=AES256-SHA; +connect con3,localhost,ssl_user3,,,,,SSL-CIPHER=AES256-SHA; +connect con4,localhost,ssl_user4,,,,,SSL-CIPHER=AES256-SHA; connect(localhost,ssl_user5,,test,MASTER_PORT,MASTER_SOCKET); -connect con5,localhost,ssl_user5,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA; +connect con5,localhost,ssl_user5,,,,,SSL-CIPHER=AES256-SHA; ERROR 28000: Access denied for user 'ssl_user5'@'localhost' (using password: NO) connection con1; SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA select * from t1; f1 5 @@ -31,7 +31,7 @@ ERROR 42000: DELETE command denied to user 'ssl_user1'@'localhost' for table 't1 connection con2; SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA select * from t1; f1 5 @@ -40,7 +40,7 @@ ERROR 42000: DELETE command denied to user 'ssl_user2'@'localhost' for table 't1 connection con3; SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA select * from t1; f1 5 @@ -49,7 +49,7 @@ ERROR 42000: DELETE command denied to user 'ssl_user3'@'localhost' for table 't1 connection con4; SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA select * from t1; f1 5 @@ -66,13 +66,11 @@ drop table t1; mysqltest: Could not open 
connection 'default': 2026 SSL connection error: xxxx mysqltest: Could not open connection 'default': 2026 SSL connection error: xxxx mysqltest: Could not open connection 'default': 2026 SSL connection error: xxxx -SSL error: Unable to get private key from '' -mysqltest: Could not open connection 'default': 2026 SSL connection error: Unable to get private key -SSL error: Unable to get certificate from '' -mysqltest: Could not open connection 'default': 2026 SSL connection error: Unable to get certificate +mysqltest: Could not open connection 'default': 2026 SSL connection error: xxxx +mysqltest: Could not open connection 'default': 2026 SSL connection error: xxxx SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA have_ssl 1 End of 5.0 tests @@ -102,8 +100,7 @@ Ssl_cipher AES128-SHA SHOW STATUS LIKE 'Ssl_cipher'; Variable_name Value Ssl_cipher AES128-SHA -mysqltest: Could not open connection 'default': 2026 SSL connection error: Failed to set ciphers to use -CREATE TABLE t1(a int); +mysqltest: Could not open connection 'default': 2026 SSL connection error: xxxxCREATE TABLE t1(a int); INSERT INTO t1 VALUES (1), (2); /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; @@ -207,13 +204,12 @@ UNLOCK TABLES; /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; -SSL error: Unable to get private key from 'MYSQL_TEST_DIR/std_data/client-cert.pem' -mysqldump: Got error: 2026: "SSL connection error: Unable to get private key" when trying to connect +mysqldump: Got error: 2026: SSL connection error: xxxx DROP TABLE t1; Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +Ssl_cipher AES256-SHA Variable_name Value -Ssl_cipher EDH-RSA-DES-CBC3-SHA +Ssl_cipher DES-CBC3-SHA select 'is still running; no cipher request crashed the server' as result from dual; result is still running; no cipher request crashed the server diff --git 
a/mysql-test/r/openssl_6975,tlsv10.result b/mysql-test/r/openssl_6975,tlsv10.result index 6285faa0143..a65167ff427 100644 --- a/mysql-test/r/openssl_6975,tlsv10.result +++ b/mysql-test/r/openssl_6975,tlsv10.result @@ -3,14 +3,14 @@ grant select on test.* to ssl_sslv3@localhost require cipher "RC4-SHA"; create user ssl_tls12@localhost; grant select on test.* to ssl_tls12@localhost require cipher "AES128-SHA256"; TLS1.2 ciphers: user is ok with any cipher -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure TLS1.2 ciphers: user requires SSLv3 cipher RC4-SHA -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure TLS1.2 ciphers: user requires TLSv1.2 cipher AES128-SHA256 -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure SSLv3 ciphers: user is ok with any cipher Variable_name Value Ssl_cipher RC4-SHA diff --git a/mysql-test/r/openssl_6975,tlsv12.result b/mysql-test/r/openssl_6975,tlsv12.result index 31d2658c829..8758daa7011 100644 --- 
a/mysql-test/r/openssl_6975,tlsv12.result +++ b/mysql-test/r/openssl_6975,tlsv12.result @@ -15,13 +15,13 @@ Variable_name Value Ssl_cipher AES128-SHA256 ERROR 1045 (28000): Access denied for user 'ssl_tls12'@'localhost' (using password: NO) SSLv3 ciphers: user is ok with any cipher -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure SSLv3 ciphers: user requires SSLv3 cipher RC4-SHA -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure SSLv3 ciphers: user requires TLSv1.2 cipher AES128-SHA256 -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure -ERROR 2026 (HY000): SSL connection error: error:14077410:SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure +ERROR 2026 (HY000): SSL connection error: sslv3 alert handshake failure drop user ssl_sslv3@localhost; drop user ssl_tls12@localhost; diff --git a/mysql-test/r/order_by_optimizer_innodb.result b/mysql-test/r/order_by_optimizer_innodb.result index f3167db4b9a..0b62ba997d8 100644 --- a/mysql-test/r/order_by_optimizer_innodb.result +++ b/mysql-test/r/order_by_optimizer_innodb.result @@ -40,11 +40,11 @@ pk1 count(*) # The following should use range(ux_pk1_fd5), two key parts (key_len=5+8=13) 
EXPLAIN SELECT * FROM t2 USE INDEX(ux_pk1_fd5) WHERE pk1=9 AND fd5 < 500 ORDER BY fd5 DESC LIMIT 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range ux_pk1_fd5 ux_pk1_fd5 13 NULL 137 Using where +1 SIMPLE t2 range ux_pk1_fd5 ux_pk1_fd5 13 NULL 138 Using where # This also must use range, not ref. key_len must be 13 EXPLAIN SELECT * FROM t2 WHERE pk1=9 AND fd5 < 500 ORDER BY fd5 DESC LIMIT 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range PRIMARY,ux_pk1_fd5 ux_pk1_fd5 13 NULL 137 Using where +1 SIMPLE t2 range PRIMARY,ux_pk1_fd5 ux_pk1_fd5 13 NULL 138 Using where drop table t0,t1, t2; # # MDEV-6814: Server crashes in calculate_key_len on query with ORDER BY diff --git a/mysql-test/r/partition_innodb.result b/mysql-test/r/partition_innodb.result index c9568f7b07f..d1ec12efe31 100644 --- a/mysql-test/r/partition_innodb.result +++ b/mysql-test/r/partition_innodb.result @@ -385,33 +385,33 @@ DROP TABLE t1; create table t1 (a int) engine=innodb partition by hash(a) ; show table status like 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 2 8192 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 2 8192 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned drop table t1; create table t1 (a int) engine = innodb partition by key (a); show table status; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 2 8192 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 2 8192 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned insert into t1 values (0), (1), (2), (3); show table status; 
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 4 4096 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 4 4096 16384 0 0 # NULL # NULL NULL latin1_swedish_ci NULL partitioned drop table t1; create table t1 (a int auto_increment primary key) engine = innodb partition by key (a); show table status; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 2 8192 16384 0 0 # 1 # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 2 8192 16384 0 0 # 1 # NULL NULL latin1_swedish_ci NULL partitioned insert into t1 values (NULL), (NULL), (NULL), (NULL); show table status; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 4 4096 16384 0 0 # 5 # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 4 4096 16384 0 0 # 5 # NULL NULL latin1_swedish_ci NULL partitioned insert into t1 values (NULL), (NULL), (NULL), (NULL); show table status; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 InnoDB 10 Compact 8 2048 16384 0 0 # 9 # NULL NULL latin1_swedish_ci NULL partitioned +t1 InnoDB 10 Dynamic 8 2048 16384 0 0 # 9 # NULL NULL latin1_swedish_ci NULL partitioned drop table t1; create table t1 (a int) partition by key (a) diff --git a/mysql-test/r/partition_innodb_plugin.result b/mysql-test/r/partition_innodb_plugin.result index d53d2edb581..16b5daad620 100644 --- 
a/mysql-test/r/partition_innodb_plugin.result +++ b/mysql-test/r/partition_innodb_plugin.result @@ -1,3 +1,4 @@ +call mtr.add_suppression("InnoDB: Table .* does not exist in the InnoDB internal data dictionary .*"); # # Bug#11766879/Bug#60106: DIFF BETWEEN # OF INDEXES IN MYSQL VS INNODB, # PARTITONING, ON INDEX CREATE @@ -42,6 +43,8 @@ SET @old_innodb_strict_mode = @@global.innodb_strict_mode; SET @@global.innodb_file_format = Barracuda, @@global.innodb_file_per_table = ON, @@global.innodb_strict_mode = ON; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html connect con1,localhost,root,,; CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY, @@ -104,6 +107,8 @@ disconnect con2; connection default; SET @@global.innodb_strict_mode = @old_innodb_strict_mode; SET @@global.innodb_file_format = @old_innodb_file_format; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET @@global.innodb_file_per_table = @old_innodb_file_per_table; SET NAMES utf8; CREATE TABLE `t``\""e` (a INT, PRIMARY KEY (a)) @@ -143,12 +148,12 @@ ERROR 40001: Deadlock found when trying to get lock; try restarting transaction # First table reported in 'SHOW ENGINE InnoDB STATUS' SHOW ENGINE InnoDB STATUS; Type Name Status -InnoDB index `PRIMARY` of table `test`.`t``\""e` /* Partition `p0``\""e`, Subpartition `sp0``\""e` */ +InnoDB index PRIMARY of table `test`.`t``\""e` /* Partition `p0``\""e`, Subpartition `sp0``\""e` */ set @old_sql_mode = @@sql_mode; set sql_mode = 'ANSI_QUOTES'; SHOW ENGINE InnoDB STATUS; Type Name Status -InnoDB index `PRIMARY` of table `test`.`t``\""e` /* Partition `p0``\""e`, Subpartition `sp0``\""e` */ +InnoDB index PRIMARY of table `test`.`t``\""e` /* Partition `p0``\""e`, Subpartition `sp0``\""e` */ set @@sql_mode = @old_sql_mode; connection con1; ROLLBACK; diff --git a/mysql-test/r/range_vs_index_merge_innodb.result b/mysql-test/r/range_vs_index_merge_innodb.result index 601ae9b7613..225d341f1de 100644 --- a/mysql-test/r/range_vs_index_merge_innodb.result +++ b/mysql-test/r/range_vs_index_merge_innodb.result @@ -57,7 +57,7 @@ WHERE Population < 200000 AND Name LIKE 'P%' AND (Population > 300000 OR Name LIKE 'T%') AND (Population < 100000 OR Name LIKE 'Pa%'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population,Name Name 35 NULL 235 Using index condition; Using where +1 SIMPLE City range Population,Name Name 35 NULL 236 Using index condition; Using where EXPLAIN SELECT * FROM City WHERE Population > 100000 AND Name LIKE 'Aba%' OR @@ -65,34 +65,34 @@ Country IN ('CAN', 'ARG') AND ID < 3800 OR Country < 'U' AND Name LIKE 'Zhu%' OR ID BETWEEN 3800 AND 3810; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country,Name Name,Country,PRIMARY 35,7,4 NULL 123 Using 
sort_union(Name,Country,PRIMARY); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country,Name Name,Country,PRIMARY 35,7,4 NULL 125 Using sort_union(Name,Country,PRIMARY); Using where EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 115000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 458 Using index condition +1 SIMPLE City range Population Population 4 NULL 459 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 102000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 38 Using index condition +1 SIMPLE City range Population Population 4 NULL 39 Using index condition EXPLAIN SELECT * FROM City WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'F')); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Country,Name Name,Country 35,3 NULL 213 Using sort_union(Name,Country); Using where +1 SIMPLE City index_merge Country,Name Name,Country 35,3 NULL 215 Using sort_union(Name,Country); Using where EXPLAIN SELECT * FROM City WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'F')) AND (Population > 101000 AND Population < 115000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Population,Country,Name Name,Country 35,3 NULL 213 Using sort_union(Name,Country); Using where +1 SIMPLE City index_merge Population,Country,Name Name,Country 35,3 NULL 215 Using sort_union(Name,Country); Using where EXPLAIN SELECT * FROM City WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'F')) AND (Population > 101000 AND Population < 102000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population,Country,Name Population 4 NULL 38 Using index condition; Using where +1 SIMPLE City range 
Population,Country,Name Population 4 NULL 39 Using index condition; Using where SELECT * FROM City USE INDEX () WHERE ((Name > 'Ca' AND Name < 'Cf') OR (Country > 'E' AND Country < 'F')) AND (Population > 101000 AND Population < 115000); @@ -176,11 +176,11 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE (Name < 'Bb'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 373 Using index condition +1 SIMPLE City range Name Name 35 NULL 374 Using index condition EXPLAIN SELECT * FROM City WHERE (Country > 'A' AND Country < 'B'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Country Country 3 NULL 106 Using index condition +1 SIMPLE City range Country Country 3 NULL 107 Using index condition EXPLAIN SELECT * FROM City WHERE (Name BETWEEN 'P' AND 'Pb'); id select_type table type possible_keys key key_len ref rows Extra @@ -188,15 +188,15 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE (Name BETWEEN 'P' AND 'S'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 384 Using index condition +1 SIMPLE City range Name Name 35 NULL 385 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 110000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 327 Using index condition +1 SIMPLE City range Population Population 4 NULL 328 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 103000 AND Population < 104000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 36 Using index condition +1 SIMPLE City range Population Population 4 NULL 37 Using index condition EXPLAIN SELECT * FROM City WHERE (Name < 'Ac' AND (Country > 'A' AND Country < 
'B')) OR @@ -208,19 +208,19 @@ SELECT * FROM City WHERE (Name < 'Ac' AND (Country > 'A' AND Country < 'B')) OR (Name BETWEEN 'P' AND 'S' AND (Population > 103000 AND Population < 104000)); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Population,Country,Name Name,Population 35,4 NULL 59 Using sort_union(Name,Population); Using where +1 SIMPLE City index_merge Population,Country,Name Name,Population 35,4 NULL 60 Using sort_union(Name,Population); Using where EXPLAIN SELECT * FROM City WHERE (Name < 'Bb' AND (Country > 'A' AND Country < 'B')) OR (Name BETWEEN 'P' AND 'Pb' AND (Population > 101000 AND Population < 110000)); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Population,Country,Name Country,Name 3,35 NULL 177 Using sort_union(Country,Name); Using where +1 SIMPLE City index_merge Population,Country,Name Country,Name 3,35 NULL 178 Using sort_union(Country,Name); Using where EXPLAIN SELECT * FROM City WHERE (Name < 'Bb' AND (Country > 'A' AND Country < 'B')) OR (Name BETWEEN 'P' AND 'S' AND (Population > 103000 AND Population < 104000)); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Population,Country,Name Country,Population 3,4 NULL 142 Using sort_union(Country,Population); Using where +1 SIMPLE City index_merge Population,Country,Name Country,Population 3,4 NULL 144 Using sort_union(Country,Population); Using where SELECT * FROM City USE INDEX () WHERE (Name < 'Ac' AND (Country > 'A' AND Country < 'B')) OR (Name BETWEEN 'P' AND 'Pb' AND (Population > 101000 AND Population < 110000)); @@ -336,15 +336,15 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE (ID < 600) OR (ID BETWEEN 900 AND 1500); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 1198 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 1200 
Using where EXPLAIN SELECT * FROM City WHERE Country > 'A' AND Country < 'ARG'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Country Country 3 NULL 19 Using index condition +1 SIMPLE City range Country Country 3 NULL 20 Using index condition EXPLAIN SELECT * FROM City WHERE Name LIKE 'H%' OR Name LIKE 'P%' ; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 394 Using index condition; Using where +1 SIMPLE City range Name Name 35 NULL 395 Using index condition; Using where EXPLAIN SELECT * FROM City WHERE Name LIKE 'Ha%' OR Name LIKE 'Pa%' ; id select_type table type possible_keys key key_len ref rows Extra @@ -362,7 +362,7 @@ WHERE ((ID < 800) AND (Name LIKE 'Ha%' OR (Country > 'A' AND Country < 'ARG'))) OR ((ID BETWEEN 900 AND 1500) AND (Name LIKE 'Pa%' OR (Population > 103000 AND Population < 104000))); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country,Name Name,Country,PRIMARY 39,3,4 NULL 680 Using sort_union(Name,Country,PRIMARY); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country,Name Name,Country,PRIMARY 39,3,4 NULL 683 Using sort_union(Name,Country,PRIMARY); Using where EXPLAIN SELECT * FROM City WHERE ((ID < 200) AND (Name LIKE 'Ha%' OR (Country > 'A' AND Country < 'ARG'))) @@ -577,11 +577,11 @@ ID Name Country Population EXPLAIN SELECT * FROM City WHERE Population > 101000 AND Population < 102000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 38 Using index condition +1 SIMPLE City range Population Population 4 NULL 39 Using index condition EXPLAIN SELECT * FROM City WHERE Population > 101000 AND Population < 110000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 327 Using index condition +1 SIMPLE City range Population Population 4 
NULL 328 Using index condition EXPLAIN SELECT * FROM City WHERE Country < 'C'; id select_type table type possible_keys key key_len ref rows Extra @@ -593,7 +593,7 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE Name BETWEEN 'P' AND 'S'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 384 Using index condition +1 SIMPLE City range Name Name 35 NULL 385 Using index condition EXPLAIN SELECT * FROM City WHERE Name BETWEEN 'P' AND 'Pb'; id select_type table type possible_keys key key_len ref rows Extra @@ -601,7 +601,7 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3400 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 400 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 401 Using where EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3790 AND 3800; id select_type table type possible_keys key key_len ref rows Extra @@ -609,7 +609,7 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE Name LIKE 'P%'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 235 Using index condition +1 SIMPLE City range Name Name 35 NULL 236 Using index condition EXPLAIN SELECT * FROM City WHERE ((Population > 101000 AND Population < 102000) AND @@ -617,7 +617,7 @@ WHERE ((Population > 101000 AND Population < 102000) AND ((ID BETWEEN 3400 AND 3800) AND (Country < 'AGO' OR Name LIKE 'Pa%')); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country,Name Population,PRIMARY 4,4 NULL 438 Using sort_union(Population,PRIMARY); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country,Name Population,PRIMARY 4,4 NULL 440 Using sort_union(Population,PRIMARY); Using where EXPLAIN 
SELECT * FROM City WHERE ((Population > 101000 AND Population < 110000) AND @@ -684,11 +684,11 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE Name LIKE 'P%'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Name Name 35 NULL 235 Using index condition +1 SIMPLE City range Name Name 35 NULL 236 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 103000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 80 Using index condition +1 SIMPLE City range Population Population 4 NULL 81 Using index condition EXPLAIN SELECT * FROM City WHERE Country='USA'; id select_type table type possible_keys key key_len ref rows Extra @@ -702,7 +702,7 @@ SELECT * FROM City WHERE ((Population > 101000 AND Population < 103000) OR Name LIKE 'Pas%') AND Country='USA'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge Population,Country,Name,CountryPopulation CountryPopulation,Name 7,35 NULL 17 Using sort_union(CountryPopulation,Name); Using where +1 SIMPLE City index_merge Population,Country,Name,CountryPopulation CountryPopulation,Name 7,35 NULL 18 Using sort_union(CountryPopulation,Name); Using where EXPLAIN SELECT * FROM City WHERE ((Population > 101000 AND Population < 103000) OR Name LIKE 'P%') @@ -777,7 +777,7 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3500 AND 3800; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range PRIMARY PRIMARY 4 NULL 300 Using where +1 SIMPLE City range PRIMARY PRIMARY 4 NULL 301 Using where EXPLAIN SELECT * FROM City WHERE ID BETWEEN 4000 AND 4300; id select_type table type possible_keys key key_len ref rows Extra @@ -789,11 +789,11 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN 
SELECT * FROM City WHERE (Population > 101000 AND Population < 102000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 38 Using index condition +1 SIMPLE City range Population Population 4 NULL 39 Using index condition EXPLAIN SELECT * FROM City WHERE (Population > 101000 AND Population < 103000); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City range Population Population 4 NULL 80 Using index condition +1 SIMPLE City range Population Population 4 NULL 81 Using index condition EXPLAIN SELECT * FROM City WHERE Name LIKE 'Pa%'; id select_type table type possible_keys key key_len ref rows Extra @@ -806,7 +806,7 @@ WHERE ((Population > 101000 AND Population < 102000) OR ID BETWEEN 3790 AND 3800) AND Country='USA' AND (Name LIKE 'Pa%' OR ID BETWEEN 4025 AND 4035); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country,Name,CountryPopulation,CountryName CountryPopulation,PRIMARY 7,4 NULL 13 Using sort_union(CountryPopulation,PRIMARY); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country,Name,CountryPopulation,CountryName CountryPopulation,PRIMARY 7,4 NULL 14 Using sort_union(CountryPopulation,PRIMARY); Using where EXPLAIN SELECT * FROM City WHERE ((Population > 101000 AND Population < 103000) OR @@ -869,7 +869,7 @@ WHERE ((Population > 101000 and Population < 102000) OR ID BETWEEN 3790 AND 3800) AND Country='USA' OR (Name LIKE 'Pa%' OR ID BETWEEN 250 AND 260) AND Country='BRA'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE City index_merge PRIMARY,Population,Country,Name,CountryPopulation,CountryName CountryPopulation,CountryName,PRIMARY 7,38,4 NULL 35 Using sort_union(CountryPopulation,CountryName,PRIMARY); Using where +1 SIMPLE City index_merge PRIMARY,Population,Country,Name,CountryPopulation,CountryName CountryPopulation,CountryName,PRIMARY 7,38,4 NULL 36 
Using sort_union(CountryPopulation,CountryName,PRIMARY); Using where SELECT * FROM City USE INDEX () WHERE ((Population > 101000 and Population < 102000) OR ID BETWEEN 3790 AND 3800) AND Country='USA' @@ -1448,7 +1448,7 @@ explain select * from t1 where (home_state = 'ia' or work_state='ia') and account_id = 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index_merge account_id,user_home_state_indx,user_work_state_indx user_home_state_indx,user_work_state_indx 3,3 NULL 10 Using union(user_home_state_indx,user_work_state_indx); Using where +1 SIMPLE t1 index_merge account_id,user_home_state_indx,user_work_state_indx user_home_state_indx,user_work_state_indx 3,3 NULL 12 Using union(user_home_state_indx,user_work_state_indx); Using where drop table t1; CREATE TABLE t1 ( c1 int(11) NOT NULL auto_increment, diff --git a/mysql-test/r/row-checksum-old.result b/mysql-test/r/row-checksum-old.result index ef523463860..5789fc64c68 100644 --- a/mysql-test/r/row-checksum-old.result +++ b/mysql-test/r/row-checksum-old.result @@ -73,7 +73,7 @@ test.t1 4108368782 drop table if exists t1; create table t1 (a int null, v varchar(100)) engine=innodb checksum=0 row_format=fixed; Warnings: -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. insert into t1 values(null, null), (1, "hello"); checksum table t1; Table Checksum diff --git a/mysql-test/r/row-checksum.result b/mysql-test/r/row-checksum.result index fb8a1260a1d..0fbebba073b 100644 --- a/mysql-test/r/row-checksum.result +++ b/mysql-test/r/row-checksum.result @@ -73,7 +73,7 @@ test.t1 3885665021 drop table if exists t1; create table t1 (a int null, v varchar(100)) engine=innodb checksum=0 row_format=fixed; Warnings: -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
insert into t1 values(null, null), (1, "hello"); checksum table t1; Table Checksum diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index daf28073cf1..2c1913f0929 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -144,9 +144,9 @@ order by s_suppkey; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY supplier index PRIMARY PRIMARY 4 NULL 10 100.00 1 PRIMARY ref key0 key0 5 dbt3_s001.supplier.s_suppkey 10 100.00 Using where -3 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 228 100.00 Using where; Using temporary; Using filesort -2 SUBQUERY ALL NULL NULL NULL NULL 228 100.00 -4 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 228 100.00 Using where; Using temporary; Using filesort +3 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 229 100.00 Using where; Using temporary; Using filesort +2 SUBQUERY ALL NULL NULL NULL NULL 229 100.00 +4 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 229 100.00 Using where; Using temporary; Using filesort Warnings: Note 1003 select `dbt3_s001`.`supplier`.`s_suppkey` AS `s_suppkey`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`revenue0`.`total_revenue` AS `total_revenue` from `dbt3_s001`.`supplier` join `dbt3_s001`.`revenue0` where ((`revenue0`.`supplier_no` = `dbt3_s001`.`supplier`.`s_suppkey`) and (`revenue0`.`total_revenue` = (select max(`revenue0`.`total_revenue`) from `dbt3_s001`.`revenue0`))) order by `dbt3_s001`.`supplier`.`s_suppkey` select s_suppkey, s_name, s_address, s_phone, total_revenue @@ -165,9 +165,9 @@ order by s_suppkey; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY supplier index PRIMARY PRIMARY 4 NULL 10 100.00 1 PRIMARY ref key0 key0 5 dbt3_s001.supplier.s_suppkey 10 100.00 Using where -3 DERIVED lineitem range i_l_shipdate 
i_l_shipdate 4 NULL 228 100.00 Using where; Using temporary; Using filesort -2 SUBQUERY ALL NULL NULL NULL NULL 227 100.00 -4 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 228 100.00 Using where; Using temporary; Using filesort +3 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 229 100.00 Using where; Using temporary; Using filesort +2 SUBQUERY ALL NULL NULL NULL NULL 228 100.00 +4 DERIVED lineitem range i_l_shipdate i_l_shipdate 4 NULL 229 100.00 Using where; Using temporary; Using filesort Warnings: Note 1003 select `dbt3_s001`.`supplier`.`s_suppkey` AS `s_suppkey`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`revenue0`.`total_revenue` AS `total_revenue` from `dbt3_s001`.`supplier` join `dbt3_s001`.`revenue0` where ((`revenue0`.`supplier_no` = `dbt3_s001`.`supplier`.`s_suppkey`) and (`revenue0`.`total_revenue` = (select max(`revenue0`.`total_revenue`) from `dbt3_s001`.`revenue0`))) order by `dbt3_s001`.`supplier`.`s_suppkey` select s_suppkey, s_name, s_address, s_phone, total_revenue @@ -544,7 +544,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 2 MATERIALIZED part ALL PRIMARY NULL NULL NULL 200 4.17 Using where 2 MATERIALIZED partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where -4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.37 Using where +4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.40 Using where Warnings: Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2 Note 1276 Field or reference 
'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2 @@ -600,7 +600,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 2 MATERIALIZED part ALL PRIMARY NULL NULL NULL 200 7.03 Using where 2 MATERIALIZED partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where -4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.37 Using where +4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.40 Using where Warnings: Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2 Note 1276 Field or reference 'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2 @@ -656,7 +656,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 2 MATERIALIZED part ALL PRIMARY NULL NULL NULL 200 7.81 Using where 2 MATERIALIZED partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where -4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.37 Using where +4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.40 Using where Warnings: Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2 Note 1276 Field or reference 'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2 @@ -712,7 +712,7 @@ id select_type table type 
possible_keys key key_len ref rows filtered Extra 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 2 MATERIALIZED part ALL PRIMARY NULL NULL NULL 200 7.81 Using where 2 MATERIALIZED partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where -4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.37 Using where +4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.40 Using where Warnings: Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2 Note 1276 Field or reference 'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2 diff --git a/mysql-test/r/ssl.result b/mysql-test/r/ssl.result index 2a45a5e4d62..41af96621b2 100644 --- a/mysql-test/r/ssl.result +++ b/mysql-test/r/ssl.result @@ -2176,7 +2176,7 @@ still connected? 
connection default; disconnect ssl_con; create user mysqltest_1@localhost; -grant usage on mysqltest.* to mysqltest_1@localhost require cipher "EDH-RSA-DES-CBC3-SHA"; +grant usage on mysqltest.* to mysqltest_1@localhost require cipher "AES256-SHA"; Variable_name Value -Ssl_cipher EDH-RSA-DES-CBC3-SHA +Ssl_cipher AES256-SHA drop user mysqltest_1@localhost; diff --git a/mysql-test/r/ssl_8k_key.result b/mysql-test/r/ssl_8k_key.result index b33a1d2854f..ff9d0cce9dc 100644 --- a/mysql-test/r/ssl_8k_key.result +++ b/mysql-test/r/ssl_8k_key.result @@ -1,2 +1,2 @@ -Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +have_ssl +1 diff --git a/mysql-test/r/ssl_ca.result b/mysql-test/r/ssl_ca.result index 83a98902581..8ea3e30eb06 100644 --- a/mysql-test/r/ssl_ca.result +++ b/mysql-test/r/ssl_ca.result @@ -2,7 +2,7 @@ # Bug#21920657: SSL-CA FAILS SILENTLY IF THE PATH CANNOT BE FOUND # # try to connect with wrong '--ssl-ca' path : should fail -ERROR 2026 (HY000): SSL connection error: SSL_CTX_set_default_verify_paths failed +ERROR 2026 (HY000): SSL connection error: xxxx # try to connect with correct '--ssl-ca' path : should connect have_ssl 1 diff --git a/mysql-test/r/stat_tables_innodb.result b/mysql-test/r/stat_tables_innodb.result index 0e866755532..42443bfcb72 100644 --- a/mysql-test/r/stat_tables_innodb.result +++ b/mysql-test/r/stat_tables_innodb.result @@ -67,7 +67,7 @@ and r_name = 'AMERICA' and o_orderdate >= date '1995-01-01' group by n_name order by revenue desc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_custkey i_o_orderdate 4 NULL 211 Using where; Using temporary; Using filesort +1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_custkey i_o_orderdate 4 NULL 213 Using where; Using temporary; Using filesort 1 SIMPLE customer eq_ref PRIMARY,i_c_nationkey PRIMARY 4 dbt3_s001.orders.o_custkey 1 Using where 1 SIMPLE nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.customer.c_nationkey 1 1 
SIMPLE supplier ref PRIMARY,i_s_nationkey i_s_nationkey 5 dbt3_s001.customer.c_nationkey 1 Using index @@ -198,7 +198,7 @@ and r_name = 'AMERICA' and o_orderdate >= date '1995-01-01' group by n_name order by revenue desc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_custkey i_o_orderdate 4 NULL 211 Using where; Using temporary; Using filesort +1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_custkey i_o_orderdate 4 NULL 213 Using where; Using temporary; Using filesort 1 SIMPLE customer eq_ref PRIMARY,i_c_nationkey PRIMARY 4 dbt3_s001.orders.o_custkey 1 Using where 1 SIMPLE nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.customer.c_nationkey 1 1 SIMPLE supplier ref PRIMARY,i_s_nationkey i_s_nationkey 5 dbt3_s001.customer.c_nationkey 1 Using index diff --git a/mysql-test/r/type_bit_innodb.result b/mysql-test/r/type_bit_innodb.result index 80fc942e77c..acb3c311cf5 100644 --- a/mysql-test/r/type_bit_innodb.result +++ b/mysql-test/r/type_bit_innodb.result @@ -256,7 +256,7 @@ a+0 b+0 127 403 explain select a+0, b+0 from t1 where a > 40 and a < 70 order by 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 2 NULL 8 Using where; Using index; Using filesort +1 SIMPLE t1 range a a 2 NULL 9 Using where; Using index; Using filesort select a+0, b+0 from t1 where a > 40 and a < 70 order by 2; a+0 b+0 57 135 diff --git a/mysql-test/r/userstat.result b/mysql-test/r/userstat.result index ea366e5284e..37d07439fee 100644 --- a/mysql-test/r/userstat.result +++ b/mysql-test/r/userstat.result @@ -81,9 +81,9 @@ select * from t1 where a=999; a b drop table t1; connect ssl_con,localhost,root,,,,,SSL; -SHOW STATUS LIKE 'Ssl_cipher'; -Variable_name Value -Ssl_cipher DHE-RSA-AES256-SHA +SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'; +have_ssl +1 connection default; create table t1 (a int, primary key (a), b 
int default 0) engine=innodb; begin; @@ -150,11 +150,11 @@ TOTAL_CONNECTIONS 2 TOTAL_SSL_CONNECTIONS 1 CONCURRENT_CONNECTIONS 0 ROWS_READ 6 -ROWS_SENT 2 +ROWS_SENT 3 ROWS_DELETED 1 ROWS_INSERTED 7 ROWS_UPDATED 5 -SELECT_COMMANDS 3 +SELECT_COMMANDS 4 UPDATE_COMMANDS 11 OTHER_COMMANDS 7 COMMIT_TRANSACTIONS 19 @@ -168,11 +168,11 @@ TOTAL_CONNECTIONS 2 TOTAL_SSL_CONNECTIONS 1 CONCURRENT_CONNECTIONS 0 ROWS_READ 6 -ROWS_SENT 2 +ROWS_SENT 3 ROWS_DELETED 1 ROWS_INSERTED 7 ROWS_UPDATED 5 -SELECT_COMMANDS 3 +SELECT_COMMANDS 4 UPDATE_COMMANDS 11 OTHER_COMMANDS 7 COMMIT_TRANSACTIONS 19 diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result index d23e8651eb3..27824a3074e 100644 --- a/mysql-test/r/win.result +++ b/mysql-test/r/win.result @@ -2015,3 +2015,42 @@ AND 15 FOLLOWING) 242 NULL 238 NULL DROP table orders; +# +# MDEV-10842: window functions with the same order column +# but different directions +# +create table t1 ( +pk int primary key, +a int, +b int, +c char(10) +); +insert into t1 values +( 1, 0, 1, 'one'), +( 2, 0, 2, 'two'), +( 3, 0, 3, 'three'), +( 4, 1, 1, 'one'), +( 5, 1, 1, 'two'), +( 6, 1, 2, 'three'), +( 7, 2, NULL, 'n_one'), +( 8, 2, 1, 'n_two'), +( 9, 2, 2, 'n_three'), +(10, 2, 0, 'n_four'), +(11, 2, 10, NULL); +select pk, +row_number() over (order by pk desc) as r_desc, +row_number() over (order by pk asc) as r_asc +from t1; +pk r_desc r_asc +1 11 1 +2 10 2 +3 9 3 +4 8 4 +5 7 5 +6 6 6 +7 5 7 +8 4 8 +9 3 9 +10 2 10 +11 1 11 +drop table t1; diff --git a/mysql-test/suite/encryption/disabled.def b/mysql-test/suite/encryption/disabled.def index 8c0d47983fd..979a2a5a460 100644 --- a/mysql-test/suite/encryption/disabled.def +++ b/mysql-test/suite/encryption/disabled.def @@ -13,5 +13,5 @@ innodb_scrub : MDEV-8139 innodb_scrub_compressed : MDEV-8139 innodb_scrub_background : MDEV-8139 -innodb_encryption-page-compression : Fails with lost connection at line 156 - +innochecksum : MDEV-10727 +innodb_encryption_discard_import : MDEV-9099 diff --git 
a/mysql-test/suite/encryption/r/create_or_replace.result b/mysql-test/suite/encryption/r/create_or_replace.result index 1671043b50d..d52572d6d23 100644 --- a/mysql-test/suite/encryption/r/create_or_replace.result +++ b/mysql-test/suite/encryption/r/create_or_replace.result @@ -1,3 +1,4 @@ +call mtr.add_suppression("InnoDB: Error: trying to do an operation on a dropped tablespace.*"); SET default_storage_engine = InnoDB; CREATE TABLE t1 (pk INT PRIMARY KEY, c VARCHAR(256)); CREATE TABLE t2 AS SELECT * FROM t1; diff --git a/mysql-test/suite/encryption/r/debug_key_management.result b/mysql-test/suite/encryption/r/debug_key_management.result index 8793e6ba363..e185740aa25 100644 --- a/mysql-test/suite/encryption/r/debug_key_management.result +++ b/mysql-test/suite/encryption/r/debug_key_management.result @@ -9,13 +9,13 @@ innodb_encryption_threads 4 select space,name,current_key_version from information_schema.innodb_tablespaces_encryption order by space; space name current_key_version 0 NULL 1 -1 mysql/innodb_table_stats 1 -2 mysql/innodb_index_stats 1 +2 mysql/innodb_table_stats 1 +3 mysql/innodb_index_stats 1 set global debug_key_management_version=10; select space,name,current_key_version from information_schema.innodb_tablespaces_encryption order by space; space name current_key_version 0 NULL 10 -1 mysql/innodb_table_stats 10 -2 mysql/innodb_index_stats 10 +2 mysql/innodb_table_stats 10 +3 mysql/innodb_index_stats 10 set global innodb_encrypt_tables=OFF; set global debug_key_management_version=1; diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change.result b/mysql-test/suite/encryption/r/innodb-bad-key-change.result index cf9791887cc..84c8c2f6773 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change.result @@ -8,9 +8,12 @@ call mtr.add_suppression(".*InnoDB: Cannot open table test/.* from the internal call mtr.add_suppression("InnoDB: .ibd file is missing for table test/.*"); 
call mtr.add_suppression("mysqld: File .*"); call mtr.add_suppression("InnoDB: Tablespace id .* is encrypted but encryption service or used key_id .* is not available. Can't continue opening tablespace."); +call mtr.add_suppression("InnoDB: InnoDB: Page may be an index page where index id is .*"); # Start server with keys2.txt SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (c VARCHAR(8)) ENGINE=InnoDB ENCRYPTED=YES ENCRYPTION_KEY_ID=2; INSERT INTO t1 VALUES ('foobar'); @@ -36,10 +39,12 @@ SELECT * FROM t1; ERROR HY000: Got error 192 'Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.' from InnoDB SHOW WARNINGS; Level Code Message -Warning 1812 Tablespace is missing for table 'test/t1' +Warning 192 Table test/t1 in tablespace 8 is encrypted but encryption service or used key_id is not available. Can't continue reading table. Warning 192 Table test/t1 is encrypted but encryption service or used key_id 2 is not available. Can't continue reading table. Error 1296 Got error 192 'Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.' from InnoDB DROP TABLE t1; +Warnings: +Warning 192 Table in tablespace 8 encrypted.However key management plugin or used key_id 1 is not found or used encryption algorithm or method does not match. Can't continue opening the table. 
# Start server with keys.txt CREATE TABLE t2 (c VARCHAR(8), id int not null primary key, b int, key(b)) ENGINE=InnoDB ENCRYPTED=YES; INSERT INTO t2 VALUES ('foobar',1,2); diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change3.result b/mysql-test/suite/encryption/r/innodb-bad-key-change3.result index 68d8552a0a3..70501986257 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change3.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change3.result @@ -1,5 +1,8 @@ -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded"); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. Please refer to .* for how to resolve the issue."); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 1; CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(255)) ENGINE=InnoDB PAGE_COMPRESSED=1 ENCRYPTED=YES ENCRYPTION_KEY_ID=4; @@ -24,6 +27,8 @@ UNLOCK TABLES; NOT FOUND /foobar/ in t1.ibd ALTER TABLE t1 DISCARD TABLESPACE; SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; # List after t1 DISCARD t1.frm @@ -37,8 +42,10 @@ t1 CREATE TABLE `t1` ( PRIMARY KEY (`pk`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 `PAGE_COMPRESSED`=1 `ENCRYPTED`=YES `ENCRYPTION_KEY_ID`=4 SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` # Tablespaces should be still encrypted # t1 yes on expecting NOT FOUND NOT FOUND /foobar/ in t1.ibd DROP TABLE t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result index 531ba4063a4..3ced393f38b 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change4.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change4.result @@ -5,16 +5,22 @@ call mtr.add_suppression(".*InnoDB: Cannot open table test/.* from the internal call mtr.add_suppression("InnoDB: .ibd file is missing for table test/.*"); call mtr.add_suppression("Couldn't load plugins from 'file_key_management*"); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB ENCRYPTION_KEY_ID=4; INSERT INTO t1 VALUES (1,'foo'),(2,'bar'); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CHECK TABLE t1; Table Op Msg_type Msg_text -test.t1 check Warning Table test/t1 in tablespace 4 is encrypted but encryption service or used key_id is not available. Can't continue reading table. +test.t1 check Warning Table test/t1 in tablespace 7 is encrypted but encryption service or used key_id is not available. Can't continue reading table. test.t1 check Warning Table test/t1 is encrypted but encryption service or used key_id is not available. Can't continue checking table. test.t1 check error Corrupt SHOW WARNINGS; Level Code Message DROP TABLE t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-bad-key-change5.result b/mysql-test/suite/encryption/r/innodb-bad-key-change5.result index 11130a7a20b..9d8b1ddd23b 100644 --- a/mysql-test/suite/encryption/r/innodb-bad-key-change5.result +++ b/mysql-test/suite/encryption/r/innodb-bad-key-change5.result @@ -5,10 +5,14 @@ call mtr.add_suppression(".*InnoDB: Cannot open table test/.* from the internal call mtr.add_suppression("InnoDB: .ibd file is missing for table test/.*"); call mtr.add_suppression("Couldn't load plugins from 'file_key_management*"); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (pk INT PRIMARY KEY, f VARCHAR(8)) ENGINE=InnoDB ENCRYPTION_KEY_ID=4; INSERT INTO t1 VALUES (1,'foo'),(2,'bar'); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; OPTIMIZE TABLE t1; Table Op Msg_type Msg_text @@ -26,3 +30,5 @@ Level Code Message Warning 192 Table test/t1 is encrypted but encryption service or used key_id is not available. Can't continue reading table. Error 1296 Got error 192 'Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.' from InnoDB DROP TABLE t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-discard-import.result b/mysql-test/suite/encryption/r/innodb-discard-import.result index 195b82f7488..edcacaf530a 100644 --- a/mysql-test/suite/encryption/r/innodb-discard-import.result +++ b/mysql-test/suite/encryption/r/innodb-discard-import.result @@ -1,5 +1,8 @@ -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded"); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. Please refer to .* for how to resolve the issue."); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_compression_algorithm = 1; create table t1(c1 bigint not null, b char(200)) engine=innodb encrypted=yes encryption_key_id=4; @@ -72,6 +75,8 @@ ALTER TABLE t2 DISCARD TABLESPACE; ALTER TABLE t3 DISCARD TABLESPACE; ALTER TABLE t4 DISCARD TABLESPACE; SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_compression_algorithm = 1; # List after t1 DISCARD @@ -81,7 +86,7 @@ t3.frm t4.frm ALTER TABLE t1 IMPORT TABLESPACE; Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` SHOW CREATE TABLE t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -93,7 +98,7 @@ COUNT(*) 2000 ALTER TABLE t2 IMPORT TABLESPACE; Warnings: -Warning 1814 Tablespace has been discarded for table 't2' +Warning 1814 Tablespace has been discarded for table `t2` SHOW CREATE TABLE t2; Table Create Table t2 CREATE TABLE `t2` ( @@ -105,7 +110,7 @@ COUNT(*) 2000 ALTER TABLE t3 IMPORT TABLESPACE; Warnings: -Warning 1814 Tablespace has been discarded for table 't3' +Warning 1814 Tablespace has been discarded for table `t3` SHOW CREATE TABLE t3; Table Create Table t3 CREATE TABLE `t3` ( @@ -117,7 +122,7 @@ COUNT(*) 2000 ALTER TABLE t4 IMPORT TABLESPACE; Warnings: -Warning 1814 Tablespace has been discarded for table 't4' +Warning 1814 Tablespace has been discarded for table `t4` SHOW CREATE TABLE t4; Table Create Table t4 CREATE TABLE `t4` ( @@ -129,6 +134,8 @@ COUNT(*) 2000 flush data to disk SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_compression_algorithm = 1; # tables should be still either encrypted and/or compressed @@ -142,3 +149,5 @@ NOT FOUND /tmpres/ in t3.ibd NOT FOUND /mysql/ in t4.ibd DROP PROCEDURE innodb_insert_proc; DROP TABLE t1,t2,t3,t4; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-encryption-alter.result b/mysql-test/suite/encryption/r/innodb-encryption-alter.result index 5869c5d7000..2003df0f4f4 100644 --- a/mysql-test/suite/encryption/r/innodb-encryption-alter.result +++ b/mysql-test/suite/encryption/r/innodb-encryption-alter.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_encrypt_tables = ON; SET GLOBAL innodb_encryption_threads = 4; @@ -51,3 +53,5 @@ Error 1005 Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB set innodb_default_encryption_key_id = 1; drop table t1,t2; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-encryption-disable.result b/mysql-test/suite/encryption/r/innodb-encryption-disable.result index 63ff1dcda71..62b233c1c93 100644 --- a/mysql-test/suite/encryption/r/innodb-encryption-disable.result +++ b/mysql-test/suite/encryption/r/innodb-encryption-disable.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; call mtr.add_suppression("InnoDB: Block in space_id .* in file test/.* encrypted"); call mtr.add_suppression("InnoDB: However key management plugin or used key_id 1 is not found or used encryption algorithm or method does not match."); @@ -23,10 +25,24 @@ CREATE TABLE `t1` ( `charcol3` varchar(128) DEFAULT NULL ) ENGINE=InnoDB; insert into t1 values (1,2,'maria','db','encryption'); +select * from t1; +intcol1 intcol2 charcol1 charcol2 charcol3 +1 2 maria db encryption +select * from t5; +intcol1 intcol2 charcol1 charcol2 charcol3 +1 2 maria db encryption alter table t1 encrypted='yes' `encryption_key_id`=1; select * from t1; +intcol1 intcol2 charcol1 charcol2 charcol3 +1 2 maria db encryption +select * from t5; +intcol1 intcol2 charcol1 charcol2 charcol3 +1 2 maria db encryption +select * from t1; ERROR HY000: Got error 192 'Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.' from InnoDB select * from t5; ERROR HY000: Got error 192 'Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.' 
from InnoDB drop table t1; drop table t5; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-log-encrypt.result b/mysql-test/suite/encryption/r/innodb-log-encrypt.result index 655e3023f7a..fb62292e1f3 100644 --- a/mysql-test/suite/encryption/r/innodb-log-encrypt.result +++ b/mysql-test/suite/encryption/r/innodb-log-encrypt.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table t1(c1 bigint not null, b char(200), c varchar(200)) engine=innodb encrypted=yes encryption_key_id=1; show warnings; @@ -53,3 +55,5 @@ FOUND /publicmessage/ in ib_logfile0 NOT FOUND /publicmessage/ in ib_logfile1 drop procedure innodb_insert_proc; drop table t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-page_encryption.result b/mysql-test/suite/encryption/r/innodb-page_encryption.result index c4814983af4..051fd602db5 100644 --- a/mysql-test/suite/encryption/r/innodb-page_encryption.result +++ b/mysql-test/suite/encryption/r/innodb-page_encryption.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; show warnings; @@ -121,6 +123,8 @@ SELECT variable_value >= 0 FROM information_schema.global_status WHERE variable_ variable_value >= 0 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; update innodb_normal set c1 = c1 +1; update innodb_compact set c1 = c1 + 1; @@ -198,6 +202,8 @@ innodb_redundant CREATE TABLE `innodb_redundant` ( `b` char(200) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; show create table innodb_compact; Table Create Table @@ -275,3 +281,5 @@ drop table innodb_compressed; drop table innodb_dynamic; drop table innodb_redundant; drop table innodb_defkey; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-page_encryption_compression.result b/mysql-test/suite/encryption/r/innodb-page_encryption_compression.result index f7ffc77fd66..6efefb23b87 100644 --- a/mysql-test/suite/encryption/r/innodb-page_encryption_compression.result +++ b/mysql-test/suite/encryption/r/innodb-page_encryption_compression.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 1; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb page_compressed=1; @@ -75,6 +77,8 @@ variable_value >= 0 SELECT variable_value >= 0 FROM information_schema.global_status WHERE variable_name = 'innodb_num_pages_decompressed'; variable_value >= 0 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 1; update innodb_normal set c1 = c1 + 1; @@ -129,6 +133,8 @@ innodb_dynamic CREATE TABLE `innodb_dynamic` ( `b` char(200) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; show create table innodb_normal; Table Create Table @@ -182,3 +188,5 @@ drop procedure innodb_insert_proc; drop table innodb_normal; drop table innodb_compact; drop table innodb_dynamic; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb-page_encryption_log_encryption.result b/mysql-test/suite/encryption/r/innodb-page_encryption_log_encryption.result index 92130da19e9..672202de774 100644 --- a/mysql-test/suite/encryption/r/innodb-page_encryption_log_encryption.result +++ b/mysql-test/suite/encryption/r/innodb-page_encryption_log_encryption.result @@ -2,6 +2,8 @@ call mtr.add_suppression("KeyID 0 not found or with error. Check the key and the call mtr.add_suppression("Disabling redo log encryp*"); call mtr.add_suppression("InnoDB: Redo log crypto: Can't initialize to key version*"); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; show warnings; @@ -100,6 +102,8 @@ SELECT variable_value >= 0 FROM information_schema.global_status WHERE variable_ variable_value >= 0 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; update innodb_normal set c1 = c1 +1; update innodb_compact set c1 = c1 + 1; @@ -169,6 +173,8 @@ innodb_redundant CREATE TABLE `innodb_redundant` ( `b` char(200) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; show create table innodb_compact; Table Create Table @@ -247,3 +253,5 @@ pk 1 2 DROP TABLE t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_encryption-page-compression.result b/mysql-test/suite/encryption/r/innodb_encryption-page-compression.result index e3238990aaa..9523c7c3982 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption-page-compression.result +++ b/mysql-test/suite/encryption/r/innodb_encryption-page-compression.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_encryption_threads = 4; SET GLOBAL innodb_encrypt_tables = on; @@ -262,3 +264,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_encryption_discard_import.result b/mysql-test/suite/encryption/r/innodb_encryption_discard_import.result index f8c59b7bcc3..5bb3b2bc41e 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption_discard_import.result +++ b/mysql-test/suite/encryption/r/innodb_encryption_discard_import.result @@ -1,5 +1,8 @@ -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded."); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. Please refer to .* for how to resolve the issue."); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY, a VARCHAR(255)) ENGINE=InnoDB encrypted=yes; CREATE TABLE t2 (id INT NOT NULL PRIMARY KEY, a VARCHAR(255)) ENGINE=InnoDB; @@ -50,6 +53,8 @@ t2.ibd t3.frm t3.ibd SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; ALTER TABLE t1 DISCARD TABLESPACE; ALTER TABLE t2 DISCARD TABLESPACE; @@ -61,6 +66,8 @@ t3.frm # Restarting server # Done restarting server SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; # Tablespaces should be still encrypted # t1 yes on expecting NOT FOUND @@ -147,3 +154,5 @@ NOT FOUND /temp/ in t2.ibd NOT FOUND /barfoo/ in t3.ibd DROP PROCEDURE innodb_insert_proc; DROP TABLE t1, t2, t3; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_encryption_filekeys.result b/mysql-test/suite/encryption/r/innodb_encryption_filekeys.result index ab958004eab..576b44fe897 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption_filekeys.result +++ b/mysql-test/suite/encryption/r/innodb_encryption_filekeys.result @@ -1,5 +1,7 @@ call mtr.add_suppression("trying to do an operation on a dropped tablespace .*"); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; SET GLOBAL innodb_encrypt_tables = OFF; SET GLOBAL innodb_encryption_threads = 4; @@ -63,5 +65,7 @@ COUNT(1) SELECT COUNT(1) FROM t5; COUNT(1) 400 +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html drop table t1,t2,t3,t4, t5; set GLOBAL innodb_default_encryption_key_id=1; diff --git a/mysql-test/suite/encryption/r/innodb_encryption_is.result b/mysql-test/suite/encryption/r/innodb_encryption_is.result index 5bbcbbe6bb6..591c5a84ccc 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption_is.result +++ b/mysql-test/suite/encryption/r/innodb_encryption_is.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (c VARCHAR(8)) ENGINE=InnoDB ENCRYPTED=YES ENCRYPTION_KEY_ID=1; CREATE TABLE t2 (c VARCHAR(8)) ENGINE=InnoDB ENCRYPTED=YES ENCRYPTION_KEY_ID=2; @@ -12,3 +14,5 @@ NAME ENCRYPTION_SCHEME MIN_KEY_VERSION CURRENT_KEY_VERSION CURRENT_KEY_ID test/t1 1 1 1 1 test/t2 1 1 1 2 DROP TABLE t1, t2; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_encryption_row_compressed.result b/mysql-test/suite/encryption/r/innodb_encryption_row_compressed.result new file mode 100644 index 00000000000..e49e38a8f3f --- /dev/null +++ b/mysql-test/suite/encryption/r/innodb_encryption_row_compressed.result @@ -0,0 +1,159 @@ +SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET GLOBAL innodb_file_per_table = ON; +create table innodb_compressed1(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed encrypted=yes; +create table innodb_compressed2(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=1 encrypted=yes; +create table innodb_compressed3(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=2 encrypted=yes; +create table innodb_compressed4(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=4 encrypted=yes; +insert into innodb_compressed1 values (1, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (2, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (3, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (4, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (5, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (6, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (7, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (8, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (9, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (10, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed2 select * from innodb_compressed1; +insert into innodb_compressed3 select * from innodb_compressed1; +insert into innodb_compressed4 select * from innodb_compressed1; +# t1 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed1.ibd +# t2 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed2.ibd +# t3 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed3.ibd +# t4 yes on 
expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed4.ibd +SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET GLOBAL innodb_file_per_table = ON; +select * from innodb_compressed1 where d = 20; +c1 d a b +1 20 private evenmoreprivate +2 20 private evenmoreprivate +8 20 private evenmoreprivate +9 20 private evenmoreprivate +10 20 private evenmoreprivate +select * from innodb_compressed1 where d = 30; +c1 d a b +3 30 private evenmoreprivate +4 30 private evenmoreprivate +5 30 private evenmoreprivate +6 30 private evenmoreprivate +7 30 private evenmoreprivate +select * from innodb_compressed2 where d = 20; +c1 d a b +1 20 private evenmoreprivate +2 20 private evenmoreprivate +8 20 private evenmoreprivate +9 20 private evenmoreprivate +10 20 private evenmoreprivate +select * from innodb_compressed2 where d = 30; +c1 d a b +3 30 private evenmoreprivate +4 30 private evenmoreprivate +5 30 private evenmoreprivate +6 30 private evenmoreprivate +7 30 private evenmoreprivate +select * from innodb_compressed3 where d = 20; +c1 d a b +1 20 private evenmoreprivate +2 20 private evenmoreprivate +8 20 private evenmoreprivate +9 20 private evenmoreprivate +10 20 private evenmoreprivate +select * from innodb_compressed3 where d = 30; +c1 d a b +3 30 private evenmoreprivate +4 30 private evenmoreprivate +5 30 private evenmoreprivate +6 30 private evenmoreprivate +7 30 private evenmoreprivate +select * from innodb_compressed4 where d = 20; +c1 d a b +1 20 private evenmoreprivate +2 20 private evenmoreprivate +8 20 private evenmoreprivate +9 20 private evenmoreprivate +10 20 private evenmoreprivate +select * from innodb_compressed4 where d = 30; +c1 d a b +3 30 private evenmoreprivate +4 30 private evenmoreprivate +5 30 private evenmoreprivate +6 30 private evenmoreprivate +7 30 private 
evenmoreprivate +update innodb_compressed1 set d = d + 10 where d = 30; +update innodb_compressed2 set d = d + 10 where d = 30; +update innodb_compressed3 set d = d + 10 where d = 30; +update innodb_compressed4 set d = d + 10 where d = 30; +insert into innodb_compressed1 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed2 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed3 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed4 values (20, 60, 'newprivate', 'newevenmoreprivate'); +# t1 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed1.ibd +# t2 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed2.ibd +# t3 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed3.ibd +# t4 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed4.ibd +select * from innodb_compressed1 where d = 40; +c1 d a b +3 40 private evenmoreprivate +4 40 private evenmoreprivate +5 40 private evenmoreprivate +6 40 private evenmoreprivate +7 40 private evenmoreprivate +select * from innodb_compressed1 where d = 60; +c1 d a b +20 60 newprivate newevenmoreprivate +select * from innodb_compressed2 where d = 40; +c1 d a b +3 40 private evenmoreprivate +4 40 private evenmoreprivate +5 40 private evenmoreprivate +6 40 private evenmoreprivate +7 40 private evenmoreprivate +select * from innodb_compressed2 where d = 60; +c1 d a b +20 60 newprivate newevenmoreprivate +select * from innodb_compressed3 where d = 40; +c1 d a b +3 40 private evenmoreprivate +4 40 private evenmoreprivate +5 40 private evenmoreprivate +6 40 private evenmoreprivate +7 40 private evenmoreprivate +select * from innodb_compressed3 where d = 60; +c1 d a b +20 60 newprivate newevenmoreprivate +select * from innodb_compressed4 where d = 40; +c1 d a b +3 40 private evenmoreprivate +4 40 private evenmoreprivate +5 40 private evenmoreprivate +6 40 private evenmoreprivate +7 40 
private evenmoreprivate +select * from innodb_compressed4 where d = 60; +c1 d a b +20 60 newprivate newevenmoreprivate +# t1 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed1.ibd +# t2 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed2.ibd +# t3 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed3.ibd +# t4 yes on expecting NOT FOUND +NOT FOUND /private/ in innodb_compressed4.ibd +drop table innodb_compressed1; +drop table innodb_compressed2; +drop table innodb_compressed3; +drop table innodb_compressed4; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_encryption_tables.result b/mysql-test/suite/encryption/r/innodb_encryption_tables.result index 640e2be87a2..da62c0a2f0e 100644 --- a/mysql-test/suite/encryption/r/innodb_encryption_tables.result +++ b/mysql-test/suite/encryption/r/innodb_encryption_tables.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact; @@ -104,6 +106,8 @@ SELECT variable_value >= 0 FROM information_schema.global_status WHERE variable_ variable_value >= 0 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; update innodb_normal set c1 = c1 + 1; update innodb_compact set c1 = c1 + 1; @@ -159,3 +163,5 @@ drop table innodb_compact; drop table innodb_dynamic; drop table innodb_compressed; drop table innodb_redundant; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_onlinealter_encryption.result b/mysql-test/suite/encryption/r/innodb_onlinealter_encryption.result index 47bcfea87c8..798e0ca4357 100644 --- a/mysql-test/suite/encryption/r/innodb_onlinealter_encryption.result +++ b/mysql-test/suite/encryption/r/innodb_onlinealter_encryption.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY, a VARCHAR(255)) ENGINE=InnoDB encrypted=yes; CREATE TABLE t2 (id INT NOT NULL PRIMARY KEY, a VARCHAR(255)) ENGINE=InnoDB; @@ -174,3 +176,5 @@ NOT FOUND /mangled/ in t6.ibd NOT FOUND /mysql/ in t7.ibd DROP PROCEDURE innodb_insert_proc; DROP TABLE t1, t2, t3, t4, t5, t6, t7; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/r/innodb_page_encryption_key_change.result b/mysql-test/suite/encryption/r/innodb_page_encryption_key_change.result index 43fb7368654..22038c0e933 100644 --- a/mysql-test/suite/encryption/r/innodb_page_encryption_key_change.result +++ b/mysql-test/suite/encryption/r/innodb_page_encryption_key_change.result @@ -1,5 +1,7 @@ # Restart mysqld --loose-file-key-management-filename=keys2.txt SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; show warnings; @@ -105,6 +107,8 @@ SELECT variable_value >= 0 FROM information_schema.global_status WHERE variable_ variable_value >= 0 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; alter table innodb_compact engine=innodb encryption_key_id = 2; alter table innodb_compressed engine=innodb encryption_key_id = 3; @@ -151,3 +155,5 @@ drop table innodb_compact; drop table innodb_compressed; drop table innodb_dynamic; drop table innodb_redundant; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/encryption/t/create_or_replace.test b/mysql-test/suite/encryption/t/create_or_replace.test index 3b2970e5162..98d5b7cc017 100644 --- a/mysql-test/suite/encryption/t/create_or_replace.test +++ b/mysql-test/suite/encryption/t/create_or_replace.test @@ -7,6 +7,9 @@ # # MDEV-8164: Server crashes in pfs_mutex_enter_func after fil_crypt_is_closing or alike # + +call mtr.add_suppression("InnoDB: Error: trying to do an operation on a dropped tablespace.*"); + SET default_storage_engine = InnoDB; CREATE TABLE t1 (pk INT PRIMARY KEY, c VARCHAR(256)); diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change.test b/mysql-test/suite/encryption/t/innodb-bad-key-change.test index 9180fb12085..9fd6ac3a011 100644 --- a/mysql-test/suite/encryption/t/innodb-bad-key-change.test +++ b/mysql-test/suite/encryption/t/innodb-bad-key-change.test @@ -22,6 +22,8 @@ call mtr.add_suppression(".*InnoDB: Cannot open table test/.* from the internal call mtr.add_suppression("InnoDB: .ibd file is missing for table test/.*"); call mtr.add_suppression("mysqld: File .*"); call mtr.add_suppression("InnoDB: Tablespace id .* is encrypted but encryption service or used key_id .* is not available. 
Can't continue opening tablespace."); +call mtr.add_suppression("InnoDB: InnoDB: Page may be an index page where index id is .*"); + --echo --echo # Start server with keys2.txt -- let $restart_parameters=--file-key-management-filename=$MYSQL_TEST_DIR/std_data/keys2.txt diff --git a/mysql-test/suite/encryption/t/innodb-bad-key-change3.test b/mysql-test/suite/encryption/t/innodb-bad-key-change3.test index 20d63b10649..d0480a6b424 100644 --- a/mysql-test/suite/encryption/t/innodb-bad-key-change3.test +++ b/mysql-test/suite/encryption/t/innodb-bad-key-change3.test @@ -8,7 +8,9 @@ # # MDEV-8772: Assertion failure in file ha_innodb.cc line 20027 when importing page compressed and encrypted tablespace using incorrect keys # -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded"); + +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. Please refer to .* for how to resolve the issue."); --disable_query_log let $innodb_file_format_orig = `SELECT @@innodb_file_format`; diff --git a/mysql-test/suite/encryption/t/innodb-discard-import.test b/mysql-test/suite/encryption/t/innodb-discard-import.test index 6d9f6c5dbb3..3bcb8d39862 100644 --- a/mysql-test/suite/encryption/t/innodb-discard-import.test +++ b/mysql-test/suite/encryption/t/innodb-discard-import.test @@ -10,7 +10,8 @@ # MDEV-8770: Incorrect error message when importing page compressed tablespace # -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded"); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. 
Please refer to .* for how to resolve the issue."); --disable_query_log let $innodb_file_format_orig = `SELECT @@innodb_file_format`; diff --git a/mysql-test/suite/encryption/t/innodb-encryption-disable.test b/mysql-test/suite/encryption/t/innodb-encryption-disable.test index 42d8008d1aa..e8e2ba02402 100644 --- a/mysql-test/suite/encryption/t/innodb-encryption-disable.test +++ b/mysql-test/suite/encryption/t/innodb-encryption-disable.test @@ -30,12 +30,14 @@ call mtr.add_suppression("InnoDB: Tablespace id.* is encrypted but encryption se --shutdown_server --source include/wait_until_disconnected.inc ---write_file $MYSQLTEST_VARDIR/keys1.txt +--error 0,1,2 +--remove_file $MYSQLTEST_VARDIR/encryption-disable-keys1.txt +--write_file $MYSQLTEST_VARDIR/encryption-disable-keys1.txt 1;770A8A65DA156D24EE2A093277530142 4;770A8A65DA156D24EE2A093277530143 EOF ---exec echo "restart:--innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--exec echo "restart:--innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/encryption-disable-keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --enable_reconnect --source include/wait_until_connected_again.inc @@ -58,8 +60,15 @@ CREATE TABLE `t1` ( ) ENGINE=InnoDB; insert into t1 values (1,2,'maria','db','encryption'); + +select * from t1; +select * from t5; + alter table t1 encrypted='yes' `encryption_key_id`=1; +select * from t1; +select * from t5; + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --shutdown_server --source include/wait_until_disconnected.inc @@ -77,7 +86,7 @@ select * from t5; --shutdown_server --source include/wait_until_disconnected.inc ---exec echo "restart:--innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management 
--file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--exec echo "restart:--innodb-encrypt-tables --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/encryption-disable-keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --enable_reconnect --source include/wait_until_connected_again.inc @@ -89,4 +98,4 @@ EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; --enable_query_log ---remove_file $MYSQLTEST_VARDIR/keys1.txt +--remove_file $MYSQLTEST_VARDIR/encryption-disable-keys1.txt diff --git a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test index def3665eeff..0361fddecff 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test +++ b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test @@ -4,7 +4,8 @@ -- source include/not_embedded.inc -- source include/not_windows.inc -call mtr.add_suppression("InnoDB: Table .* tablespace is set as discarded."); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. 
Please refer to .* for how to resolve the issue."); --let $MYSQLD_TMPDIR = `SELECT @@tmpdir` --let $MYSQLD_DATADIR = `SELECT @@datadir` @@ -107,6 +108,7 @@ ALTER TABLE t3 DISCARD TABLESPACE; --echo # List after t1 DISCARD --list_files $MYSQLD_DATADIR/test +--disable_result_log --error 0,1,2 --remove_file $MYSQLD_DATADIR/test/t1.cfg --error 0,1,2 diff --git a/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.opt b/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.opt new file mode 100644 index 00000000000..7ebf81a07f3 --- /dev/null +++ b/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.opt @@ -0,0 +1,4 @@ +--innodb-encrypt-tables=ON +--innodb-encryption-rotate-key-age=15 +--innodb-encryption-threads=4 +--innodb-tablespaces-encryption diff --git a/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.test b/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.test new file mode 100644 index 00000000000..0a28c1690a2 --- /dev/null +++ b/mysql-test/suite/encryption/t/innodb_encryption_row_compressed.test @@ -0,0 +1,125 @@ +-- source include/have_innodb.inc +-- source include/have_file_key_management_plugin.inc +-- source include/not_embedded.inc + +--disable_query_log +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +create table innodb_compressed1(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed encrypted=yes; +create table innodb_compressed2(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=1 encrypted=yes; +create table innodb_compressed3(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=2 encrypted=yes; +create table 
innodb_compressed4(c1 bigint not null primary key, d int, a varchar(20), b char(200)) engine=innodb row_format=compressed key_block_size=4 encrypted=yes; + +insert into innodb_compressed1 values (1, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (2, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (3, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (4, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (5, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (6, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (7, 30, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (8, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (9, 20, 'private', 'evenmoreprivate'); +insert into innodb_compressed1 values (10, 20, 'private', 'evenmoreprivate'); + +insert into innodb_compressed2 select * from innodb_compressed1; +insert into innodb_compressed3 select * from innodb_compressed1; +insert into innodb_compressed4 select * from innodb_compressed1; + +--source include/restart_mysqld.inc + +--let $MYSQLD_DATADIR=`select @@datadir` +--let t1_IBD = $MYSQLD_DATADIR/test/innodb_compressed1.ibd +--let t2_IBD = $MYSQLD_DATADIR/test/innodb_compressed2.ibd +--let t3_IBD = $MYSQLD_DATADIR/test/innodb_compressed3.ibd +--let t4_IBD = $MYSQLD_DATADIR/test/innodb_compressed4.ibd +--let SEARCH_RANGE = 10000000 +--let SEARCH_PATTERN=private +--echo # t1 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t1_IBD +-- source include/search_pattern_in_file.inc +--echo # t2 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t2_IBD +-- source include/search_pattern_in_file.inc +--echo # t3 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t3_IBD +-- source include/search_pattern_in_file.inc +--echo # t4 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t4_IBD +-- source include/search_pattern_in_file.inc + +SET GLOBAL 
innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +select * from innodb_compressed1 where d = 20; +select * from innodb_compressed1 where d = 30; +select * from innodb_compressed2 where d = 20; +select * from innodb_compressed2 where d = 30; +select * from innodb_compressed3 where d = 20; +select * from innodb_compressed3 where d = 30; +select * from innodb_compressed4 where d = 20; +select * from innodb_compressed4 where d = 30; + +update innodb_compressed1 set d = d + 10 where d = 30; +update innodb_compressed2 set d = d + 10 where d = 30; +update innodb_compressed3 set d = d + 10 where d = 30; +update innodb_compressed4 set d = d + 10 where d = 30; + +insert into innodb_compressed1 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed2 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed3 values (20, 60, 'newprivate', 'newevenmoreprivate'); +insert into innodb_compressed4 values (20, 60, 'newprivate', 'newevenmoreprivate'); + +--let SEARCH_PATTERN=private +--echo # t1 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t1_IBD +-- source include/search_pattern_in_file.inc +--echo # t2 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t2_IBD +-- source include/search_pattern_in_file.inc +--echo # t3 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t3_IBD +-- source include/search_pattern_in_file.inc +--echo # t4 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t4_IBD +-- source include/search_pattern_in_file.inc + +--source include/restart_mysqld.inc + +select * from innodb_compressed1 where d = 40; +select * from innodb_compressed1 where d = 60; +select * from innodb_compressed2 where d = 40; +select * from innodb_compressed2 where d = 60; +select * from innodb_compressed3 where d = 40; +select * from innodb_compressed3 where d = 60; +select * from innodb_compressed4 where d = 40; +select * from innodb_compressed4 where d = 60; + +--let SEARCH_PATTERN=private +--echo # t1 yes on 
expecting NOT FOUND +-- let SEARCH_FILE=$t1_IBD +-- source include/search_pattern_in_file.inc +--echo # t2 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t2_IBD +-- source include/search_pattern_in_file.inc +--echo # t3 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t3_IBD +-- source include/search_pattern_in_file.inc +--echo # t4 yes on expecting NOT FOUND +-- let SEARCH_FILE=$t4_IBD +-- source include/search_pattern_in_file.inc + +drop table innodb_compressed1; +drop table innodb_compressed2; +drop table innodb_compressed3; +drop table innodb_compressed4; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/engines/funcs/t/tc_partition_list_directory.opt b/mysql-test/suite/engines/funcs/t/tc_partition_list_directory.opt new file mode 100644 index 00000000000..990e4941ae9 --- /dev/null +++ b/mysql-test/suite/engines/funcs/t/tc_partition_list_directory.opt @@ -0,0 +1 @@ +--loose-innodb-strict-mode=0 diff --git a/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result b/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result index 15c94e52478..b740a04f015 100644 --- a/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_columns_is_embedded.result @@ -128,7 +128,7 @@ def information_schema FILES ENGINE 10 NO varchar 64 192 NULL NULL NULL utf8 ut def information_schema FILES EXTENT_SIZE 16 0 NO bigint NULL NULL 19 0 NULL NULL NULL bigint(4) def information_schema FILES EXTRA 38 NULL YES varchar 255 765 NULL NULL NULL utf8 utf8_general_ci varchar(255) def information_schema FILES FILE_ID 1 0 NO bigint NULL NULL 19 0 NULL NULL NULL bigint(4) -def information_schema FILES FILE_NAME 2 NULL YES varchar 64 192 NULL NULL NULL utf8 utf8_general_ci varchar(64) +def information_schema FILES FILE_NAME 2 NULL YES varchar 512 1536 NULL NULL NULL utf8 utf8_general_ci 
varchar(512) def information_schema FILES FILE_TYPE 3 NO varchar 20 60 NULL NULL NULL utf8 utf8_general_ci varchar(20) def information_schema FILES FREE_EXTENTS 14 NULL YES bigint NULL NULL 19 0 NULL NULL NULL bigint(4) def information_schema FILES FULLTEXT_KEYS 11 NULL YES varchar 64 192 NULL NULL NULL utf8 utf8_general_ci varchar(64) @@ -644,7 +644,7 @@ NULL information_schema EVENTS ORIGINATOR bigint NULL NULL NULL NULL bigint(10) 3.0000 information_schema EVENTS COLLATION_CONNECTION varchar 32 96 utf8 utf8_general_ci varchar(32) 3.0000 information_schema EVENTS DATABASE_COLLATION varchar 32 96 utf8 utf8_general_ci varchar(32) NULL information_schema FILES FILE_ID bigint NULL NULL NULL NULL bigint(4) -3.0000 information_schema FILES FILE_NAME varchar 64 192 utf8 utf8_general_ci varchar(64) +3.0000 information_schema FILES FILE_NAME varchar 512 1536 utf8 utf8_general_ci varchar(512) 3.0000 information_schema FILES FILE_TYPE varchar 20 60 utf8 utf8_general_ci varchar(20) 3.0000 information_schema FILES TABLESPACE_NAME varchar 64 192 utf8 utf8_general_ci varchar(64) 3.0000 information_schema FILES TABLE_CATALOG varchar 64 192 utf8 utf8_general_ci varchar(64) diff --git a/mysql-test/suite/funcs_1/r/is_tables_innodb.result b/mysql-test/suite/funcs_1/r/is_tables_innodb.result index 23e6ad77309..0e2389771b2 100644 --- a/mysql-test/suite/funcs_1/r/is_tables_innodb.result +++ b/mysql-test/suite/funcs_1/r/is_tables_innodb.result @@ -21,7 +21,7 @@ TABLE_NAME t1 TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -44,7 +44,7 @@ TABLE_NAME t2 TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -67,7 +67,7 @@ TABLE_NAME t1 TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ 
-111,7 +111,7 @@ TABLE_NAME t1 TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -134,7 +134,7 @@ TABLE_NAME t2 TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# diff --git a/mysql-test/suite/funcs_1/r/is_tables_mysql.result b/mysql-test/suite/funcs_1/r/is_tables_mysql.result index c9c86b4e96e..8e0c9b64dab 100644 --- a/mysql-test/suite/funcs_1/r/is_tables_mysql.result +++ b/mysql-test/suite/funcs_1/r/is_tables_mysql.result @@ -315,7 +315,7 @@ TABLE_NAME innodb_index_stats TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -338,7 +338,7 @@ TABLE_NAME innodb_table_stats TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# diff --git a/mysql-test/suite/funcs_1/r/is_tables_mysql_embedded.result b/mysql-test/suite/funcs_1/r/is_tables_mysql_embedded.result index 479312334a7..f40294491cc 100644 --- a/mysql-test/suite/funcs_1/r/is_tables_mysql_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_tables_mysql_embedded.result @@ -315,7 +315,7 @@ TABLE_NAME innodb_index_stats TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -338,7 +338,7 @@ TABLE_NAME innodb_table_stats TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1019,7 +1019,7 @@ TABLE_NAME innodb_index_stats TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1042,7 +1042,7 @@ TABLE_NAME innodb_table_stats 
TABLE_TYPE BASE TABLE ENGINE InnoDB VERSION 10 -ROW_FORMAT Compact +ROW_FORMAT DYNAMIC_OR_PAGE TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# diff --git a/mysql-test/suite/galera/r/create.result b/mysql-test/suite/galera/r/create.result index b93cd7b9496..4d6488d324b 100644 --- a/mysql-test/suite/galera/r/create.result +++ b/mysql-test/suite/galera/r/create.result @@ -76,13 +76,17 @@ DROP TABLE t1, t2; # MDEV-10235: Deadlock in CREATE TABLE ... AS SELECT .. if result set # is empty in Galera # +connection node_1; CREATE TABLE t1(c1 INT) ENGINE=INNODB; INSERT INTO t1 VALUES(1); CREATE TABLE t2 AS SELECT * FROM t1 WHERE c1=2; +connection node_2; SELECT * FROM t1; c1 1 SELECT * FROM t2; c1 DROP TABLE t1, t2; +disconnect node_2; +disconnect node_1; # End of tests diff --git a/mysql-test/suite/handler/disabled.def b/mysql-test/suite/handler/disabled.def new file mode 100644 index 00000000000..888298bbb09 --- /dev/null +++ b/mysql-test/suite/handler/disabled.def @@ -0,0 +1,11 @@ +############################################################################## +# +# List the test cases that are to be disabled temporarily. +# +# Separate the test case name and the comment with ':'. +# +# : BUG# +# +# Do not use any TAB characters for whitespace. 
+# +############################################################################## diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def index 8cae44a3607..1580474de29 100644 --- a/mysql-test/suite/innodb/disabled.def +++ b/mysql-test/suite/innodb/disabled.def @@ -10,3 +10,4 @@ # ############################################################################## +innodb_defragment_fill_factor : MDEV-10771 \ No newline at end of file diff --git a/mysql-test/include/have_innodb_bzip2.inc b/mysql-test/suite/innodb/include/have_innodb_bzip2.inc similarity index 100% rename from mysql-test/include/have_innodb_bzip2.inc rename to mysql-test/suite/innodb/include/have_innodb_bzip2.inc diff --git a/mysql-test/include/have_innodb_lz4.inc b/mysql-test/suite/innodb/include/have_innodb_lz4.inc similarity index 100% rename from mysql-test/include/have_innodb_lz4.inc rename to mysql-test/suite/innodb/include/have_innodb_lz4.inc diff --git a/mysql-test/include/have_innodb_lzma.inc b/mysql-test/suite/innodb/include/have_innodb_lzma.inc similarity index 100% rename from mysql-test/include/have_innodb_lzma.inc rename to mysql-test/suite/innodb/include/have_innodb_lzma.inc diff --git a/mysql-test/include/have_innodb_lzo.inc b/mysql-test/suite/innodb/include/have_innodb_lzo.inc similarity index 100% rename from mysql-test/include/have_innodb_lzo.inc rename to mysql-test/suite/innodb/include/have_innodb_lzo.inc diff --git a/mysql-test/include/have_innodb_snappy.inc b/mysql-test/suite/innodb/include/have_innodb_snappy.inc similarity index 100% rename from mysql-test/include/have_innodb_snappy.inc rename to mysql-test/suite/innodb/include/have_innodb_snappy.inc diff --git a/mysql-test/suite/innodb/include/innodb_simulate_comp_failures.inc b/mysql-test/suite/innodb/include/innodb_simulate_comp_failures.inc index bc64937669a..75cab775528 100644 --- a/mysql-test/suite/innodb/include/innodb_simulate_comp_failures.inc +++ 
b/mysql-test/suite/innodb/include/innodb_simulate_comp_failures.inc @@ -23,6 +23,8 @@ call mtr.add_suppression(".*"); # create the table with compressed pages of size 8K. CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255), KEY msg_i(msg)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW CREATE TABLE t1; + # percentage of compressions that will be forced to fail SET GLOBAL innodb_simulate_comp_failures = 25; @@ -35,16 +37,16 @@ let $commit_iterations=50; while ($num_inserts_ind) { let $repeat = `select floor(rand() * 10)`; - eval -INSERT INTO t1(id, msg) -VALUES ($num_inserts_ind, REPEAT('abcdefghijklmnopqrstuvwxyz', $repeat)); + eval INSERT INTO t1(id, msg) + VALUES ($num_inserts_ind, REPEAT('abcdefghijklmnopqrstuvwxyz', $repeat)); dec $num_inserts_ind; } --enable_query_log --enable_result_log -SELECT COUNT(*) FROM t1; +COMMIT; +SELECT COUNT(id) FROM t1; --disable_query_log --disable_result_log diff --git a/mysql-test/suite/innodb/include/innodb_wl6501_crash.inc b/mysql-test/suite/innodb/include/innodb_wl6501_crash.inc new file mode 100644 index 00000000000..047be4dba34 --- /dev/null +++ b/mysql-test/suite/innodb/include/innodb_wl6501_crash.inc @@ -0,0 +1,430 @@ +# +# WL#6501: make truncate table atomic +# + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +# suppress expected warnings. 
+call mtr.add_suppression("The file '.*' already exists though the corresponding table did not exist in the InnoDB data dictionary"); +call mtr.add_suppression("Cannot create file '.*'"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'"); + +################################################################################ +# +# Will test following scenarios: +# 1. Hit crash point while writing redo log. +# 2. Hit crash point on completion of redo log write. +# 3. Hit crash point while dropping indexes. +# 4. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# 5. Hit crash point while creating indexes. +# 6. Hit crash point after data is updated to system-table and in-memory dict. +# 7. Hit crash point before/after log checkpoint is done. +# +################################################################################ + +#----------------------------------------------------------------------------- +# +# create test-bed +# +let $per_table = `select @@innodb_file_per_table`; +let $format = `select @@innodb_file_format`; + +eval set global innodb_file_per_table = on; +let $WL6501_TMP_DIR = `select @@tmpdir`; +let $WL6501_DATA_DIR = `select @@datadir`; +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/my_restart.err; + +#----------------------------------------------------------------------------- +# +# 1. Hit crash point while writing redo log. +# +--echo "1. Hit crash point while writing redo log." 
+use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_while_writing_redo_log"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 2. Hit crash point on completion of redo log write. +# +--echo "2. Hit crash point on completion of redo log write." +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_after_redo_log_write_complete"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 3. Hit crash point while dropping indexes. +# +--echo "3. Hit crash point while dropping indexes." 
+use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_drop_of_clust_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; +# +# +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_drop_of_uniq_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +# +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; +# +# +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), index ck(c)) + engine = innodb row_format = 
$wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_drop_of_sec_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 4. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# +--echo "4. Hit crash point on completing drop of all indexes before creation" +--echo " of index is commenced." +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 5. Hit crash point while creating indexes. +# +--echo "5. Hit crash point while creating indexes." 
+use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_create_of_clust_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; +# +# +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_create_of_uniq_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; +# +# +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), index ck(c)) + engine = innodb row_format = 
$wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_create_of_sec_index"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 6. Hit crash point after data is updated to system-table and in-memory dict. +# +--echo "6. Hit crash point after data is updated to system-table and" +--echo " in-memory dict." +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 7. Hit crash point before/after log checkpoint is done. +# +--echo "7. Hit crash point before/after log checkpoint is done." 
+use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_before_log_removal"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t where f < 2.5; +drop table t; +# +# +use test; +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +SET innodb_strict_mode=OFF; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_after_truncate_done"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + + +#----------------------------------------------------------------------------- +# +# remove test-bed +# +eval set global innodb_file_format = $format; +eval set global innodb_file_per_table = $per_table; diff --git a/mysql-test/suite/innodb/include/innodb_wl6501_crash_temp.inc 
b/mysql-test/suite/innodb/include/innodb_wl6501_crash_temp.inc new file mode 100644 index 00000000000..76b7b5d59b5 --- /dev/null +++ b/mysql-test/suite/innodb/include/innodb_wl6501_crash_temp.inc @@ -0,0 +1,102 @@ +# +# WL#6501: make truncate table atomic +# + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +# suppress expected warnings +call mtr.add_suppression("does not exist in the InnoDB internal"); + +################################################################################ +# +# Will test following scenarios: +# 1. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# 2. Hit crash point after data is updated to system-table and in-memory dict. +# +################################################################################ + +#----------------------------------------------------------------------------- +# +# create test-bed +# +let $per_table = `select @@innodb_file_per_table`; +let $format = `select @@innodb_file_format`; + +eval set global innodb_file_per_table = on; +let $WL6501_TMP_DIR = `select @@tmpdir`; +let $WL6501_DATA_DIR = `select @@datadir`; +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/my_restart.err; + +#----------------------------------------------------------------------------- +# +# 1. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# +--echo "1. Hit crash point on completing drop of all indexes before creation" +--echo " of index is commenced." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +set innodb_strict_mode=off; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; + +#----------------------------------------------------------------------------- +# +# 2. Hit crash point after data is updated to system-table and in-memory dict. +# +--echo "2. Hit crash point after data is updated to system-table and" +--echo " in-memory dict." +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +set innodb_strict_mode=off; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; + +#----------------------------------------------------------------------------- +# +# remove test-bed +# +eval set global innodb_file_format = $format; +eval set global innodb_file_per_table = $per_table; diff --git a/mysql-test/suite/innodb/include/show_i_s_tablespaces.inc b/mysql-test/suite/innodb/include/show_i_s_tablespaces.inc new file mode 100644 index 
00000000000..a79bc3c01a8 --- /dev/null +++ b/mysql-test/suite/innodb/include/show_i_s_tablespaces.inc @@ -0,0 +1,38 @@ +# This script assumes that the caller did the following; +# LET $MYSQLD_DATADIR = `select @@datadir`; +# LET $INNODB_PAGE_SIZE = `select @@innodb_page_size`; +--echo === information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +--disable_query_log +--replace_regex /#P#/#p#/ /#SP#/#sp#/ +--replace_result ./ MYSQLD_DATADIR/ $MYSQLD_DATADIR/ MYSQLD_DATADIR/ $MYSQLD_DATADIR MYSQLD_DATADIR/ $MYSQL_TMP_DIR MYSQL_TMP_DIR $INNODB_PAGE_SIZE DEFAULT +SELECT s.name 'Space_Name', + s.space_type 'Space_Type', + s.page_size 'Page_Size', + s.zip_page_size 'Zip_Size', + s.row_format 'Formats_Permitted', + d.path 'Path' + FROM information_schema.innodb_sys_tablespaces s, + information_schema.innodb_sys_datafiles d + WHERE s.space = d.space + AND s.name NOT LIKE 'mysql/%' + AND s.name NOT LIKE 'sys/%' + ORDER BY s.space; + +# This SELECT will not show UNDO or TEMPORARY tablespaces since +# they are only in FILES, not SYS_TABLESPACES. 
+--echo === information_schema.files === +--replace_regex /innodb_file_per_table.[0-9]+/innodb_file_per_table.##/ /#P#/#p#/ /#SP#/#sp#/ +--replace_result ./ MYSQLD_DATADIR/ $MYSQLD_DATADIR/ MYSQLD_DATADIR/ $MYSQLD_DATADIR MYSQLD_DATADIR/ $MYSQL_TMP_DIR MYSQL_TMP_DIR $INNODB_PAGE_SIZE DEFAULT +SELECT s.name 'Space_Name', + f.file_type 'File_Type', + f.engine 'Engine', + f.status 'Status', + f.tablespace_name 'Tablespace_Name', + f.file_name 'Path' + FROM information_schema.files f, + information_schema.innodb_sys_tablespaces s + WHERE f.file_id = s.space + AND s.name NOT LIKE 'mysql/%' + AND s.name NOT LIKE 'sys/%' + ORDER BY f.file_id; +--enable_query_log diff --git a/mysql-test/suite/innodb/r/auto_increment_dup.result b/mysql-test/suite/innodb/r/auto_increment_dup.result index 25a0a072c24..fa0921b57a5 100644 --- a/mysql-test/suite/innodb/r/auto_increment_dup.result +++ b/mysql-test/suite/innodb/r/auto_increment_dup.result @@ -1,4 +1,139 @@ drop table if exists t1; +set global transaction isolation level repeatable read; +CREATE TABLE t1( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +k INT, +c CHAR(1), +UNIQUE KEY(k)) ENGINE=InnoDB; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `k` int(11) DEFAULT NULL, + `c` char(1) DEFAULT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `k` (`k`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +# +# Sequential execution +# +INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +# +# 1 duplicate +# +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; +affected rows: 4 +info: Records: 3 Duplicates: 1 Warnings: 0 +# +# 5 rows, consecutive auto_inc values +# +SELECT * FROM t1 order by k; +id k c +1 1 NULL +2 2 2 +3 3 NULL +4 4 NULL +5 5 NULL +affected rows: 5 +DROP TABLE t1; +affected rows: 0 +CREATE TABLE t1( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +k INT, +c CHAR(1), +UNIQUE KEY(k)) 
ENGINE=InnoDB; +affected rows: 0 +# +# Sequential execution 2 +# +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +# +# 1 duplicate +# +INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; +affected rows: 4 +info: Records: 3 Duplicates: 1 Warnings: 0 +# +# 5 rows, consecutive auto_inc values +# +SELECT * FROM t1 order by k; +id k c +4 1 NULL +1 2 1 +5 3 NULL +2 4 NULL +3 5 NULL +affected rows: 5 +DROP TABLE t1; +affected rows: 0 +CREATE TABLE t1( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +k INT, +c CHAR(1), +UNIQUE KEY(k)) ENGINE=InnoDB; +affected rows: 0 +# +# Parallel execution +# +connect con1, localhost, root; +connect con2, localhost, root; +SET DEBUG_SYNC='now WAIT_FOR write_row_done'; +connection con1; +# +# Connection 1 +# +SET DEBUG_SYNC='ha_write_row_end SIGNAL write_row_done WAIT_FOR continue'; +affected rows: 0 +INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; +connection con2; +# +# Connection 2 +# +affected rows: 0 +SET DEBUG_SYNC='execute_command_after_close_tables SIGNAL continue'; +affected rows: 0 +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +connection con1; +# +# 2 duplicates +# +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +connection default; +# +# 3 rows +# +SELECT * FROM t1 order by k; +id k c +1 1 NULL +2 2 NULL +3 3 NULL +affected rows: 3 +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; +affected rows: 4 +info: Records: 3 Duplicates: 1 Warnings: 0 +SELECT * FROM t1 order by k; +id k c +1 1 NULL +2 2 2 +3 3 NULL +7 4 NULL +8 5 NULL +affected rows: 5 +disconnect con1; +disconnect con2; +connection default; +DROP TABLE t1; +# +# Parallel test with read_committed +# +set global transaction isolation level read committed; +drop table if exists t1; CREATE TABLE t1( id INT NOT NULL 
AUTO_INCREMENT PRIMARY KEY, k INT, @@ -7,10 +142,16 @@ UNIQUE KEY(k)) ENGINE=InnoDB; connect con1, localhost, root; connect con2, localhost, root; connection con1; +# +# Connection 1 +# SET DEBUG_SYNC='ha_write_row_end SIGNAL continue2 WAIT_FOR continue1'; affected rows: 0 INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; connection con2; +# +# Connection 2 +# SET DEBUG_SYNC='ha_write_row_start WAIT_FOR continue2'; affected rows: 0 SET DEBUG_SYNC='after_mysql_insert SIGNAL continue1'; @@ -22,6 +163,9 @@ connection con1; affected rows: 4 info: Records: 3 Duplicates: 1 Warnings: 0 SET DEBUG_SYNC='RESET'; +# +# 5 rows, gap in autoinc values +# SELECT * FROM t1 ORDER BY k; id k c 1 1 NULL @@ -33,3 +177,4 @@ disconnect con1; disconnect con2; connection default; DROP TABLE t1; +set global transaction isolation level repeatable read; diff --git a/mysql-test/suite/innodb/r/group_commit_crash.result b/mysql-test/suite/innodb/r/group_commit_crash.result index 5d5dffab33e..80a780ba2c5 100644 --- a/mysql-test/suite/innodb/r/group_commit_crash.result +++ b/mysql-test/suite/innodb/r/group_commit_crash.result @@ -124,3 +124,5 @@ delete from t1; DROP TABLE t1; DROP TABLE t2; DROP PROCEDURE setcrash; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/group_commit_crash_no_optimize_thread.result b/mysql-test/suite/innodb/r/group_commit_crash_no_optimize_thread.result index 542ce9d496e..06fdeaef6a7 100644 --- a/mysql-test/suite/innodb/r/group_commit_crash_no_optimize_thread.result +++ b/mysql-test/suite/innodb/r/group_commit_crash_no_optimize_thread.result @@ -124,3 +124,5 @@ delete from t1; DROP TABLE t1; DROP TABLE t2; DROP PROCEDURE setcrash; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/help_url.result b/mysql-test/suite/innodb/r/help_url.result index 10affe78f0c..87530bc2f7d 100644 --- a/mysql-test/suite/innodb/r/help_url.result +++ b/mysql-test/suite/innodb/r/help_url.result @@ -1,4 +1,2 @@ create table innodb_table_monitor (a int) engine=InnoDB; -Warnings: -Warning 131 Using the table name innodb_table_monitor to enable diagnostic output is deprecated and may be removed in future releases. Use INFORMATION_SCHEMA or PERFORMANCE_SCHEMA tables or SET GLOBAL innodb_status_output=ON. drop table innodb_table_monitor; diff --git a/mysql-test/suite/innodb/r/innodb-16k.result b/mysql-test/suite/innodb/r/innodb-16k.result index 0537315122c..adfbc97ea66 100644 --- a/mysql-test/suite/innodb/r/innodb-16k.result +++ b/mysql-test/suite/innodb/r/innodb-16k.result @@ -1,5 +1,11 @@ +call mtr.add_suppression("InnoDB: Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_large_prefix = OFF; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html # Test 1) Show the page size from Information Schema SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size'; @@ -224,6 +230,8 @@ table_name row_format create_options t1 Compressed row_format=COMPRESSED DROP TABLE t1; SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html CREATE TABLE t2(d varchar(17) PRIMARY KEY) ENGINE=innodb DEFAULT CHARSET=utf8; CREATE TABLE t3(a int PRIMARY KEY) ENGINE=innodb; INSERT INTO t3 VALUES (22),(44),(33),(55),(66); @@ -368,11 +376,6 @@ UPDATE t1 SET s=@e; CREATE INDEX t1t ON t1 (t(767)); UPDATE t1 SET t=@e; ERROR HY000: Undo log record is too big. -CREATE INDEX t1u ON t1 (u(767)); -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs -CREATE INDEX t1ut ON t1 (u(767), t(767)); -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs -CREATE INDEX t1st ON t1 (s(767), t(767)); SHOW CREATE TABLE t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -416,11 +419,12 @@ t1 CREATE TABLE `t1` ( KEY `t1q` (`q`(767)), KEY `t1r` (`r`(767)), KEY `t1s` (`s`(767)), - KEY `t1t` (`t`(767)), - KEY `t1st` (`s`(767),`t`(767)) + KEY `t1t` (`t`(767)) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DROP TABLE t1; SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; Test an assertion failure on purge. CREATE TABLE t1_purge ( @@ -464,6 +468,8 @@ DELETE FROM t3_purge; DELETE FROM t4_purge; SET GLOBAL innodb_file_per_table=on; SET GLOBAL innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET @r=REPEAT('a',500); CREATE TABLE tlong(a int, v1 varchar(500), v2 varchar(500), v3 varchar(500), @@ -552,7 +558,7 @@ SHOW WARNINGS; Level Code Message Error 1713 Undo log record is too big. DROP TABLE bug12547647; -SET SESSION innodb_strict_mode = off; +SET SESSION innodb_strict_mode = on; CREATE TABLE t1( c text NOT NULL, d text NOT NULL, PRIMARY KEY (c(767),d(767))) @@ -977,3 +983,7 @@ COL196 TEXT, COL197 TEXT) row_format=compact,ENGINE=INNODB; ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-alter-discard.result b/mysql-test/suite/innodb/r/innodb-alter-discard.result index 29712868239..b05b905bc34 100644 --- a/mysql-test/suite/innodb/r/innodb-alter-discard.result +++ b/mysql-test/suite/innodb/r/innodb-alter-discard.result @@ -1,5 +1,12 @@ +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: .*"); SET GLOBAL innodb_file_per_table=1; CREATE TABLE t(a INT)ENGINE=InnoDB; +call mtr.add_suppression("InnoDB: Operating system error number .* in a file operation."); +call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("InnoDB: If you are installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: './test/t.ibd' OS error: .*"); +call mtr.add_suppression("InnoDB: Ignoring tablespace `test/t` because it could not be opened."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. 
Please refer to .*"); call mtr.add_suppression("InnoDB: Error: trying to open a table, but could not$"); call mtr.add_suppression("MySQL is trying to open a table handle but the \.ibd file for$"); call mtr.add_suppression("InnoDB: Table 'test/t'$"); @@ -17,5 +24,5 @@ ERROR 42S02: Table 'test.t1' doesn't exist ALTER TABLE t DISCARD TABLESPACE; Warnings: Warning 1812 Tablespace is missing for table 'test/t' -Warning 1812 Tablespace is missing for table 't' +Warning 1812 Tablespace is missing for table 'test/t' DROP TABLE t; diff --git a/mysql-test/suite/innodb/r/innodb-blob.result b/mysql-test/suite/innodb/r/innodb-blob.result index ec5a4a8b0ac..fe4b1908fcb 100644 --- a/mysql-test/suite/innodb/r/innodb-blob.result +++ b/mysql-test/suite/innodb/r/innodb-blob.result @@ -1,3 +1,4 @@ +call mtr.add_suppression("InnoDB: The log sequence numbers [0-9]+ and [0-9]+ in ibdata files do not match the log sequence number [0-9]+ in the ib_logfiles!"); CREATE TABLE t1 (a INT PRIMARY KEY, b TEXT) ENGINE=InnoDB; CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; CREATE TABLE t3 (a INT PRIMARY KEY, b TEXT, c TEXT) ENGINE=InnoDB; @@ -18,7 +19,9 @@ a RIGHT(b,20) 1 aaaaaaaaaaaaaaaaaaaa 2 bbbbbbbbbbbbbbbbbbbb connection default; -SET DEBUG_DBUG='+d,row_ins_extern_checkpoint'; +SET DEBUG='+d,row_ins_extern_checkpoint'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead SET DEBUG_SYNC='before_row_ins_extern_latch SIGNAL rec_not_blob WAIT_FOR crash'; ROLLBACK; BEGIN; @@ -38,7 +41,9 @@ a 1 2 3 -SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead INSERT INTO t2 VALUES (42); ERROR HY000: Lost connection to MySQL server during query disconnect con1; @@ -51,18 +56,25 @@ test.t1 check status OK INSERT INTO t3 VALUES (1,REPEAT('d',7000),REPEAT('e',100)), (2,REPEAT('g',7000),REPEAT('h',100)); -SET DEBUG_SYNC='before_row_upd_extern SIGNAL have_latch WAIT_FOR go'; +SET DEBUG_SYNC='blob_write_middle SIGNAL go_sel WAIT_FOR go_upd'; UPDATE t3 SET c=REPEAT('f',3000) WHERE a=1; +# Connection con1: connect con1,localhost,root,,; -SET DEBUG_SYNC='now WAIT_FOR have_latch'; +SET DEBUG_SYNC='now WAIT_FOR go_sel'; SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; SELECT @@tx_isolation; @@tx_isolation READ-UNCOMMITTED SELECT a, RIGHT(b,20), RIGHT(c,20) FROM t3; -connect con2,localhost,root,,; -SET DEBUG_SYNC='now SIGNAL go'; +a RIGHT(b,20) RIGHT(c,20) +2 gggggggggggggggggggg hhhhhhhhhhhhhhhhhhhh +set debug_sync='now SIGNAL go_upd'; +# Connection default: +connection default; +# reap UPDATE t3 SET c=REPEAT('f',3000) WHERE a=1; +# Connection con1: connection con1; +SELECT a, RIGHT(b,20), RIGHT(c,20) FROM t3; a RIGHT(b,20) RIGHT(c,20) 1 dddddddddddddddddddd ffffffffffffffffffff 2 gggggggggggggggggggg hhhhhhhhhhhhhhhhhhhh @@ -73,11 +85,13 @@ Table Op Msg_type Msg_text test.t1 check status OK test.t2 check status OK test.t3 check status OK -connection con2; +connect con2,localhost,root,,; BEGIN; INSERT INTO t2 VALUES (347); connection default; -SET DEBUG_DBUG='+d,row_upd_extern_checkpoint'; +SET DEBUG='+d,row_upd_extern_checkpoint'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead SET DEBUG_SYNC='before_row_upd_extern SIGNAL have_latch WAIT_FOR crash'; UPDATE t3 SET c=REPEAT('i',3000) WHERE a=2; connection con2; @@ -86,7 +100,9 @@ SELECT info FROM information_schema.processlist WHERE state = 'debug sync point: before_row_upd_extern'; info UPDATE t3 SET c=REPEAT('i',3000) WHERE a=2 -SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead COMMIT; ERROR HY000: Lost connection to MySQL server during query disconnect con2; @@ -109,7 +125,9 @@ connect con2,localhost,root,,; BEGIN; INSERT INTO t2 VALUES (33101); connection default; -SET DEBUG_DBUG='+d,row_upd_extern_checkpoint'; +SET DEBUG='+d,row_upd_extern_checkpoint'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead SET DEBUG_SYNC='after_row_upd_extern SIGNAL have_latch WAIT_FOR crash'; UPDATE t3 SET c=REPEAT('j',3000) WHERE a=2; connection con2; @@ -118,7 +136,9 @@ SELECT info FROM information_schema.processlist WHERE state = 'debug sync point: after_row_upd_extern'; info UPDATE t3 SET c=REPEAT('j',3000) WHERE a=2 -SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead COMMIT; ERROR HY000: Lost connection to MySQL server during query disconnect con2; diff --git a/mysql-test/suite/innodb/r/innodb-bug-14068765.result b/mysql-test/suite/innodb/r/innodb-bug-14068765.result index 7a8f959b995..f6d37b23114 100644 --- a/mysql-test/suite/innodb/r/innodb-bug-14068765.result +++ b/mysql-test/suite/innodb/r/innodb-bug-14068765.result @@ -38,5 +38,7 @@ COUNT(*) 2 DROP TABLE testdb_wl5522.t1; DROP DATABASE testdb_wl5522; -SET GLOBAL INNODB_FILE_FORMAT=Antelope; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL INNODB_FILE_PER_TABLE=1; diff --git a/mysql-test/suite/innodb/r/innodb-bug-14084530.result b/mysql-test/suite/innodb/r/innodb-bug-14084530.result index 4b4f201300c..3ba8e0e8440 100644 --- a/mysql-test/suite/innodb/r/innodb-bug-14084530.result +++ b/mysql-test/suite/innodb/r/innodb-bug-14084530.result @@ -27,5 +27,7 @@ c1 SET AUTOCOMMIT = 1; DROP TABLE testdb_wl5522.t1; DROP DATABASE testdb_wl5522; -SET GLOBAL INNODB_FILE_FORMAT=Antelope; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL INNODB_FILE_PER_TABLE=1; diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result index 508d578193f..5abfb8cffa7 100644 --- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result +++ b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result @@ -2,13 +2,13 @@ # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE # OPERATION IF IT IS DONE IN-PLACE # -SET GLOBAL innodb_change_buffering_debug = 1; CREATE TABLE t1( a INT AUTO_INCREMENT PRIMARY KEY, b CHAR(1), c INT, INDEX(b)) -ENGINE=InnoDB; +ENGINE=InnoDB STATS_PERSISTENT=0; +SET GLOBAL innodb_change_buffering_debug = 1; INSERT INTO t1 VALUES(0,'x',1); INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; @@ -23,7 +23,6 @@ INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; -INSERT INTO t1 SELECT 0,b,c FROM t1; BEGIN; SELECT b FROM t1 LIMIT 3; b @@ -35,7 +34,9 @@ connection con1; BEGIN; DELETE FROM t1 WHERE a=1; INSERT INTO t1 VALUES(1,'X',1); -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; +SET DEBUG='+d,crash_after_log_ibuf_upd_inplace'; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead SELECT b FROM t1 LIMIT 3; ERROR HY000: Lost connection to MySQL server during query FOUND /Wrote log record for ibuf update in place operation/ in my_restart.err diff --git a/mysql-test/suite/innodb/r/innodb-fk-warnings.result b/mysql-test/suite/innodb/r/innodb-fk-warnings.result index d7c7acfb424..792cae85b55 100644 --- a/mysql-test/suite/innodb/r/innodb-fk-warnings.result +++ b/mysql-test/suite/innodb/r/innodb-fk-warnings.result @@ -16,7 +16,7 @@ CONSTRAINT test FOREIGN KEY (b) REFERENCES t2 (id) ERROR HY000: Can't create table `test`.`t2` (errno: 121 "Duplicate key on write or update") show warnings; Level Code Message -Warning 121 Create or Alter table `test`.`t2` with foreign key constraint failed. Foreign key constraint `test/test` already exists on data dictionary. Foreign key constraint names need to be unique in database. Error in foreign key definition: CONSTRAINT `test` FOREIGN KEY (`b`) REFERENCES `test`.`t2` (`id`). +Warning 121 Create or Alter table `test`.`t2` with foreign key constraint failed. Foreign key constraint `test`.`test` already exists on data dictionary. Foreign key constraint names need to be unique in database. Error in foreign key definition: CONSTRAINT `test` FOREIGN KEY (`b`) REFERENCES `test`.`t2` (`id`). Error 1005 Can't create table `test`.`t2` (errno: 121 "Duplicate key on write or update") Warning 1022 Can't write; duplicate key in table 't2' drop table t1; diff --git a/mysql-test/suite/innodb/r/innodb-index.result b/mysql-test/suite/innodb/r/innodb-index.result index e6ee836ae13..7cb8252b5cc 100644 --- a/mysql-test/suite/innodb/r/innodb-index.result +++ b/mysql-test/suite/innodb/r/innodb-index.result @@ -1,5 +1,7 @@ set global innodb_file_per_table=on; set global innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS; create table t1(a varchar(2) primary key) engine=innodb; insert into t1 values(''); @@ -467,9 +469,9 @@ ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 't2_ibfk alter table t2 DROP COLUMN b; ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 'test/t2_ibfk_1' alter table t1 DROP COLUMN b, ALGORITHM=COPY; -ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 't2_ibfk_1' of table 'test.t2' +ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 't2_ibfk_1' of table `test`.`t2` alter table t1 DROP COLUMN b; -ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 'test/t2_ibfk_1' of table '"test"."t2"' +ERROR HY000: Cannot drop column 'b': needed in a foreign key constraint 'test/t2_ibfk_1' of table `test`.`t2` SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS; create unique index dc on t2 (d,c); affected rows: 0 @@ -857,8 +859,12 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL PRIMARY 4 NULL 2 Using index drop table t1; set global innodb_file_per_table=1; -set global innodb_file_format=Antelope; -set global innodb_file_format_max=Antelope; +set global innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_format_max=Barracuda; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0; SET FOREIGN_KEY_CHECKS=0; CREATE TABLE t1( @@ -1189,3 +1195,7 @@ t2c CREATE TABLE `t2c` ( KEY `t2a` (`a`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE t1,t2,t2c,t2i; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-mdev-7408.result b/mysql-test/suite/innodb/r/innodb-mdev-7408.result index 8f6ad139192..80b46d3425c 100644 --- a/mysql-test/suite/innodb/r/innodb-mdev-7408.result +++ b/mysql-test/suite/innodb/r/innodb-mdev-7408.result @@ -1,3 +1,4 @@ +call mtr.add_suppression("InnoDB: User stopword table .* does not exist."); select @@global.innodb_ft_server_stopword_table; @@global.innodb_ft_server_stopword_table NULL diff --git a/mysql-test/suite/innodb/r/innodb-mdev-7513.result b/mysql-test/suite/innodb/r/innodb-mdev-7513.result index bb3531e3f90..55b4d3462b0 100644 --- a/mysql-test/suite/innodb/r/innodb-mdev-7513.result +++ b/mysql-test/suite/innodb/r/innodb-mdev-7513.result @@ -1,3 +1,4 @@ +call mtr.add_suppression("InnoDB: Cannot add field `.* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); call mtr.add_suppression("Row size too large (> 8126)*"); CREATE TABLE t1 ( text1 TEXT, text2 TEXT, diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result index 8d3bc063a71..a566c94bd3b 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result +++ 
b/mysql-test/suite/innodb/r/innodb-page_compression_bzip2.result @@ -1,4 +1,6 @@ set global innodb_file_format = `barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table = on; set global innodb_compression_algorithm = 5; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -435,3 +437,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result index eeab2622cb6..76cd5b16f28 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lz4.result @@ -1,4 +1,6 @@ set global innodb_file_format = `barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table = on; set global innodb_compression_algorithm = 2; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -436,3 +438,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result index d340801b656..cceff820ee0 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzma.result @@ -1,4 +1,6 @@ set global innodb_file_format = `barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table = on; set global innodb_compression_algorithm = 4; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -435,3 +437,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result index fdbc99f60d9..1a9235fab62 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_lzo.result @@ -1,4 +1,6 @@ set global innodb_file_format = `barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table = on; set global innodb_compression_algorithm = 3; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -349,3 +351,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result index a0b2f947fd3..e1d46b04a7f 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_snappy.result @@ -1,5 +1,7 @@ call mtr.add_suppression("Compression failed for space*"); set global innodb_file_format = `barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table = on; set global innodb_compression_algorithm = 6; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -436,3 +438,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_tables.result b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result index 98de5db3c12..a0ac8986b9e 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_tables.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_tables.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 1; create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; @@ -91,6 +93,8 @@ select count(*) from innodb_dynamic where c1 < 1500000; count(*) 5000 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 0; alter table innodb_compact engine=innodb page_compressed=DEFAULT; @@ -119,3 +123,5 @@ drop procedure innodb_insert_proc; drop table innodb_normal; drop table innodb_compact; drop table innodb_dynamic; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-page_compression_zip.result b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result index 4c3ab273b2e..a8078c02e13 100644 --- a/mysql-test/suite/innodb/r/innodb-page_compression_zip.result +++ b/mysql-test/suite/innodb/r/innodb-page_compression_zip.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; set global innodb_compression_algorithm = 1; create table innodb_compressed(c1 int, b char(20)) engine=innodb row_format=compressed key_block_size=8; @@ -349,3 +351,5 @@ drop table innodb_page_compressed6; drop table innodb_page_compressed7; drop table innodb_page_compressed8; drop table innodb_page_compressed9; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb-virtual-columns.result b/mysql-test/suite/innodb/r/innodb-virtual-columns.result index e613f76d5bf..9837f567954 100644 --- a/mysql-test/suite/innodb/r/innodb-virtual-columns.result +++ b/mysql-test/suite/innodb/r/innodb-virtual-columns.result @@ -23,6 +23,22 @@ deg_start_term char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginn deg_as_of_term char(4) NOT NULL COMMENT 'In most cases also end term', CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; +Table Create Table +grad_degree CREATE TABLE `grad_degree` ( + `student_id` int(8) unsigned NOT NULL, + `plan` varchar(10) NOT NULL, + `admit_term` char(4) NOT NULL, + `wdraw_rsn` varchar(4) NOT NULL DEFAULT '', + `ofis_deg_status` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed' + ELSE 'Not Completed' + END) VIRTUAL, + `deg_start_term` char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginning of the data', + `deg_as_of_term` char(4) NOT NULL COMMENT 'In most cases also end term', + PRIMARY KEY (`student_id`,`plan`,`admit_term`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); CREATE INDEX grad_degree_as_of_term_ndx ON grad_degree (deg_as_of_term); INSERT IGNORE grad_degree ( @@ -117,6 +133,57 @@ deg_start_term char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginn deg_as_of_term char(4) NOT NULL COMMENT 'In most cases also end term', CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; +Table Create Table +grad_degree CREATE TABLE `grad_degree` ( + `student_id` int(8) unsigned NOT NULL, + `plan` varchar(10) NOT NULL, + `admit_term` char(4) 
NOT NULL, + `wdraw_rsn` varchar(4) NOT NULL DEFAULT '', + `ofis_deg_status` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed' + ELSE 'Not Completed' + END) VIRTUAL, + `ofis_deg_status2` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress2' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed2' + ELSE 'Not Completed2' + END) VIRTUAL, + `ofis_deg_status3` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress3' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed3' + ELSE 'Not Completed3' + END) VIRTUAL, + `ofis_deg_status4` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress4' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed4' + ELSE 'Not Completed4' + END) VIRTUAL, + `ofis_deg_status5` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress5' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed5' + ELSE 'Not Completed5' + END) VIRTUAL, + `ofis_deg_status6` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress6' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed6' + ELSE 'Not Completed6' + END) VIRTUAL, + `ofis_deg_status7` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress7' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed7' + ELSE 'Not Completed7' + END) VIRTUAL, + `ofis_deg_status8` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress8' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed8' + ELSE 'Not Completed8' + END) VIRTUAL, + `deg_start_term` char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginning of the data', + `deg_as_of_term` char(4) NOT NULL COMMENT 'In most cases also end term', + PRIMARY KEY (`student_id`,`plan`,`admit_term`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); CREATE INDEX grad_degree_as_of_term_ndx ON grad_degree (deg_as_of_term); INSERT IGNORE grad_degree ( @@ 
-264,6 +331,57 @@ deg_start_term char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginn deg_as_of_term char(4) NOT NULL COMMENT 'In most cases also end term', CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; +Table Create Table +grad_degree CREATE TABLE `grad_degree` ( + `student_id` int(8) unsigned NOT NULL, + `plan` varchar(10) NOT NULL, + `admit_term` char(4) NOT NULL, + `wdraw_rsn` varchar(4) NOT NULL DEFAULT '', + `ofis_deg_status` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed' + ELSE 'Not Completed' + END) VIRTUAL, + `ofis_deg_status2` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress2' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed2' + ELSE 'Not Completed2' + END) VIRTUAL, + `ofis_deg_status3` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress3' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed3' + ELSE 'Not Completed3' + END) VIRTUAL, + `ofis_deg_status4` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress4' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed4' + ELSE 'Not Completed4' + END) VIRTUAL, + `ofis_deg_status5` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress5' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed5' + ELSE 'Not Completed5' + END) VIRTUAL, + `ofis_deg_status6` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress6' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed6' + ELSE 'Not Completed6' + END) VIRTUAL, + `ofis_deg_status7` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress7' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 'Completed7' + ELSE 'Not Completed7' + END) VIRTUAL, + `ofis_deg_status8` varchar(15) AS (CASE +WHEN wdraw_rsn = '' THEN 'In progress8' + WHEN wdraw_rsn = 'DCMP' OR wdraw_rsn = 'TRDC' THEN 
'Completed8' + ELSE 'Not Completed8' + END) VIRTUAL, + `deg_start_term` char(4) NOT NULL DEFAULT '' COMMENT 'Educated guess at the beginning of the data', + `deg_as_of_term` char(4) NOT NULL COMMENT 'In most cases also end term', + PRIMARY KEY (`student_id`,`plan`,`admit_term`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); ALTER TABLE grad_degree DROP COLUMN ofis_deg_status2, DROP COLUMN ofis_deg_status3, DROP COLUMN ofis_deg_status4, DROP COLUMN ofis_deg_status5, DROP COLUMN ofis_deg_status6, diff --git a/mysql-test/suite/innodb/r/innodb-wl5522,xtradb.rdiff b/mysql-test/suite/innodb/r/innodb-wl5522,xtradb.rdiff-disabled similarity index 100% rename from mysql-test/suite/innodb/r/innodb-wl5522,xtradb.rdiff rename to mysql-test/suite/innodb/r/innodb-wl5522,xtradb.rdiff-disabled diff --git a/mysql-test/suite/innodb/r/innodb-wl5522-1.result b/mysql-test/suite/innodb/r/innodb-wl5522-1.result index 060840859a7..ec28ead12b7 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522-1.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522-1.result @@ -1,9 +1,12 @@ +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); DROP TABLE IF EXISTS t1; SET GLOBAL innodb_file_per_table = 1; SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -126,6 +129,8 @@ COUNT(*) 2 DROP TABLE testdb_wl5522.t1; SET GLOBAL innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html CREATE TABLE testdb_wl5522.t1 ( col_1_varbinary VARBINARY (4000) , col_2_varchar VARCHAR (4000), @@ -410,7 +415,7 @@ ALTER TABLE testdb_wl5522.t1 DISCARD TABLESPACE; restore: t1 .ibd and .cfg files ALTER TABLE testdb_wl5522.t1 IMPORT TABLESPACE; ALTER TABLE testdb_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Tablespace for table 't1' exists. Please DISCARD the tablespace before IMPORT. +ERROR HY000: Tablespace for table 'testdb_wl5522/t1' exists. Please DISCARD the tablespace before IMPORT. SELECT * FROM testdb_wl5522.t1 ORDER BY i; i 100 @@ -807,5 +812,7 @@ DROP DATABASE testdb_wl5522; call mtr.add_suppression("Got error -1 when reading table '.*'"); call mtr.add_suppression("InnoDB: Error: tablespace id and flags in file '.*'.*"); call mtr.add_suppression("InnoDB: The table .* doesn't have a corresponding tablespace, it was discarded"); -SET GLOBAL INNODB_FILE_FORMAT=Antelope; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL INNODB_FILE_PER_TABLE=1; diff --git a/mysql-test/suite/innodb/r/innodb-wl5522-debug-zip.result b/mysql-test/suite/innodb/r/innodb-wl5522-debug-zip.result index 0e863f5849e..1246cb36e99 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522-debug-zip.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522-debug-zip.result @@ -1,8 +1,16 @@ +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. 
Please refer to .* for how to resolve the issue."); +call mtr.add_suppression("InnoDB: Error: Tablespace flags .* corrupted unused .*"); +call mtr.add_suppression("InnoDB: Tablespace flags: .* corrupted in file: .* "); +call mtr.add_suppression("InnoDB: Page 0 at offset 0 looks corrupted in file .*"); +flush tables; SET GLOBAL innodb_file_per_table = 1; SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -26,17 +34,17 @@ ROW_FORMAT=COMPRESSED; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_before_commit_crash"; SELECT * FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; ERROR HY000: Lost connection to MySQL server during query SET SESSION debug_dbug="-d,ib_import_before_commit_crash"; SET SESSION debug_dbug="+d,ib_import_before_checkpoint_crash"; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; ERROR HY000: Lost connection to MySQL server during query unlink: t1.ibd @@ -49,6 +57,8 @@ SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -82,11 +92,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb ROW_FORMAT=COMPRESSED; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_internal_error"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: While updating the of index "GEN_CLUST_INDEX" - Generic error +ERROR HY000: Internal error: While updating the of index GEN_CLUST_INDEX - Generic error SET SESSION debug_dbug="-d,ib_import_internal_error"; restore: t1 .ibd and .cfg files ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -95,11 +105,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb ROW_FORMAT=COMPRESSED; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_reset_space_and_lsn_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Too many concurrent transactions +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Too many concurrent transactions restore: t1 .ibd and .cfg files SET SESSION debug_dbug="-d,ib_import_reset_space_and_lsn_failure"; SET SESSION debug_dbug="+d,ib_import_open_tablespace_failure"; @@ -109,27 +119,27 @@ SET SESSION debug_dbug="-d,ib_import_open_tablespace_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR 
HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_check_bitmap_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_cluster_root_adjust_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_cluster_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_cluster_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_sec_root_adjust_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_sec_root_adjust_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_set_max_rowid_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_set_max_rowid_failure"; unlink: t1.ibd unlink: t1.cfg @@ -442,7 +452,7 @@ t1 CREATE TABLE `t1` ( KEY `idx1` (`c2`), KEY `idx2` (`c3`(512)), KEY `idx3` (`c4`(512)) -) ENGINE=InnoDB AUTO_INCREMENT=185 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8 +) ENGINE=InnoDB AUTO_INCREMENT=248 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8 DROP TABLE test_wl5522.t1; CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; @@ -469,11 +479,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), 
c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_1"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Data structure corruption +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Data structure corruption SET SESSION debug_dbug="-d,ib_import_trigger_corruption_1"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -482,10 +492,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,buf_page_is_corrupt_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Data structure corruption SET SESSION debug_dbug="-d,buf_page_is_corrupt_failure"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -494,11 +505,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_2"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Index corrupt: Externally stored column(5) has a reference length of 19 in the cluster index 
"GEN_CLUST_INDEX" +ERROR HY000: Index corrupt: Externally stored column(5) has a reference length of 19 in the cluster index GEN_CLUST_INDEX SET SESSION debug_dbug="-d,ib_import_trigger_corruption_2"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -507,11 +518,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_3"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_trigger_corruption_3"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -520,11 +531,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_create_index_failure_1"; ALTER TABLE test_wl5522.t1 ADD INDEX idx(c1); Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` SET SESSION debug_dbug="-d,ib_import_create_index_failure_1"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -533,7 +544,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd 
and .cfg files SET SESSION debug_dbug="+d,fil_space_create_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -546,7 +557,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,dict_tf_to_fsp_flags_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -559,11 +570,11 @@ CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,fsp_flags_is_valid_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Unsupported +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Unsupported SET SESSION debug_dbug="-d,fsp_flags_is_valid_failure"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -576,5 +587,7 @@ set global innodb_monitor_disable = default; set global innodb_monitor_reset = default; set global innodb_monitor_reset_all = default; SET GLOBAL INNODB_FILE_PER_TABLE=1; -SET GLOBAL INNODB_FILE_FORMAT=Antelope; -SET SESSION innodb_strict_mode=0; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET SESSION innodb_strict_mode=1; diff --git a/mysql-test/suite/innodb/r/innodb-wl5522-debug.result b/mysql-test/suite/innodb/r/innodb-wl5522-debug.result index 0c914ebc7a6..0138d2ad19a 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522-debug.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522-debug.result @@ -1,3 +1,12 @@ +call mtr.add_suppression("InnoDB: Operating system error number .* in a file operation."); +call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("InnoDB: If you are installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: .*"); +call mtr.add_suppression("InnoDB: Tablespace flags: .*"); +call mtr.add_suppression("InnoDB: Ignoring tablespace .* because it could not be opened."); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .*"); +call mtr.add_suppression("InnoDB: Page 0 at offset 0 looks corrupted in file .*"); SET GLOBAL innodb_file_per_table = 1; SELECT @@innodb_file_per_table; @@innodb_file_per_table @@ -38,17 +47,17 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_before_commit_crash"; SELECT * FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; ERROR HY000: Lost connection to MySQL server during query SET SESSION 
debug_dbug="-d,ib_import_before_commit_crash"; SET SESSION debug_dbug="+d,ib_import_before_checkpoint_crash"; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; ERROR HY000: Lost connection to MySQL server during query unlink: t1.ibd @@ -214,7 +223,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_1"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -225,7 +234,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_2"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -236,7 +245,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_3"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -247,7 +256,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded 
for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_4"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -258,7 +267,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_5"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -269,7 +278,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_6"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -280,7 +289,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_7"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -291,7 +300,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_8"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -302,7 +311,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES 
(1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_io_read_error_9"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -313,7 +322,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_string_read_error"; restore: t1 .cfg file ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -324,7 +333,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_1"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -337,7 +346,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_2"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -350,7 +359,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET 
SESSION debug_dbug="+d,ib_import_OOM_4"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -363,7 +372,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_5"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -376,7 +385,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_6"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -389,7 +398,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_7"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -402,7 +411,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_8"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -415,7 +424,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; 
SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_9"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -428,7 +437,7 @@ CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES (1); ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_OOM_10"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -440,11 +449,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_internal_error"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: While updating the of index "GEN_CLUST_INDEX" - Generic error +ERROR HY000: Internal error: While updating the of index GEN_CLUST_INDEX - Generic error SET SESSION debug_dbug="-d,ib_import_internal_error"; restore: t1 .ibd and .cfg files ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -452,11 +461,11 @@ DROP TABLE test_wl5522.t1; CREATE TABLE test_wl5522.t1 (c1 INT) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_reset_space_and_lsn_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Too many 
concurrent transactions +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Too many concurrent transactions restore: t1 .ibd and .cfg files SET SESSION debug_dbug="-d,ib_import_reset_space_and_lsn_failure"; SET SESSION debug_dbug="+d,ib_import_open_tablespace_failure"; @@ -466,27 +475,27 @@ SET SESSION debug_dbug="-d,ib_import_open_tablespace_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_check_bitmap_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_check_bitmap_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_cluster_root_adjust_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_cluster_root_adjust_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_cluster_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_cluster_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_sec_root_adjust_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_sec_root_adjust_failure"; restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_set_max_rowid_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION 
debug_dbug="-d,ib_import_set_max_rowid_failure"; unlink: t1.ibd unlink: t1.cfg @@ -797,7 +806,7 @@ t1 CREATE TABLE `t1` ( KEY `idx1` (`c2`), KEY `idx2` (`c3`(512)), KEY `idx3` (`c4`(512)) -) ENGINE=InnoDB AUTO_INCREMENT=185 DEFAULT CHARSET=latin1 +) ENGINE=InnoDB AUTO_INCREMENT=248 DEFAULT CHARSET=latin1 DROP TABLE test_wl5522.t1; CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; INSERT INTO test_wl5522.t1 VALUES @@ -822,11 +831,11 @@ DROP TABLE test_wl5522.t1; CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_1"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Data structure corruption +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Data structure corruption SET SESSION debug_dbug="-d,ib_import_trigger_corruption_1"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -834,11 +843,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,buf_page_is_corrupt_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Data structure corruption +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Data structure corruption SET SESSION debug_dbug="-d,buf_page_is_corrupt_failure"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ 
-846,11 +855,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_2"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Index corrupt: Externally stored column(5) has a reference length of 19 in the cluster index "GEN_CLUST_INDEX" +ERROR HY000: Index corrupt: Externally stored column(5) has a reference length of 19 in the cluster index GEN_CLUST_INDEX SET SESSION debug_dbug="-d,ib_import_trigger_corruption_2"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -858,11 +867,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,ib_import_trigger_corruption_3"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Incorrect key file for table 't1'; try to repair it +ERROR HY000: Index for table 't1' is corrupt; try to repair it SET SESSION debug_dbug="-d,ib_import_trigger_corruption_3"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -870,11 +879,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` SET SESSION debug_dbug="+d,ib_import_create_index_failure_1"; ALTER TABLE test_wl5522.t1 ADD INDEX idx(c1); Warnings: -Warning 1814 Tablespace has been discarded for table 't1' 
+Warning 1814 Tablespace has been discarded for table `t1` SET SESSION debug_dbug="-d,ib_import_create_index_failure_1"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -882,7 +891,7 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,fil_space_create_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -894,7 +903,7 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,dict_tf_to_fsp_flags_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; @@ -906,11 +915,11 @@ unlink: t1.cfg CREATE TABLE test_wl5522.t1 (c1 INT, c2 VARCHAR(1024), c3 BLOB) ENGINE = Innodb; ALTER TABLE test_wl5522.t1 DISCARD TABLESPACE; SELECT COUNT(*) FROM test_wl5522.t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files SET SESSION debug_dbug="+d,fsp_flags_is_valid_failure"; ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; -ERROR HY000: Internal error: Cannot reset LSNs in table '"test_wl5522"."t1"' : Unsupported +ERROR HY000: Internal error: Cannot reset LSNs in table `test_wl5522`.`t1` : Unsupported SET SESSION debug_dbug="-d,fsp_flags_is_valid_failure"; DROP TABLE test_wl5522.t1; unlink: t1.ibd @@ -922,4 +931,8 @@ set global innodb_monitor_enable = default; set global innodb_monitor_disable = default; set global innodb_monitor_reset = default; set global innodb_monitor_reset_all = default; 
+Warnings: +Error 145 Table './mtr/test_suppressions' is marked as crashed and should be repaired +Error 1194 Table 'test_suppressions' is marked as crashed and should be repaired +Error 1034 1 client is using or hasn't closed the table properly SET GLOBAL INNODB_FILE_PER_TABLE=1; diff --git a/mysql-test/suite/innodb/r/innodb-wl5522-zip.result b/mysql-test/suite/innodb/r/innodb-wl5522-zip.result index 47413b18ce9..c97c596be67 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522-zip.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522-zip.result @@ -1,9 +1,12 @@ +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); DROP TABLE IF EXISTS t1; SET GLOBAL innodb_file_per_table = 1; SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -74,9 +77,6 @@ ALTER TABLE t1 DISCARD TABLESPACE; t1.frm ALTER TABLE t1 IMPORT TABLESPACE; ALTER TABLE t1 ENGINE InnoDB; -Warnings: -Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. SELECT COUNT(*) FROM t1; COUNT(*) 640 @@ -112,6 +112,8 @@ SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -124,7 +126,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; INSERT INTO t1(c2) VALUES(1); ALTER TABLE t1 IMPORT TABLESPACE; -ERROR HY000: Tablespace for table 't1' exists. Please DISCARD the tablespace before IMPORT. +ERROR HY000: Tablespace for table 'test/t1' exists. Please DISCARD the tablespace before IMPORT. SELECT * FROM t1; c1 c2 1 1 @@ -154,7 +156,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files t1.cfg t1.frm @@ -196,7 +198,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files t1.cfg t1.frm @@ -236,7 +238,7 @@ c2 INT, INDEX(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -270,16 +272,16 @@ c2 INT, INDEX x(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Index x not found in tablespace meta-data file.) 
ALTER TABLE t1 DROP INDEX x; Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` ALTER TABLE t1 ADD INDEX idx(c2); Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -326,7 +328,7 @@ t1 CREATE TABLE `t1` ( ) ENGINE=InnoDB AUTO_INCREMENT=28 DEFAULT CHARSET=latin1 FLUSH TABLES t1 FOR EXPORT; Warnings: -Warning 1809 Table '"test"."t1"' in system tablespace +Warning 1809 Table `test`.`t1` in system tablespace UNLOCK TABLES; DROP TABLE t1; SET GLOBAL innodb_file_per_table = 1; @@ -392,7 +394,7 @@ c2 INT) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Number of indexes don't match, table has 1 indexes but the tablespace meta-data file has 2 indexes) @@ -406,7 +408,7 @@ c3 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Number of columns don't match, table has 6 columns but the tablespace meta-data file has 5 columns) @@ -419,7 +421,7 @@ c2 BIGINT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Column c2 precise type mismatch.) 
@@ -432,7 +434,7 @@ c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch @@ -445,7 +447,7 @@ c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -459,7 +461,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED SELECT * FROM t1; c1 c2 1 1 @@ -498,6 +500,8 @@ DROP TABLE t1; call mtr.add_suppression("Got error -1 when reading table '.*'"); call mtr.add_suppression("InnoDB: Error: tablespace id and flags in file '.*'.*"); call mtr.add_suppression("InnoDB: The table .* doesn't have a corresponding tablespace, it was discarded"); -SET GLOBAL INNODB_FILE_FORMAT=Antelope; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL INNODB_FILE_PER_TABLE=1; -SET SESSION innodb_strict_mode=0; +SET SESSION innodb_strict_mode=1; diff --git a/mysql-test/suite/innodb/r/innodb-wl5522.result b/mysql-test/suite/innodb/r/innodb-wl5522.result index fb4ac37b9fd..b5296131106 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522.result +++ b/mysql-test/suite/innodb/r/innodb-wl5522.result @@ -1,9 +1,12 @@ +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); DROP TABLE IF EXISTS t1; SET GLOBAL innodb_file_per_table = 1; SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -105,6 +108,8 @@ SELECT @@innodb_file_per_table; @@innodb_file_per_table 1 SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@innodb_file_format; @@innodb_file_format Barracuda @@ -113,7 +118,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB; INSERT INTO t1(c2) VALUES(1); ALTER TABLE t1 IMPORT TABLESPACE; -ERROR HY000: Tablespace for table 't1' exists. Please DISCARD the tablespace before IMPORT. +ERROR HY000: Tablespace for table 'test/t1' exists. Please DISCARD the tablespace before IMPORT. 
SELECT * FROM t1; c1 c2 1 1 @@ -143,7 +148,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files t1.cfg t1.frm @@ -185,7 +190,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files t1.cfg t1.frm @@ -223,7 +228,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX(c2)) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -255,16 +260,16 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX x(c2)) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Index x not found in tablespace meta-data file.) 
ALTER TABLE t1 DROP INDEX x; Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` ALTER TABLE t1 ADD INDEX idx(c2); Warnings: -Warning 1814 Tablespace has been discarded for table 't1' +Warning 1814 Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -311,7 +316,7 @@ t1 CREATE TABLE `t1` ( ) ENGINE=InnoDB AUTO_INCREMENT=28 DEFAULT CHARSET=latin1 FLUSH TABLES t1 FOR EXPORT; Warnings: -Warning 1809 Table '"test"."t1"' in system tablespace +Warning 1809 Table `test`.`t1` in system tablespace UNLOCK TABLES; DROP TABLE t1; SET GLOBAL innodb_file_per_table = 1; @@ -375,7 +380,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Number of indexes don't match, table has 1 indexes but the tablespace meta-data file has 2 indexes) @@ -388,7 +393,7 @@ c2 INT, c3 INT, INDEX idx(c2)) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Number of columns don't match, table has 6 columns but the tablespace meta-data file has 5 columns) @@ -400,7 +405,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 BIGINT, INDEX idx(c2)) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Column c2 precise type 
mismatch.) @@ -412,7 +417,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -426,7 +431,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 SELECT * FROM t1; c1 c2 1 1 @@ -522,7 +527,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=REDUNDANT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -536,7 +541,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT SELECT * FROM t1; c1 c2 1 1 @@ -577,7 +582,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPACT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the meta-data file has 0x0) @@ -589,7 +594,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: 
Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the meta-data file has 0x0) @@ -601,7 +606,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=REDUNDANT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; unlink: t1.cfg @@ -612,7 +617,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT SELECT * FROM t1; c1 c2 1 1 @@ -708,7 +713,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPACT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -722,7 +727,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT SELECT * FROM t1; c1 c2 1 1 @@ -763,7 +768,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=REDUNDANT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the 
meta-data file has 0x1) @@ -775,7 +780,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the meta-data file has 0x1) @@ -787,7 +792,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPACT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -801,7 +806,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT SELECT * FROM t1; c1 c2 1 1 @@ -897,7 +902,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -911,7 +916,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC SELECT * FROM t1; c1 c2 1 1 @@ -952,7 +957,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=COMPACT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR 
HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the meta-data file has 0x21) @@ -964,7 +969,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=REDUNDANT; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; ERROR HY000: Schema mismatch (Table flags don't match, server table has 0x5 and the meta-data file has 0x21) @@ -976,7 +981,7 @@ c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT, INDEX idx(c2)) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; ALTER TABLE t1 DISCARD TABLESPACE; SELECT * FROM t1; -ERROR HY000: Tablespace has been discarded for table 't1' +ERROR HY000: Tablespace has been discarded for table `t1` restore: t1 .ibd and .cfg files ALTER TABLE t1 IMPORT TABLESPACE; CHECK TABLE t1; @@ -990,7 +995,7 @@ t1 CREATE TABLE `t1` ( `c2` int(11) DEFAULT NULL, PRIMARY KEY (`c1`), KEY `idx` (`c2`) -) ENGINE=InnoDB AUTO_INCREMENT=44 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +) ENGINE=InnoDB AUTO_INCREMENT=59 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC SELECT * FROM t1; c1 c2 1 1 @@ -1029,5 +1034,7 @@ DROP TABLE t1; call mtr.add_suppression("Got error -1 when reading table '.*'"); call mtr.add_suppression("InnoDB: Error: tablespace id and flags in file '.*'.*"); call mtr.add_suppression("InnoDB: The table .* doesn't have a corresponding tablespace, it was discarded"); -SET GLOBAL INNODB_FILE_FORMAT=Antelope; +SET GLOBAL INNODB_FILE_FORMAT=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL INNODB_FILE_PER_TABLE=1; diff --git a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result index 4c890ddf0c0..d278feeee11 100644 --- a/mysql-test/suite/innodb/r/innodb.result +++ b/mysql-test/suite/innodb/r/innodb.result @@ -947,6 +947,7 @@ desc t1; Field Type Null Key Default Extra t int(11) NO MUL 1 drop table t1; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; CREATE TABLE t1 ( number bigint(20) NOT NULL default '0', cname char(15) NOT NULL default '', @@ -1009,6 +1010,7 @@ select * from t2; number cname carrier_id privacy last_mod_date last_mod_id last_app_date last_app_id version assigned_scps status 333 tubs 99 2 2002-01-09 11:34:53 501 2002-01-09 11:34:53 500 3 10 0 drop table t1,t2; +SET sql_mode = default; create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb; BEGIN; SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE; @@ -1316,16 +1318,80 @@ INSERT INTO t2 VALUES (10, 'old'), (20, 'other'); UPDATE t1 SET c1 = 'other' WHERE c1 = 'old'; ERROR 23000: Foreign key constraint for table 't1', record 'other-somevalu' would lead to a duplicate entry in table 't2', key 'c1' DROP TABLE t2,t1; +call mtr.add_suppression("Cannot delete/update rows with cascading foreign key constraints that exceed max depth of 255. 
Please drop excessive foreign constraints and try again"); create table t1( id int primary key, pid int, index(pid), foreign key(pid) references t1(id) on delete cascade) engine=innodb; -insert into t1 values(0,0),(1,0),(2,1),(3,2),(4,3),(5,4),(6,5),(7,6), -(8,7),(9,8),(10,9),(11,10),(12,11),(13,12),(14,13),(15,14); +insert into t1 values +( 0, 0), ( 1, 0), ( 2, 1), ( 3, 2), +( 4, 3), ( 5, 4), ( 6, 5), ( 7, 6), +( 8, 7), ( 9, 8), ( 10, 9), ( 11, 10), +( 12, 11), ( 13, 12), ( 14, 13), ( 15, 14), +( 16, 15), ( 17, 16), ( 18, 17), ( 19, 18), +( 20, 19), ( 21, 20), ( 22, 21), ( 23, 22), +( 24, 23), ( 25, 24), ( 26, 25), ( 27, 26), +( 28, 27), ( 29, 28), ( 30, 29), ( 31, 30), +( 32, 31), ( 33, 32), ( 34, 33), ( 35, 34), +( 36, 35), ( 37, 36), ( 38, 37), ( 39, 38), +( 40, 39), ( 41, 40), ( 42, 41), ( 43, 42), +( 44, 43), ( 45, 44), ( 46, 45), ( 47, 46), +( 48, 47), ( 49, 48), ( 50, 49), ( 51, 50), +( 52, 51), ( 53, 52), ( 54, 53), ( 55, 54), +( 56, 55), ( 57, 56), ( 58, 57), ( 59, 58), +( 60, 59), ( 61, 60), ( 62, 61), ( 63, 62), +( 64, 63), ( 65, 64), ( 66, 65), ( 67, 66), +( 68, 67), ( 69, 68), ( 70, 69), ( 71, 70), +( 72, 71), ( 73, 72), ( 74, 73), ( 75, 74), +( 76, 75), ( 77, 76), ( 78, 77), ( 79, 78), +( 80, 79), ( 81, 80), ( 82, 81), ( 83, 82), +( 84, 83), ( 85, 84), ( 86, 85), ( 87, 86), +( 88, 87), ( 89, 88), ( 90, 89), ( 91, 90), +( 92, 91), ( 93, 92), ( 94, 93), ( 95, 94), +( 96, 95), ( 97, 96), ( 98, 97), ( 99, 98), +(100, 99), (101, 100), (102, 101), (103, 102), +(104, 103), (105, 104), (106, 105), (107, 106), +(108, 107), (109, 108), (110, 109), (111, 110), +(112, 111), (113, 112), (114, 113), (115, 114), +(116, 115), (117, 116), (118, 117), (119, 118), +(120, 119), (121, 120), (122, 121), (123, 122), +(124, 123), (125, 124), (126, 125), (127, 126), +(128, 127), (129, 128), (130, 129), (131, 130), +(132, 131), (133, 132), (134, 133), (135, 134), +(136, 135), (137, 136), (138, 137), (139, 138), +(140, 139), (141, 140), (142, 141), (143, 142), +(144, 143), 
(145, 144), (146, 145), (147, 146), +(148, 147), (149, 148), (150, 149), (151, 150), +(152, 151), (153, 152), (154, 153), (155, 154), +(156, 155), (157, 156), (158, 157), (159, 158), +(160, 159), (161, 160), (162, 161), (163, 162), +(164, 163), (165, 164), (166, 165), (167, 166), +(168, 167), (169, 168), (170, 169), (171, 170), +(172, 171), (173, 172), (174, 173), (175, 174), +(176, 175), (177, 176), (178, 177), (179, 178), +(180, 179), (181, 180), (182, 181), (183, 182), +(184, 183), (185, 184), (186, 185), (187, 186), +(188, 187), (189, 188), (190, 189), (191, 190), +(192, 191), (193, 192), (194, 193), (195, 194), +(196, 195), (197, 196), (198, 197), (199, 198), +(200, 199), (201, 200), (202, 201), (203, 202), +(204, 203), (205, 204), (206, 205), (207, 206), +(208, 207), (209, 208), (210, 209), (211, 210), +(212, 211), (213, 212), (214, 213), (215, 214), +(216, 215), (217, 216), (218, 217), (219, 218), +(220, 219), (221, 220), (222, 221), (223, 222), +(224, 223), (225, 224), (226, 225), (227, 226), +(228, 227), (229, 228), (230, 229), (231, 230), +(232, 231), (233, 232), (234, 233), (235, 234), +(236, 235), (237, 236), (238, 237), (239, 238), +(240, 239), (241, 240), (242, 241), (243, 242), +(244, 243), (245, 244), (246, 245), (247, 246), +(248, 247), (249, 248), (250, 249), (251, 250), +(252, 251), (253, 252), (254, 253), (255, 254); delete from t1 where id=0; -ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t1`, CONSTRAINT `t1_ibfk_1` FOREIGN KEY (`pid`) REFERENCES `t1` (`id`) ON DELETE CASCADE) -delete from t1 where id=15; +Got one of the listed errors +delete from t1 where id=255; delete from t1 where id=0; drop table t1; CREATE TABLE t1 (col1 int(1))ENGINE=InnoDB; @@ -1628,6 +1694,7 @@ a drop table t1; create table t1 (a int not null, b int not null, c blob not null, d int not null, e int, primary key (a,b,c(255),d)) engine=innodb; insert into t1 values (2,2,"b",2,2),(1,1,"a",1,1),(3,3,"ab",3,3); +analyze table t1; 
select * from t1 order by a,b,c,d; a b c d e 1 1 a 1 1 @@ -1689,10 +1756,10 @@ variable_value 16384 SELECT variable_value - @innodb_rows_deleted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_deleted'; variable_value - @innodb_rows_deleted_orig -71 +311 SELECT variable_value - @innodb_rows_inserted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted'; variable_value - @innodb_rows_inserted_orig -964 +1204 SELECT variable_value - @innodb_rows_updated_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated'; variable_value - @innodb_rows_updated_orig 866 @@ -2259,7 +2326,7 @@ t1 CREATE TABLE `t1` ( drop table t1; create table t1 (v varchar(10), c char(10)) row_format=fixed; Warnings: -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. show create table t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -2287,9 +2354,16 @@ select * from t1 where a=20 and b is null; a b 20 NULL drop table t1; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +SET GLOBAL innodb_large_prefix=OFF; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t1 (v varchar(65530), key(v)); Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes +SET GLOBAL innodb_large_prefix=default; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html drop table t1; create table t1 (v varchar(65536)); Warnings: @@ -2309,7 +2383,8 @@ t1 CREATE TABLE `t1` ( `v` mediumtext CHARACTER SET utf8 DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 drop table t1; -set storage_engine=MyISAM; +SET sql_mode = default; +set default_storage_engine=MyISAM; create table t1 (v varchar(16384)) engine=innodb; drop table t1; create table t1 (a char(1), b char(1), key(a, b)) engine=innodb; @@ -2434,6 +2509,10 @@ t9 CREATE TABLE `t9` ( KEY `col1` (`col1`,`col2`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 drop table t1, t2, t3, t4, t5, t6, t7, t8, t9; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +SET GLOBAL innodb_large_prefix=OFF; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t1 (col1 varchar(768), index(col1)) character set = latin1 engine = innodb; Warnings: @@ -2450,6 +2529,9 @@ create table t4 (col1 blob, index(col1(768))) character set = latin1 engine = innodb; Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes +SET GLOBAL innodb_large_prefix=default; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html show create table t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -2457,6 +2539,9 @@ t1 CREATE TABLE `t1` ( KEY `col1` (`col1`(767)) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 drop table t1, t2, t3, t4; +set global innodb_large_prefix=OFF; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t1 (col1 varchar(768) primary key) character set = latin1 engine = innodb; ERROR 42000: Specified key was too long; max key length is 767 bytes @@ -2469,6 +2554,10 @@ ERROR 42000: Specified key was too long; max key length is 767 bytes create table t4 (col1 blob, primary key(col1(768))) character set = latin1 engine = innodb; ERROR 42000: Specified key was too long; max key length is 767 bytes +SET sql_mode = default; +set global innodb_large_prefix=default; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html CREATE TABLE t1 ( id INT PRIMARY KEY @@ -2485,7 +2574,7 @@ INSERT INTO t2 VALUES(1); DELETE FROM t1 WHERE id = 1; ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c1` FOREIGN KEY (`v`) REFERENCES `t1` (`id`)) DROP TABLE t1; -ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `c1` FOREIGN KEY (`v`) REFERENCES `t1` (`id`)) +Got one of the listed errors SET FOREIGN_KEY_CHECKS=0; DROP TABLE t1; SET FOREIGN_KEY_CHECKS=1; @@ -2608,6 +2697,7 @@ d varchar(255) character set utf8, e varchar(255) character set utf8, key (a,b,c,d,e)) engine=innodb; ERROR 42000: Specified key was too long; max key length is 3072 bytes +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; create table t1 (s1 varbinary(2),primary key (s1)) engine=innodb; create table t2 (s1 binary(2),primary key (s1)) engine=innodb; create table t3 (s1 varchar(2) binary,primary key (s1)) engine=innodb; @@ -2723,6 +2813,7 @@ t2 CREATE TABLE `t2` ( KEY `t2_ibfk_0` (`a`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE t2,t1; +SET sql_mode = default; CREATE TABLE t1 ( field1 varchar(8) NOT NULL DEFAULT '', field2 varchar(8) NOT NULL DEFAULT '', @@ -3095,7 +3186,7 @@ t1 CREATE TABLE `t1` ( CONSTRAINT 
`t1_t2` FOREIGN KEY (`id`) REFERENCES `t2` (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=349 DEFAULT CHARSET=latin1 DROP TABLE t1,t2; -set innodb_strict_mode=on; +SET innodb_strict_mode=ON; CREATE TABLE t1 ( c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255), c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255), @@ -3106,7 +3197,8 @@ c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255), c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255), c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255) ) ENGINE = InnoDB; -ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +ERROR 42000: Row size too large (> {checked_valid}). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +SET innodb_strict_mode=OFF; DROP TABLE IF EXISTS t1; Warnings: Note 1051 Unknown table 'test.t1' diff --git a/mysql-test/suite/innodb/r/innodb_blob_truncate.result b/mysql-test/suite/innodb/r/innodb_blob_truncate.result index 569e6b03452..a71dd7678c0 100644 --- a/mysql-test/suite/innodb/r/innodb_blob_truncate.result +++ b/mysql-test/suite/innodb/r/innodb_blob_truncate.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table = ON; create table t1(a blob) engine=innodb key_block_size=8; create function generate_blob() @@ -17,3 +19,5 @@ truncate t1; insert into t1 select generate_blob(); drop table t1; drop function generate_blob; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb_blob_unrecoverable_crash.result b/mysql-test/suite/innodb/r/innodb_blob_unrecoverable_crash.result deleted file mode 100644 index c467193bca7..00000000000 --- a/mysql-test/suite/innodb/r/innodb_blob_unrecoverable_crash.result +++ /dev/null @@ -1,24 +0,0 @@ -call mtr.add_suppression("InnoDB: The total blob data length"); -SET GLOBAL max_allowed_packet = 100*1024*1024; -connect big_packets,localhost,root,,; -connection big_packets; -CREATE TABLE t1 (a BIGINT PRIMARY KEY, b LONGBLOB) ENGINE=InnoDB; -INSERT INTO t1 (a, b) VALUES (1, '1'); -INSERT INTO t1 (a, b) VALUES (2, '2'); -INSERT INTO t1 (a, b) VALUES (3, '3'); -INSERT INTO t1 (a, b) VALUES (4, '4'); -INSERT INTO t1 (a, b) VALUES (5, '5'); -start transaction; -INSERT INTO t1 (a, b) VALUES (6, REPEAT('a', 20*1024*1024)); -ERROR 42000: The size of BLOB/TEXT data inserted in one transaction is greater than 10% of redo log size. Increase the redo log size using innodb_log_file_size. -connection default; -# Quick shutdown and restart server -connection default; -SELECT a FROM t1; -a -1 -2 -3 -4 -5 -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_bug12400341.result b/mysql-test/suite/innodb/r/innodb_bug12400341.result index 31a064e624d..3bb786c4654 100644 --- a/mysql-test/suite/innodb/r/innodb_bug12400341.result +++ b/mysql-test/suite/innodb/r/innodb_bug12400341.result @@ -1,4 +1,5 @@ call mtr.add_suppression("InnoDB: Warning: cannot find a free slot for an undo log. Do you have too*"); +call mtr.add_suppression("\\[Warning\\] InnoDB: Cannot find a free slot for an undo log. 
Do you have too"); set @old_innodb_undo_logs = @@innodb_undo_logs; set global innodb_undo_logs=1; show variables like "max_connections"; diff --git a/mysql-test/suite/innodb/r/innodb_bug12902967.result b/mysql-test/suite/innodb/r/innodb_bug12902967.result index 5958a8dce31..e784c6b306a 100644 --- a/mysql-test/suite/innodb/r/innodb_bug12902967.result +++ b/mysql-test/suite/innodb/r/innodb_bug12902967.result @@ -1,6 +1,5 @@ +call mtr.add_suppression("In ALTER TABLE .* has or is referenced in foreign key constraints which are not compatible with the new table definition."); create table t1 (f1 integer primary key) engine innodb; alter table t1 add constraint c1 foreign key (f1) references t1(f1); ERROR HY000: Error on rename of '#sql-temporary' to './test/t1' (errno: 150 "Foreign key constraint is incorrectly formed") -InnoDB: has or is referenced in foreign key constraints -InnoDB: which are not compatible with the new table definition. drop table t1; diff --git a/mysql-test/suite/innodb/r/innodb_bug14147491.result b/mysql-test/suite/innodb/r/innodb_bug14147491.result index bd3c388fae1..cf960e3a6ee 100644 --- a/mysql-test/suite/innodb/r/innodb_bug14147491.result +++ b/mysql-test/suite/innodb/r/innodb_bug14147491.result @@ -1,31 +1,10 @@ -call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*"); -CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); -CALL mtr.add_suppression("InnoDB: Warning: database page corruption or a failed"); -CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed"); -CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*"); -CALL mtr.add_suppression("InnoDB: You may have to recover from a backup."); -CALL mtr.add_suppression("InnoDB: It is also possible that your operatingsystem has corrupted its own file cache."); -CALL mtr.add_suppression("InnoDB: and rebooting your computer removes the error."); -CALL 
mtr.add_suppression("InnoDB: If the corrupt page is an index page you can also try to"); -CALL mtr.add_suppression("InnoDB: fix the corruption by dumping, dropping, and reimporting"); -CALL mtr.add_suppression("InnoDB: the corrupt table. You can use CHECK"); -CALL mtr.add_suppression("InnoDB: TABLE to scan your table for corruption."); -CALL mtr.add_suppression("InnoDB: See also .* about forcing recovery."); -# Create and populate the table to be corrupted -CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=InnoDB; +CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=InnoDB; INSERT INTO t1 (b) VALUES ('corrupt me'); INSERT INTO t1 (b) VALUES ('corrupt me'); -# Write file to make mysql-test-run.pl expect the "crash", but don't -# start it until it's told to -# We give 30 seconds to do a clean shutdown because we do not want -# to redo apply the pages of t1.ibd at the time of recovery. -# We want SQL to initiate the first access to t1.ibd. -# Wait until disconnected. # Backup the t1.ibd before corrupting # Corrupt the table Munged a string. Munged a string. 
-# Write file to make mysql-test-run.pl start up the server again SET DEBUG_DBUG = '+d,innodb_page_corruption_retries'; # Write file to make mysql-test-run.pl expect the "crash", but don't # start it until it's told to @@ -34,6 +13,5 @@ SET DEBUG_DBUG = '+d,innodb_page_corruption_retries'; SELECT * FROM t1; ERROR HY000: Lost connection to MySQL server during query # Restore the original t1.ibd -# Write file to make mysql-test-run.pl start up the server again # Cleanup DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_bug30423.result b/mysql-test/suite/innodb/r/innodb_bug30423.result index d7b72b1ec2a..c7f823a06ae 100644 --- a/mysql-test/suite/innodb/r/innodb_bug30423.result +++ b/mysql-test/suite/innodb/r/innodb_bug30423.result @@ -48,9 +48,9 @@ ON orgs.org_id=sa_opportunities.org_id LEFT JOIN bug30243_2 contacts ON orgs.org_id=contacts.org_id ; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index -1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index -1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE orgs index NULL org_id 4 NULL ROWS Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id ROWS Using index +1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id ROWS Using index select @@innodb_stats_method; @@innodb_stats_method nulls_ignored @@ -74,9 +74,9 @@ ON orgs.org_id=sa_opportunities.org_id LEFT JOIN bug30243_2 contacts ON orgs.org_id=contacts.org_id; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index -1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index -1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index +1 SIMPLE orgs index NULL org_id 4 NULL ROWS Using index +1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id ROWS Using index 
+1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id ROWS Using index SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL; COUNT(*) 1024 diff --git a/mysql-test/suite/innodb/r/innodb_bug34053.result b/mysql-test/suite/innodb/r/innodb_bug34053.result index 195775f74c8..23c5b0cc2f7 100644 --- a/mysql-test/suite/innodb/r/innodb_bug34053.result +++ b/mysql-test/suite/innodb/r/innodb_bug34053.result @@ -1 +1 @@ -SET storage_engine=InnoDB; +SET default_storage_engine=InnoDB; diff --git a/mysql-test/suite/innodb/r/innodb_bug34300.result b/mysql-test/suite/innodb/r/innodb_bug34300.result index 09fc0b44579..b168c7782bb 100644 --- a/mysql-test/suite/innodb/r/innodb_bug34300.result +++ b/mysql-test/suite/innodb/r/innodb_bug34300.result @@ -1,3 +1,20 @@ -ERROR 42000: The size of BLOB/TEXT data inserted in one transaction is greater than 10% of redo log size. Increase the redo log size using innodb_log_file_size. +# +# Bug#34300 Tinyblob & tinytext fields currupted after export/import and alter in 5.1 +# +SET @@global.max_allowed_packet=16777216; +connect newconn, localhost, root,,; +CREATE TABLE bug34300 ( +f4 TINYTEXT, +f6 MEDIUMTEXT, +f8 TINYBLOB +) ENGINE=InnoDB; +INSERT INTO bug34300 VALUES ('xxx', repeat('a', 8459264), 'zzz'); +SELECT f4, f8 FROM bug34300; f4 f8 +xxx zzz +ALTER TABLE bug34300 ADD COLUMN (f10 INT); +SELECT f4, f8 FROM bug34300; f4 f8 +xxx zzz +# Cleanup +DROP TABLE bug34300; diff --git a/mysql-test/suite/innodb/r/innodb_bug46000.result b/mysql-test/suite/innodb/r/innodb_bug46000.result index 0e3f0ef59ae..7c5ef13f3dc 100644 --- a/mysql-test/suite/innodb/r/innodb_bug46000.result +++ b/mysql-test/suite/innodb/r/innodb_bug46000.result @@ -6,7 +6,7 @@ show warnings; Level Code Message Warning 1280 Cannot Create Index with name 'GEN_CLUST_INDEX'. The name is reserved for the system default primary index. 
Error 1280 Incorrect index name 'GEN_CLUST_INDEX' -Warning 1030 Got error -1 "Internal error < 0 (Not system error)" from storage engine InnoDB +Warning 1030 Got error 124 "Wrong index given to function" from storage engine InnoDB create table bug46000(id int) engine=innodb; create index GEN_CLUST_INDEX on bug46000(id); ERROR 42000: Incorrect index name 'GEN_CLUST_INDEX' diff --git a/mysql-test/suite/innodb/r/innodb_bug47167.result b/mysql-test/suite/innodb/r/innodb_bug47167.result index 656a4846a52..b678046e308 100644 --- a/mysql-test/suite/innodb/r/innodb_bug47167.result +++ b/mysql-test/suite/innodb/r/innodb_bug47167.result @@ -1,19 +1,25 @@ set @old_innodb_file_format_max=@@innodb_file_format_max; select @old_innodb_file_format_max; @old_innodb_file_format_max -Antelope +Barracuda set global innodb_file_format_max = Barracuda; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format_max; @@innodb_file_format_max Barracuda set global innodb_file_format_max = DEFAULT; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format_max; @@innodb_file_format_max Antelope set global innodb_file_format_max = @old_innodb_file_format_max; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format_max; @@innodb_file_format_max -Antelope +Barracuda set global innodb_file_format_max = cheetah; ERROR 42000: Variable 'innodb_file_format_max' can't be set to the value of 'cheetah' set global innodb_file_format_max = Bear; @@ -22,3 +28,5 @@ set global innodb_file_format_max = on; ERROR 42000: Variable 'innodb_file_format_max' can't be set to the value of 'ON' set global innodb_file_format_max = off; ERROR 42000: Variable 'innodb_file_format_max' can't be set to the value of 'off' +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result index d80c451c841..1c34ea9de1d 100644 --- a/mysql-test/suite/innodb/r/innodb_bug54044.result +++ b/mysql-test/suite/innodb/r/innodb_bug54044.result @@ -19,5 +19,5 @@ CREATE TABLE t1 (a VARCHAR(3)) ENGINE=InnoDB; INSERT INTO t1 VALUES ('foo'),('bar'); FLUSH TABLES; CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1; -ERROR HY000: Can't create table `test`.`tmp` (errno: -1 "Internal error < 0 (Not system error)") +ERROR HY000: Can't create table `test`.`tmp` (errno: 168 "Unknown (generic) error from engine") DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_bug60049.result b/mysql-test/suite/innodb/r/innodb_bug60049.result index 8e3be130e48..f34ebc0a955 100644 --- a/mysql-test/suite/innodb/r/innodb_bug60049.result +++ b/mysql-test/suite/innodb/r/innodb_bug60049.result @@ -1,5 +1,3 @@ -call mtr.add_suppression('InnoDB: Error: Table "mysql"."innodb_(table|index)_stats" not found'); -call mtr.add_suppression('InnoDB: Error: Fetch of persistent statistics requested'); CREATE TABLE t(a INT)ENGINE=InnoDB STATS_PERSISTENT=0; RENAME TABLE t TO u; DROP TABLE u; diff --git 
a/mysql-test/suite/innodb/r/innodb_corrupt_bit.result b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result index 0ef6f65d0ff..d614ebff751 100644 --- a/mysql-test/suite/innodb/r/innodb_corrupt_bit.result +++ b/mysql-test/suite/innodb/r/innodb_corrupt_bit.result @@ -1,48 +1,74 @@ +set names utf8; +CREATE TABLE corrupt_bit_test_ā( +a INT AUTO_INCREMENT PRIMARY KEY, +b CHAR(100), +c INT, +z INT, +INDEX idx(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; +INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1); +CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b); +CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b); +SELECT * FROM corrupt_bit_test_ā; a b c z 1 x 1 1 +INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā; +select count(*) from corrupt_bit_test_ā; count(*) 2 +SET SESSION debug="+d,dict_set_index_corrupted"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +check table corrupt_bit_test_ā; Table Op Msg_type Msg_text -test.corrupt_bit_test_ā check Warning InnoDB: Index "idx" is marked as corrupted -test.corrupt_bit_test_ā check Warning InnoDB: Index "idxā" is marked as corrupted -test.corrupt_bit_test_ā check Warning InnoDB: Index "idxē" is marked as corrupted +test.corrupt_bit_test_ā check Warning InnoDB: Index idx is marked as corrupted +test.corrupt_bit_test_ā check Warning InnoDB: Index idxā is marked as corrupted +test.corrupt_bit_test_ā check Warning InnoDB: Index idxē is marked as corrupted test.corrupt_bit_test_ā check error Corrupt -ERROR HY000: Index "idx" is corrupted -ERROR HY000: Index "idx" is corrupted +SET SESSION debug="-d,dict_set_index_corrupted"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); +ERROR HY000: Index idx is corrupted +CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); +ERROR HY000: Index idx is corrupted +select c from corrupt_bit_test_ā; ERROR HY000: Index corrupt_bit_test_ā is corrupted +select z from corrupt_bit_test_ā; ERROR HY000: Index corrupt_bit_test_ā is corrupted +show warnings; Level Code Message -Warning 180 InnoDB: Index "idxē" for table "test"."corrupt_bit_test_ā" is marked as corrupted +Warning 180 InnoDB: Index idxē for table `test`.`corrupt_bit_test_ā` is marked as corrupted Error 1712 Index corrupt_bit_test_ā is corrupted +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); +select * from corrupt_bit_test_ā use index(primary) where a = 10001; a b c z 10001 a 20001 20001 +begin; +insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002); +delete from corrupt_bit_test_ā where a = 10001; +insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001); +rollback; +drop index idxā on corrupt_bit_test_ā; +check table corrupt_bit_test_ā; Table Op Msg_type Msg_text -test.corrupt_bit_test_ā check Warning InnoDB: Index "idx" is marked as corrupted -test.corrupt_bit_test_ā check Warning InnoDB: Index "idxē" is marked as corrupted +test.corrupt_bit_test_ā check Warning InnoDB: Index idx is marked as corrupted +test.corrupt_bit_test_ā check Warning InnoDB: Index idxē is marked as corrupted test.corrupt_bit_test_ā check error Corrupt +set names utf8; +select z from corrupt_bit_test_ā; ERROR HY000: Index corrupt_bit_test_ā is corrupted -Table Create Table -corrupt_bit_test_ā CREATE TABLE `corrupt_bit_test_ā` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` char(100) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - `z` int(11) DEFAULT NULL, - PRIMARY KEY (`a`), - UNIQUE KEY `idxē` (`z`,`b`), - KEY `idx` (`b`) -) ENGINE=InnoDB AUTO_INCREMENT=10003 DEFAULT CHARSET=latin1 -ERROR HY000: Index "idx" is corrupted -ERROR HY000: Index "idx" is corrupted 
-Table Create Table -corrupt_bit_test_ā CREATE TABLE `corrupt_bit_test_ā` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` char(100) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - `z` int(11) DEFAULT NULL, - PRIMARY KEY (`a`), - KEY `idx` (`b`) -) ENGINE=InnoDB AUTO_INCREMENT=10003 DEFAULT CHARSET=latin1 +drop index idxē on corrupt_bit_test_ā; +CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); +ERROR HY000: Index idx is corrupted +CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); +ERROR HY000: Index idx is corrupted +drop index idx on corrupt_bit_test_ā; +CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); +CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); +select z from corrupt_bit_test_ā limit 10; z 20001 1 2 +drop table corrupt_bit_test_ā; diff --git a/mysql-test/suite/innodb/r/innodb_file_format.result b/mysql-test/suite/innodb/r/innodb_file_format.result index 77328a360a9..e489911afb5 100644 --- a/mysql-test/suite/innodb/r/innodb_file_format.result +++ b/mysql-test/suite/innodb/r/innodb_file_format.result @@ -1,38 +1,50 @@ select @@innodb_file_format; @@innodb_file_format -Antelope +Barracuda select @@innodb_file_format_check; @@innodb_file_format_check 1 select @@innodb_file_format_max; @@innodb_file_format_max -Antelope +Barracuda set global innodb_file_format=antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format=barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format=cheetah; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of 'cheetah' select @@innodb_file_format; @@innodb_file_format Barracuda set global innodb_file_format=default; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format; @@innodb_file_format -Antelope +Barracuda set global innodb_file_format=on; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of 'ON' set global innodb_file_format=off; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of 'off' select @@innodb_file_format; @@innodb_file_format -Antelope +Barracuda set global innodb_file_format_max=antelope; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format_max=barracuda; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format_max=cheetah; ERROR 42000: Variable 'innodb_file_format_max' can't be set to the value of 'cheetah' select @@innodb_file_format_max; @@innodb_file_format_max Barracuda set global innodb_file_format_max=default; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format_max; @@innodb_file_format_max Antelope @@ -44,5 +56,11 @@ select @@innodb_file_format_max; @@innodb_file_format_max Antelope set global innodb_file_format_max=antelope; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format_check=off; ERROR HY000: Variable 'innodb_file_format_check' is a read only variable +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb_gis.result b/mysql-test/suite/innodb/r/innodb_gis.result index fc13ba1ab2c..5a978ea303c 100644 --- a/mysql-test/suite/innodb/r/innodb_gis.result +++ b/mysql-test/suite/innodb/r/innodb_gis.result @@ -598,4 +598,4 @@ create table t1 (a int not null, b linestring not null, unique key b (b(12))); create unique index a on t1(a); drop table t1; create table t1 (g geometry not null, spatial gk(g)) engine=innodb; -ERROR HY000: The storage engine InnoDB doesn't support SPATIAL indexes +drop table t1; diff --git a/mysql-test/suite/innodb/r/innodb_information_schema.result b/mysql-test/suite/innodb/r/innodb_information_schema.result index 1b83bc29493..15c3af325ad 100644 --- a/mysql-test/suite/innodb/r/innodb_information_schema.result +++ b/mysql-test/suite/innodb/r/innodb_information_schema.result @@ -47,7 +47,7 @@ trx_adaptive_hash_timeout bigint(21) unsigned NO 0 trx_is_read_only int(1) NO 0 trx_autocommit_non_locking int(1) NO 0 trx_state trx_weight trx_tables_in_use trx_tables_locked trx_rows_locked 
trx_rows_modified trx_concurrency_tickets trx_isolation_level trx_unique_checks trx_foreign_key_checks -RUNNING 4 0 0 7 1 0 REPEATABLE READ 1 1 +RUNNING 4 0 1 7 1 0 REPEATABLE READ 1 1 trx_isolation_level trx_unique_checks trx_foreign_key_checks SERIALIZABLE 0 0 trx_state trx_isolation_level trx_last_foreign_key_error diff --git a/mysql-test/suite/innodb/r/innodb_information_schema_buffer.result b/mysql-test/suite/innodb/r/innodb_information_schema_buffer.result index dcdf3082067..6328458d46e 100644 --- a/mysql-test/suite/innodb/r/innodb_information_schema_buffer.result +++ b/mysql-test/suite/innodb/r/innodb_information_schema_buffer.result @@ -24,6 +24,7 @@ WHERE TABLE_NAME like "%infoschema_buffer_test%" and PAGE_STATE="file_page" and INDEX_NAME = "idx" and PAGE_TYPE="index"; TABLE_NAME INDEX_NAME NUMBER_RECORDS DATA_SIZE PAGE_STATE PAGE_TYPE `test`.`infoschema_buffer_test` idx 2 32 FILE_PAGE INDEX +`test`.`infoschema_buffer_test` idx 2 32 FILE_PAGE INDEX DROP TABLE infoschema_buffer_test; SELECT TABLE_NAME, INDEX_NAME, NUMBER_RECORDS, DATA_SIZE, PAGE_STATE, PAGE_TYPE FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result index 8c580348e1a..bda1462ed33 100644 --- a/mysql-test/suite/innodb/r/innodb_monitor.result +++ b/mysql-test/suite/innodb/r/innodb_monitor.result @@ -4,7 +4,6 @@ name status metadata_table_handles_opened disabled metadata_table_handles_closed disabled metadata_table_reference_count disabled -metadata_mem_pool_size disabled lock_deadlocks disabled lock_timeouts disabled lock_rec_lock_waits disabled @@ -47,7 +46,6 @@ buffer_data_written disabled buffer_flush_batch_scanned disabled buffer_flush_batch_num_scan disabled buffer_flush_batch_scanned_per_call disabled -buffer_flush_batch_rescan disabled buffer_flush_batch_total_pages disabled buffer_flush_batches disabled buffer_flush_batch_pages disabled @@ -55,6 +53,19 @@ buffer_flush_neighbor_total_pages 
disabled buffer_flush_neighbor disabled buffer_flush_neighbor_pages disabled buffer_flush_n_to_flush_requested disabled +buffer_flush_n_to_flush_by_age disabled +buffer_flush_adaptive_avg_time_slot disabled +buffer_LRU_batch_flush_avg_time_slot disabled +buffer_flush_adaptive_avg_time_thread disabled +buffer_LRU_batch_flush_avg_time_thread disabled +buffer_flush_adaptive_avg_time_est disabled +buffer_LRU_batch_flush_avg_time_est disabled +buffer_flush_avg_time disabled +buffer_flush_adaptive_avg_pass disabled +buffer_LRU_batch_flush_avg_pass disabled +buffer_flush_avg_pass disabled +buffer_LRU_get_free_loops disabled +buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -157,12 +168,13 @@ log_lsn_checkpoint_age disabled log_lsn_buf_pool_oldest disabled log_max_modified_age_async disabled log_max_modified_age_sync disabled -log_pending_log_writes disabled +log_pending_log_flushes disabled log_pending_checkpoint_writes disabled log_num_log_io disabled log_waits disabled log_write_requests disabled log_writes disabled +log_padded disabled compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled @@ -223,10 +235,13 @@ innodb_dblwr_pages_written disabled innodb_page_size disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled dml_reads disabled dml_inserts disabled dml_deletes disabled @@ -239,6 +254,8 @@ ddl_background_drop_indexes disabled ddl_background_drop_tables disabled ddl_online_create_index disabled ddl_pending_alter_table disabled +ddl_sort_file_alter_table disabled +ddl_log_file_alter_table disabled icp_attempts disabled icp_no_match disabled 
icp_out_of_range disabled @@ -280,10 +297,13 @@ lock_row_lock_waits disabled lock_row_lock_time_avg disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled set global innodb_monitor_enable = "%lock*"; ERROR 42000: Variable 'innodb_monitor_enable' can't be set to the value of '%lock*' set global innodb_monitor_enable="%%%%%%%%%%%%%%%%%%%%%%%%%%%"; @@ -408,7 +428,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 2 NULL 2 enabled metadata_table_handles_closed 1 NULL 1 1 NULL 1 enabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_disable = module_metadata; set global innodb_monitor_reset = module_metadata; select name, max_count, min_count, count, @@ -419,7 +438,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 NULL NULL 0 disabled metadata_table_handles_closed 1 NULL 1 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_reset_all = module_metadata; select name, max_count, min_count, count, max_count_reset, min_count_reset, count_reset, status @@ -429,7 +447,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened NULL NULL 0 NULL NULL 0 disabled metadata_table_handles_closed NULL NULL 0 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_enable = module_trx; 
begin; insert into monitor_test values(9); diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result index 900e3814e49..a057719d18b 100644 --- a/mysql-test/suite/innodb/r/innodb_mysql.result +++ b/mysql-test/suite/innodb/r/innodb_mysql.result @@ -1,5 +1,9 @@ set global innodb_support_xa=default; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. set session innodb_support_xa=default; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SET SESSION DEFAULT_STORAGE_ENGINE = InnoDB; drop table if exists t1,t2,t3,t1m,t1i,t2m,t2i,t4; drop procedure if exists p1; diff --git a/mysql-test/suite/innodb/r/innodb_prefix_index_restart_server.result b/mysql-test/suite/innodb/r/innodb_prefix_index_restart_server.result index cf5d77a679c..5ae138477b9 100644 --- a/mysql-test/suite/innodb/r/innodb_prefix_index_restart_server.result +++ b/mysql-test/suite/innodb/r/innodb_prefix_index_restart_server.result @@ -1,6 +1,10 @@ set global innodb_file_format="Barracuda"; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table=1; set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html DROP TABLE IF EXISTS worklog5743; CREATE TABLE worklog5743 ( col_1_text TEXT(4000) , col_2_text TEXT(4000) , @@ -88,6 +92,10 @@ worklog5743; col_1_text = REPEAT("a", 3500) col_2_text = REPEAT("o", 3500) 1 1 DROP TABLE worklog5743; -SET GLOBAL innodb_file_format=Antelope; +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=1; -SET GLOBAL innodb_large_prefix=0; +SET GLOBAL innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb/r/innodb_simulate_comp_failures.result b/mysql-test/suite/innodb/r/innodb_simulate_comp_failures.result index c2a8ba1f4db..f35e4159603 100644 --- a/mysql-test/suite/innodb/r/innodb_simulate_comp_failures.result +++ b/mysql-test/suite/innodb/r/innodb_simulate_comp_failures.result @@ -2,7 +2,16 @@ # Testing robustness against random compression failures # CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255), KEY msg_i(msg)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `msg` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `msg_i` (`msg`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8 SET GLOBAL innodb_simulate_comp_failures = 25; -SELECT COUNT(*) FROM t1; -COUNT(*) +COMMIT; +SELECT COUNT(id) FROM t1; +COUNT(id) 1500 diff --git a/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result b/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result index 92ae1637ad5..099c673bca7 100644 --- 
a/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result +++ b/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result @@ -2,7 +2,16 @@ # Testing robustness against random compression failures # CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255), KEY msg_i(msg)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `msg` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`), + KEY `msg_i` (`msg`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8 SET GLOBAL innodb_simulate_comp_failures = 25; -SELECT COUNT(*) FROM t1; -COUNT(*) +COMMIT; +SELECT COUNT(id) FROM t1; +COUNT(id) 1000 diff --git a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result index 3280726729f..4dce5d41885 100644 --- a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result +++ b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result @@ -39,7 +39,6 @@ NAME SUBSYSTEM COUNT MAX_COUNT MIN_COUNT AVG_COUNT COUNT_RESET MAX_COUNT_RESET M metadata_table_handles_opened metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table handles opened metadata_table_handles_closed metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of table handles closed metadata_table_reference_count metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Table reference counter -metadata_mem_pool_size metadata 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Size of a memory pool InnoDB uses to store data dictionary and internal data structures in bytes lock_deadlocks lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of deadlocks lock_timeouts lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of 
lock timeouts lock_rec_lock_waits lock 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times enqueued into record lock wait queue @@ -82,7 +81,6 @@ buffer_data_written buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL buffer_flush_batch_scanned buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages scanned as part of flush batch buffer_flush_batch_num_scan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times buffer flush list flush is called buffer_flush_batch_scanned_per_call buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages scanned per flush batch scan -buffer_flush_batch_rescan buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times rescan of flush list forced buffer_flush_batch_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_owner Total pages flushed as part of flush batch buffer_flush_batches buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of flush batches buffer_flush_batch_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a flush batch @@ -90,6 +88,19 @@ buffer_flush_neighbor_total_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL buffer_flush_neighbor buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Number of times neighbors flushing is invoked buffer_flush_neighbor_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled set_member Pages queued as a neighbor batch buffer_flush_n_to_flush_requested buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages requested for flushing. +buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages target by LSN Age for flushing. 
+buffer_flush_adaptive_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for adaptive flushing recently per slot. +buffer_LRU_batch_flush_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for LRU batch flushing recently per slot. +buffer_flush_adaptive_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for adaptive flushing recently per thread. +buffer_LRU_batch_flush_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for LRU batch flushing recently per thread. +buffer_flush_adaptive_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Estimated time (ms) spent for adaptive flushing recently. +buffer_LRU_batch_flush_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Estimated time (ms) spent for LRU batch flushing recently. +buffer_flush_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Avg time (ms) spent for flushing recently. +buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Numner of adaptive flushes passed during the recent Avg period. +buffer_LRU_batch_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of LRU batch flushes passed during the recent Avg period. +buffer_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of flushes passed during the recent Avg period. +buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Total loops in LRU get free. +buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Total sleep waits in LRU get free. 
buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Average number of pages at which flushing is happening buffer_flush_lsn_avg_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Average redo generation rate buffer_flush_pct_for_dirty buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Percent of IO capacity used to avoid max dirty page limit @@ -165,7 +176,7 @@ os_log_bytes_written os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL di os_log_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of fsync log writes (innodb_os_log_fsyncs) os_log_pending_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pending fsync write (innodb_os_log_pending_fsyncs) os_log_pending_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of pending log file writes (innodb_os_log_pending_writes) -trx_rw_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of read-write transactions committed +trx_rw_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of read-write transactions committed trx_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of read-only transactions committed trx_nl_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of non-locking auto-commit read-only transactions committed trx_commits_insert_update transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of transactions committed with inserts and updates @@ -178,7 +189,7 @@ trx_undo_slots_used transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL trx_undo_slots_cached transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled 
counter Number of undo slots cached trx_rseg_current_size transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Current rollback segment size in pages purge_del_mark_records purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of delete-marked rows purged -purge_upd_exist_or_extern_records purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of purges on updates of existing records and updates on delete marked record with externally stored field +purge_upd_exist_or_extern_records purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of purges on updates of existing records and updates on delete marked record with externally stored field purge_invoked purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times purge was invoked purge_undo_log_pages purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of undo log pages handled by the purge purge_dml_delay_usec purge 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Microseconds DML to be delayed due to purge lagging @@ -192,12 +203,13 @@ log_lsn_checkpoint_age recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL log_lsn_buf_pool_oldest recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value The oldest modified block LSN in the buffer pool log_max_modified_age_async recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Maximum LSN difference; when exceeded, start asynchronous preflush log_max_modified_age_sync recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value Maximum LSN difference; when exceeded, start synchronous preflush -log_pending_log_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Pending log writes +log_pending_log_flushes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled 
counter Pending log flushes log_pending_checkpoint_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Pending checkpoints log_num_log_io recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of log I/Os log_waits recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log waits due to small log buffer (innodb_log_waits) log_write_requests recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log write requests (innodb_log_write_requests) log_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of log writes (innodb_log_writes) +log_padded recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Bytes of log padded for log write ahead compress_pages_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed compress_pages_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed compression_pad_increments compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is incremented to avoid compression failures @@ -258,10 +270,13 @@ innodb_dblwr_pages_written server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NU innodb_page_size server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled value InnoDB page size in bytes (innodb_page_size) innodb_rwlock_s_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin waits due to shared latch request innodb_rwlock_x_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin waits due to exclusive latch request +innodb_rwlock_sx_spin_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 
disabled status_counter Number of rwlock spin waits due to sx latch request innodb_rwlock_s_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to shared latch request innodb_rwlock_x_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to exclusive latch request +innodb_rwlock_sx_spin_rounds server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rwlock spin loop rounds due to sx latch request innodb_rwlock_s_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to shared latch request innodb_rwlock_x_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to exclusive latch request +innodb_rwlock_sx_os_waits server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of OS waits due to sx latch request dml_reads dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows read dml_inserts dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows inserted dml_deletes dml 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled status_counter Number of rows deleted @@ -274,6 +289,8 @@ ddl_background_drop_indexes ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL ddl_background_drop_tables ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of tables in background drop table list ddl_online_create_index ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of indexes being created online ddl_pending_alter_table ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress +ddl_sort_file_alter_table ddl 
0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sort files created during alter table +ddl_log_file_alter_table ddl 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of log files created during alter table icp_attempts icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of attempts for index push-down condition checks icp_no_match icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Index push-down condition does not match icp_out_of_range icp 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Index push-down condition out of range @@ -336,7 +353,7 @@ Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_buffer_page_lru but the InnoDB storage engine is not installed select * from information_schema.innodb_buffer_stats; select * from information_schema.innodb_sys_tables; -TABLE_ID NAME FLAG N_COLS SPACE FILE_FORMAT ROW_FORMAT ZIP_PAGE_SIZE +TABLE_ID NAME FLAG N_COLS SPACE FILE_FORMAT ROW_FORMAT ZIP_PAGE_SIZE SPACE_TYPE Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_tables but the InnoDB storage engine is not installed select * from information_schema.innodb_sys_tablestats; @@ -344,7 +361,7 @@ TABLE_ID NAME STATS_INITIALIZED NUM_ROWS CLUST_INDEX_SIZE OTHER_INDEX_SIZE MODIF Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_tablestats but the InnoDB storage engine is not installed select * from information_schema.innodb_sys_indexes; -INDEX_ID NAME TABLE_ID TYPE N_FIELDS PAGE_NO SPACE +INDEX_ID NAME TABLE_ID TYPE N_FIELDS PAGE_NO SPACE MERGE_THRESHOLD Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_indexes but the InnoDB storage engine is not installed select * from information_schema.innodb_sys_columns; @@ -364,7 +381,7 @@ ID FOR_COL_NAME REF_COL_NAME POS Warnings: Warning 1012 InnoDB: SELECTing from 
INFORMATION_SCHEMA.innodb_sys_foreign_cols but the InnoDB storage engine is not installed select * from information_schema.innodb_sys_tablespaces; -SPACE NAME FLAG FILE_FORMAT ROW_FORMAT PAGE_SIZE ZIP_PAGE_SIZE +SPACE NAME FLAG FILE_FORMAT ROW_FORMAT PAGE_SIZE ZIP_PAGE_SIZE SPACE_TYPE FS_BLOCK_SIZE FILE_SIZE ALLOCATED_SIZE Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_tablespaces but the InnoDB storage engine is not installed select * from information_schema.innodb_sys_datafiles; @@ -372,9 +389,6 @@ SPACE PATH Warnings: Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_datafiles but the InnoDB storage engine is not installed select * from information_schema.innodb_changed_pages; -space_id page_id start_lsn end_lsn -Warnings: -Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_changed_pages but the InnoDB storage engine is not installed select * from information_schema.innodb_tablespaces_encryption; SPACE NAME ENCRYPTION_SCHEME KEYSERVER_REQUESTS MIN_KEY_VERSION CURRENT_KEY_VERSION KEY_ROTATION_PAGE_NUMBER KEY_ROTATION_MAX_PAGE_NUMBER CURRENT_KEY_ID Warnings: diff --git a/mysql-test/suite/innodb/r/innodb_stats_create_on_corrupted.result b/mysql-test/suite/innodb/r/innodb_stats_create_on_corrupted.result index c351b222496..b9429046b36 100644 --- a/mysql-test/suite/innodb/r/innodb_stats_create_on_corrupted.result +++ b/mysql-test/suite/innodb/r/innodb_stats_create_on_corrupted.result @@ -1,5 +1,5 @@ -call mtr.add_suppression("InnoDB: Error: Table \"mysql\".\"innodb_index_stats\" not found"); -call mtr.add_suppression("InnoDB: Error: Fetch of persistent statistics requested for table"); +call mtr.add_suppression("InnoDB: Table .*innodb_index_stats.* not found"); +call mtr.add_suppression("InnoDB: Fetch of persistent statistics requested for table .*"); ALTER TABLE mysql.innodb_index_stats RENAME TO mysql.innodb_index_stats_; CREATE TABLE test_ps_create_on_corrupted (a INT, PRIMARY KEY (a)) diff --git 
a/mysql-test/suite/innodb/r/innodb_stats_fetch_corrupted.result b/mysql-test/suite/innodb/r/innodb_stats_fetch_corrupted.result index f6cce754527..0f0e941b838 100644 --- a/mysql-test/suite/innodb/r/innodb_stats_fetch_corrupted.result +++ b/mysql-test/suite/innodb/r/innodb_stats_fetch_corrupted.result @@ -1,5 +1,5 @@ -call mtr.add_suppression("InnoDB: Error: Table \"mysql\".\"innodb_index_stats\" not found"); -call mtr.add_suppression("InnoDB: Error: Fetch of persistent statistics requested for table"); +call mtr.add_suppression("InnoDB: Table `mysql`.`innodb_index_stats` not found"); +call mtr.add_suppression("InnoDB: Fetch of persistent statistics requested for table.*"); CREATE TABLE test_ps_fetch_corrupted (a INT, PRIMARY KEY (a)) ENGINE=INNODB STATS_PERSISTENT=1; diff --git a/mysql-test/suite/innodb/r/strict_mode.result b/mysql-test/suite/innodb/r/strict_mode.result index d6a621212c3..2f120afbc09 100644 --- a/mysql-test/suite/innodb/r/strict_mode.result +++ b/mysql-test/suite/innodb/r/strict_mode.result @@ -2,6 +2,7 @@ # Bug #17852083 PRINT A WARNING WHEN DDL HAS AN ERROR IN # INNODB_STRICT_MODE = 1 # +call mtr.add_suppression("InnoDB: Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); set innodb_strict_mode = 0; create table t1 (id int auto_increment primary key, v varchar(32), @@ -235,7 +236,7 @@ col227 text, col228 text ) ENGINE=InnoDB; Warnings: -Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. set innodb_strict_mode = 1; alter table t1 engine=InnoDB; ERROR 42000: Row size too large. 
The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs diff --git a/mysql-test/suite/innodb/t/auto_increment_dup.opt b/mysql-test/suite/innodb/t/auto_increment_dup.opt new file mode 100644 index 00000000000..ab372c3a6fc --- /dev/null +++ b/mysql-test/suite/innodb/t/auto_increment_dup.opt @@ -0,0 +1,2 @@ +--innodb_autoinc_lock_mode=1 +--innodb_lock_wait_timeout=2 diff --git a/mysql-test/suite/innodb/t/auto_increment_dup.test b/mysql-test/suite/innodb/t/auto_increment_dup.test index 798a4ecbff7..45e4559a038 100644 --- a/mysql-test/suite/innodb/t/auto_increment_dup.test +++ b/mysql-test/suite/innodb/t/auto_increment_dup.test @@ -10,6 +10,125 @@ drop table if exists t1; --enable_warnings +set global transaction isolation level repeatable read; + +CREATE TABLE t1( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + k INT, + c CHAR(1), + UNIQUE KEY(k)) ENGINE=InnoDB; + +SHOW CREATE TABLE t1; + +--enable_info + +--echo # +--echo # Sequential execution +--echo # + +INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; + +--echo # +--echo # 1 duplicate +--echo # +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; +--echo # +--echo # 5 rows, consecutive auto_inc values +--echo # + +SELECT * FROM t1 order by k; + +DROP TABLE t1; + +CREATE TABLE t1( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + k INT, + c CHAR(1), + UNIQUE KEY(k)) ENGINE=InnoDB; + +--echo # +--echo # Sequential execution 2 +--echo # + +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; + +--echo # +--echo # 1 duplicate +--echo # +INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1'; +--echo # +--echo # 5 rows, consecutive auto_inc values +--echo # + +SELECT * FROM t1 order by k; + +DROP TABLE t1; + +CREATE TABLE t1( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + k INT, + c CHAR(1), + UNIQUE KEY(k)) ENGINE=InnoDB; + +--echo # 
+--echo # Parallel execution +--echo # + +--connect(con1, localhost, root) +--connect(con2, localhost, root) + +--send SET DEBUG_SYNC='now WAIT_FOR write_row_done' +--connection con1 +--echo # +--echo # Connection 1 +--echo # +SET DEBUG_SYNC='ha_write_row_end SIGNAL write_row_done WAIT_FOR continue'; +--send INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1' +--connection con2 +--echo # +--echo # Connection 2 +--echo # +--reap +SET DEBUG_SYNC='execute_command_after_close_tables SIGNAL continue'; +--error ER_LOCK_WAIT_TIMEOUT +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; + +--connection con1 +--echo # +--echo # 2 duplicates +--echo # +--reap +--connection default +--echo # +--echo # 3 rows +--echo # + +SELECT * FROM t1 order by k; + +INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; + +SELECT * FROM t1 order by k; + + +--disable_info + +--disconnect con1 +--disconnect con2 + +--connection default + +DROP TABLE t1; + +--echo # +--echo # Parallel test with read_committed +--echo # + +set global transaction isolation level read committed; + +--disable_warnings +drop table if exists t1; +--enable_warnings + CREATE TABLE t1( id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, k INT, @@ -23,10 +142,16 @@ CREATE TABLE t1( --connection con1 +--echo # +--echo # Connection 1 +--echo # SET DEBUG_SYNC='ha_write_row_end SIGNAL continue2 WAIT_FOR continue1'; --send INSERT INTO t1(k) VALUES (1), (2), (3) ON DUPLICATE KEY UPDATE c='1' --connection con2 +--echo # +--echo # Connection 2 +--echo # SET DEBUG_SYNC='ha_write_row_start WAIT_FOR continue2'; SET DEBUG_SYNC='after_mysql_insert SIGNAL continue1'; INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; @@ -35,6 +160,10 @@ INSERT INTO t1(k) VALUES (2), (4), (5) ON DUPLICATE KEY UPDATE c='2'; --reap --disable_info SET DEBUG_SYNC='RESET'; + +--echo # +--echo # 5 rows, gap in autoinc values +--echo # SELECT * FROM t1 ORDER BY k; --disconnect con1 @@ -43,3 +172,6 
@@ SELECT * FROM t1 ORDER BY k; --connection default DROP TABLE t1; + +set global transaction isolation level repeatable read; + diff --git a/mysql-test/suite/innodb/t/create_isl_with_direct.test b/mysql-test/suite/innodb/t/create_isl_with_direct.test index 34ea9c77703..83c7507e025 100644 --- a/mysql-test/suite/innodb/t/create_isl_with_direct.test +++ b/mysql-test/suite/innodb/t/create_isl_with_direct.test @@ -3,7 +3,7 @@ --source include/not_windows.inc --disable_query_log -CALL mtr.add_suppression("\\[Warning\\] InnoDB: Failed to set O_DIRECT on file ./ibdata1: OPEN: Invalid argument, continuing anyway. O_DIRECT is known to result in 'Invalid argument' on Linux on tmpfs, see MySQL Bug#26662."); +CALL mtr.add_suppression(".*Failed to set O_DIRECT on file.*"); # The below mtr suppression to avoid failure in solaris platform. CALL mtr.add_suppression("\\[ERROR\\] InnoDB: Failed to set DIRECTIO_ON on file.*"); diff --git a/mysql-test/suite/innodb/t/innodb-16k.test b/mysql-test/suite/innodb/t/innodb-16k.test index 3cd90a00d55..ad09666442d 100644 --- a/mysql-test/suite/innodb/t/innodb-16k.test +++ b/mysql-test/suite/innodb/t/innodb-16k.test @@ -3,16 +3,20 @@ --source include/have_innodb.inc --source include/have_innodb_16k.inc +call mtr.add_suppression("InnoDB: Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); + --disable_query_log let $MYSQLD_DATADIR= `select @@datadir`; # These values can change during the test let $innodb_file_format_orig = `SELECT @@innodb_file_format`; let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; let $innodb_strict_mode_orig = `SELECT @@session.innodb_strict_mode`; +let $innodb_large_prefix_orig = `SELECT @@innodb_large_prefix`; --enable_query_log SET GLOBAL innodb_file_format = `Barracuda`; SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_large_prefix = OFF; --echo # Test 1) Show the page size from 
Information Schema @@ -374,16 +378,6 @@ CREATE INDEX t1t ON t1 (t(767)); --error 1713 UPDATE t1 SET t=@e; -# The function dict_index_too_big_for_undo() prevents us from adding -# one more index. But it is too late. The record is already too big. - ---error ER_TOO_BIG_ROWSIZE -CREATE INDEX t1u ON t1 (u(767)); - ---error ER_TOO_BIG_ROWSIZE -CREATE INDEX t1ut ON t1 (u(767), t(767)); -CREATE INDEX t1st ON t1 (s(767), t(767)); - SHOW CREATE TABLE t1; DROP TABLE t1; @@ -520,11 +514,7 @@ UPDATE bug12547647 SET c = REPEAT('b',16928); SHOW WARNINGS; DROP TABLE bug12547647; - - -# The following should fail in non-strict mode too. - -SET SESSION innodb_strict_mode = off; +SET SESSION innodb_strict_mode = on; --error ER_TOO_BIG_ROWSIZE CREATE TABLE t1( c text NOT NULL, d text NOT NULL, @@ -974,4 +964,5 @@ row_format=compact,ENGINE=INNODB; EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; EVAL SET SESSION innodb_strict_mode = $innodb_strict_mode_orig; +EVAL SET GLOBAL innodb_large_prefix = $innodb_large_prefix_orig; --enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-alter-discard.test b/mysql-test/suite/innodb/t/innodb-alter-discard.test index 80678cef0a6..19ba7c3a3ca 100644 --- a/mysql-test/suite/innodb/t/innodb-alter-discard.test +++ b/mysql-test/suite/innodb/t/innodb-alter-discard.test @@ -3,6 +3,8 @@ --source include/not_embedded.inc --source include/have_innodb.inc +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: .*"); + let $MYSQLD_DATADIR=`select @@datadir`; SET GLOBAL innodb_file_per_table=1; CREATE TABLE t(a INT)ENGINE=InnoDB; @@ -23,6 +25,12 @@ EOF -- enable_reconnect -- source include/wait_until_connected_again.inc +call mtr.add_suppression("InnoDB: Operating system error number .* in a file operation."); +call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("InnoDB: If you are 
installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: './test/t.ibd' OS error: .*"); +call mtr.add_suppression("InnoDB: Ignoring tablespace `test/t` because it could not be opened."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. Please refer to .*"); call mtr.add_suppression("InnoDB: Error: trying to open a table, but could not$"); call mtr.add_suppression("MySQL is trying to open a table handle but the \.ibd file for$"); call mtr.add_suppression("InnoDB: Table 'test/t'$"); diff --git a/mysql-test/suite/innodb/t/innodb-blob.test b/mysql-test/suite/innodb/t/innodb-blob.test index 8aa113fc3ca..c1f9ee5992f 100644 --- a/mysql-test/suite/innodb/t/innodb-blob.test +++ b/mysql-test/suite/innodb/t/innodb-blob.test @@ -3,6 +3,7 @@ # columns are stored off-page. --source include/have_innodb.inc +# The 7000 in this test is a bit less than half the innodb_page_size. --source include/have_innodb_16k.inc # DEBUG_SYNC must be compiled in. 
@@ -15,6 +16,8 @@ # Avoid CrashReporter popup on Mac --source include/not_crashrep.inc +call mtr.add_suppression("InnoDB: The log sequence numbers [0-9]+ and [0-9]+ in ibdata files do not match the log sequence number [0-9]+ in the ib_logfiles!"); + CREATE TABLE t1 (a INT PRIMARY KEY, b TEXT) ENGINE=InnoDB; CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; CREATE TABLE t3 (a INT PRIMARY KEY, b TEXT, c TEXT) ENGINE=InnoDB; @@ -41,7 +44,7 @@ connect (con2,localhost,root,,); # Check that the above SELECT is blocked let $wait_condition= select count(*) = 1 from information_schema.processlist - where state = 'Sending data' and + where state in ('Sending data', 'Opening tables') and info = 'SELECT a, RIGHT(b,20) FROM t1'; --source include/wait_condition.inc @@ -51,7 +54,7 @@ connection con1; reap; connection default; reap; -SET DEBUG_DBUG='+d,row_ins_extern_checkpoint'; +SET DEBUG='+d,row_ins_extern_checkpoint'; SET DEBUG_SYNC='before_row_ins_extern_latch SIGNAL rec_not_blob WAIT_FOR crash'; ROLLBACK; BEGIN; @@ -68,7 +71,8 @@ SELECT a, RIGHT(b,20) FROM t1; SELECT a FROM t1; # Request a crash, and restart the server. -SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +# Write file to make mysql-test-run.pl restart the server --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 INSERT INTO t2 VALUES (42); @@ -80,7 +84,6 @@ connection default; --error 2013 reap; -# Write file to make mysql-test-run.pl restart the server --enable_reconnect --source include/wait_until_connected_again.inc --disable_reconnect @@ -90,41 +93,34 @@ CHECK TABLE t1; INSERT INTO t3 VALUES (1,REPEAT('d',7000),REPEAT('e',100)), (2,REPEAT('g',7000),REPEAT('h',100)); -SET DEBUG_SYNC='before_row_upd_extern SIGNAL have_latch WAIT_FOR go'; +SET DEBUG_SYNC='blob_write_middle SIGNAL go_sel WAIT_FOR go_upd'; # This should move column b off-page. 
--send UPDATE t3 SET c=REPEAT('f',3000) WHERE a=1; +--echo # Connection con1: connect (con1,localhost,root,,); -SET DEBUG_SYNC='now WAIT_FOR have_latch'; +SET DEBUG_SYNC='now WAIT_FOR go_sel'; SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; SELECT @@tx_isolation; +SELECT a, RIGHT(b,20), RIGHT(c,20) FROM t3; +set debug_sync='now SIGNAL go_upd'; -# this one should block --- send +--echo # Connection default: +connection default; +--echo # reap UPDATE t3 SET c=REPEAT('f',3000) WHERE a=1; +reap; + +--echo # Connection con1: +connection con1; SELECT a, RIGHT(b,20), RIGHT(c,20) FROM t3; -connect (con2,localhost,root,,); - -# Check that the above SELECT is blocked -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where state = 'Sending data' and - info = 'SELECT a, RIGHT(b,20), RIGHT(c,20) FROM t3'; ---source include/wait_condition.inc - -SET DEBUG_SYNC='now SIGNAL go'; - -connection con1; -reap; disconnect con1; connection default; -reap; - CHECK TABLE t1,t2,t3; -connection con2; +connect (con2,localhost,root,,); BEGIN; INSERT INTO t2 VALUES (347); connection default; @@ -134,7 +130,7 @@ connection default; # remain open while we are writing the off-page columns and are # stuck in the DEBUG_SYNC. A checkpoint involves a flush, which # would wait for the buffer-fix to cease. -SET DEBUG_DBUG='+d,row_upd_extern_checkpoint'; +SET DEBUG='+d,row_upd_extern_checkpoint'; SET DEBUG_SYNC='before_row_upd_extern SIGNAL have_latch WAIT_FOR crash'; # This should move column b off-page. --send @@ -148,7 +144,8 @@ SELECT info FROM information_schema.processlist WHERE state = 'debug sync point: before_row_upd_extern'; # Request a crash, and restart the server. 
-SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +# Write file to make mysql-test-run.pl restart the server --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 COMMIT; @@ -159,7 +156,6 @@ connection default; --error 2013 reap; -# Write file to make mysql-test-run.pl restart the server --enable_reconnect --source include/wait_until_connected_again.inc --disable_reconnect @@ -178,7 +174,7 @@ connection default; # remain open while we are writing the off-page columns and are # stuck in the DEBUG_SYNC. A checkpoint involves a flush, which # would wait for the buffer-fix to cease. -SET DEBUG_DBUG='+d,row_upd_extern_checkpoint'; +SET DEBUG='+d,row_upd_extern_checkpoint'; SET DEBUG_SYNC='after_row_upd_extern SIGNAL have_latch WAIT_FOR crash'; # This should move column b off-page. --send @@ -192,7 +188,8 @@ SELECT info FROM information_schema.processlist WHERE state = 'debug sync point: after_row_upd_extern'; # Request a crash, and restart the server. 
-SET DEBUG_DBUG='+d,crash_commit_before'; +SET DEBUG='+d,crash_commit_before'; +# Write file to make mysql-test-run.pl restart the server --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 COMMIT; @@ -203,7 +200,6 @@ connection default; --error 2013 reap; -# Write file to make mysql-test-run.pl restart the server --enable_reconnect --source include/wait_until_connected_again.inc --disable_reconnect diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt index 33e2b863684..97b259ee047 100644 --- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt +++ b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt @@ -1 +1,2 @@ --log-error=$MYSQLTEST_VARDIR/tmp/my_restart.err +--innodb_buffer_pool_size=24M diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test index 79f7999d115..cbf2d0c9805 100644 --- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test +++ b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test @@ -1,8 +1,3 @@ -if (`select plugin_auth_version < "5.6.17" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB before 5.6.17 -} - --echo # --echo # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE --echo # OPERATION IF IT IS DONE IN-PLACE @@ -14,8 +9,13 @@ if (`select plugin_auth_version < "5.6.17" from information_schema.plugins where --source include/not_embedded.inc # DBUG_SUICIDE() hangs under valgrind --source include/not_valgrind.inc -# No windows, need perl ---source include/not_windows.inc + +CREATE TABLE t1( + a INT AUTO_INCREMENT PRIMARY KEY, + b CHAR(1), + c INT, + INDEX(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; # The flag innodb_change_buffering_debug is only available in debug builds. 
# It instructs InnoDB to try to evict pages from the buffer pool when @@ -24,13 +24,6 @@ if (`select plugin_auth_version < "5.6.17" from information_schema.plugins where SET GLOBAL innodb_change_buffering_debug = 1; let SEARCH_FILE = $MYSQLTEST_VARDIR/tmp/my_restart.err; -CREATE TABLE t1( - a INT AUTO_INCREMENT PRIMARY KEY, - b CHAR(1), - c INT, - INDEX(b)) -ENGINE=InnoDB; - # Create enough rows for the table, so that the change buffer will be # used for modifying the secondary index page. There must be multiple # index pages, because changes to the root page are never buffered. @@ -48,7 +41,6 @@ INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; INSERT INTO t1 SELECT 0,b,c FROM t1; -INSERT INTO t1 SELECT 0,b,c FROM t1; BEGIN; SELECT b FROM t1 LIMIT 3; @@ -60,7 +52,7 @@ DELETE FROM t1 WHERE a=1; # This should be buffered, if innodb_change_buffering_debug = 1 is in effect. INSERT INTO t1 VALUES(1,'X',1); -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; +SET DEBUG='+d,crash_after_log_ibuf_upd_inplace'; --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --error 2013 # This should force a change buffer merge diff --git a/mysql-test/suite/innodb/t/innodb-mdev-7408.test b/mysql-test/suite/innodb/t/innodb-mdev-7408.test index d1cd1879bb1..46f1afca27b 100644 --- a/mysql-test/suite/innodb/t/innodb-mdev-7408.test +++ b/mysql-test/suite/innodb/t/innodb-mdev-7408.test @@ -1,5 +1,7 @@ --source include/have_innodb.inc +call mtr.add_suppression("InnoDB: User stopword table .* does not exist."); + select @@global.innodb_ft_server_stopword_table; CREATE TABLE `stop_it-IT` ENGINE = InnoDB SELECT * FROM information_schema.INNODB_FT_DEFAULT_STOPWORD; --error 1231 diff --git a/mysql-test/suite/innodb/t/innodb-mdev-7513-master.opt b/mysql-test/suite/innodb/t/innodb-mdev-7513-master.opt new file mode 100644 index 00000000000..a2a7d5f6adf --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-mdev-7513-master.opt @@ 
-0,0 +1 @@ +--innodb-strict-mode=0 diff --git a/mysql-test/suite/innodb/t/innodb-mdev-7513.test b/mysql-test/suite/innodb/t/innodb-mdev-7513.test index b929ea14781..88f941ef70d 100644 --- a/mysql-test/suite/innodb/t/innodb-mdev-7513.test +++ b/mysql-test/suite/innodb/t/innodb-mdev-7513.test @@ -3,6 +3,7 @@ # MDEV-7513: ib_warn_row_too_big dereferences null thd +call mtr.add_suppression("InnoDB: Cannot add field `.* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); call mtr.add_suppression("Row size too large (> 8126)*"); --disable_warnings diff --git a/mysql-test/suite/innodb/t/innodb-mdev7046.test b/mysql-test/suite/innodb/t/innodb-mdev7046.test index b4085228e02..208dcd52f35 100644 --- a/mysql-test/suite/innodb/t/innodb-mdev7046.test +++ b/mysql-test/suite/innodb/t/innodb-mdev7046.test @@ -12,6 +12,7 @@ call mtr.add_suppression("InnoDB: File ./test/t1*"); call mtr.add_suppression("InnoDB: Error number*"); call mtr.add_suppression("InnoDB: File ./test/t1#p#p1#sp#p1sp0.ibd: 'rename' returned OS error*"); +call mtr.add_suppression("InnoDB: Operating system error number .* in a file operation."); # MDEV-7046: MySQL#74480 - Failing assertion: os_file_status(newpath, &exists, &type) # after Operating system error number 36 in a file operation diff --git a/mysql-test/suite/innodb/t/innodb-virtual-columns.test b/mysql-test/suite/innodb/t/innodb-virtual-columns.test index 368c6fc8cb1..0e0d6dbb2f5 100644 --- a/mysql-test/suite/innodb/t/innodb-virtual-columns.test +++ b/mysql-test/suite/innodb/t/innodb-virtual-columns.test @@ -33,6 +33,8 @@ CREATE TABLE IF NOT EXISTS grad_degree ( CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; + CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); CREATE INDEX grad_degree_as_of_term_ndx ON grad_degree (deg_as_of_term); @@ -137,6 
+139,8 @@ CREATE TABLE IF NOT EXISTS grad_degree ( CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; + CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); CREATE INDEX grad_degree_as_of_term_ndx ON grad_degree (deg_as_of_term); @@ -251,6 +255,8 @@ CREATE TABLE IF NOT EXISTS grad_degree ( CONSTRAINT grad_degree_stu_plan_admit_pky PRIMARY KEY (student_id, plan, admit_term) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +SHOW CREATE TABLE grad_degree; + CREATE INDEX grad_degree_wdraw_rsn_ndx ON grad_degree (wdraw_rsn); ALTER TABLE grad_degree DROP COLUMN ofis_deg_status2, DROP COLUMN ofis_deg_status3, diff --git a/mysql-test/suite/innodb/t/innodb-wl5522-1.test b/mysql-test/suite/innodb/t/innodb-wl5522-1.test index b1db34976a9..6c2607effe8 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522-1.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522-1.test @@ -3,6 +3,8 @@ -- source include/have_innodb.inc +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); + --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings diff --git a/mysql-test/suite/innodb/t/innodb-wl5522-debug-zip.test b/mysql-test/suite/innodb/t/innodb-wl5522-debug-zip.test index 4b03ac008d2..982149f356c 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522-debug-zip.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522-debug-zip.test @@ -17,6 +17,12 @@ # allow test to run only when innodb-page-size=16 --source include/have_innodb_16k.inc +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .* because the .ibd file is missing. 
Please refer to .* for how to resolve the issue."); +call mtr.add_suppression("InnoDB: Error: Tablespace flags .* corrupted unused .*"); +call mtr.add_suppression("InnoDB: Tablespace flags: .* corrupted in file: .* "); +call mtr.add_suppression("InnoDB: Page 0 at offset 0 looks corrupted in file .*"); +flush tables; let MYSQLD_DATADIR =`SELECT @@datadir`; let $innodb_file_per_table = `SELECT @@innodb_file_per_table`; @@ -524,8 +530,8 @@ SET SESSION debug_dbug="+d,buf_page_is_corrupt_failure"; --replace_regex /'.*t1.cfg'/'t1.cfg'/ -# Following alter is not failing -#--error ER_INTERNAL_ERROR +# Following alter is failing +--error ER_INTERNAL_ERROR ALTER TABLE test_wl5522.t1 IMPORT TABLESPACE; SET SESSION debug_dbug="-d,buf_page_is_corrupt_failure"; diff --git a/mysql-test/suite/innodb/t/innodb-wl5522-debug.test b/mysql-test/suite/innodb/t/innodb-wl5522-debug.test index 9c0c11fadab..05c4c04f2d3 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522-debug.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522-debug.test @@ -15,6 +15,16 @@ -- source include/have_innodb.inc +call mtr.add_suppression("InnoDB: Operating system error number .* in a file operation."); +call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("InnoDB: If you are installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: .*"); +call mtr.add_suppression("InnoDB: Tablespace flags: .*"); +call mtr.add_suppression("InnoDB: Ignoring tablespace .* because it could not be opened."); +call mtr.add_suppression("InnoDB: Tablespace for table .* is set as discarded."); +call mtr.add_suppression("InnoDB: Cannot calculate statistics for table .*"); +call mtr.add_suppression("InnoDB: Page 0 at offset 0 looks corrupted in file .*"); + let MYSQLD_DATADIR =`SELECT @@datadir`; let $innodb_file_per_table = `SELECT @@innodb_file_per_table`; 
let $pathfix=/: '.*test_wl5522.*t1.ibd'/: 'test_wl5522\\t1.ibd'/; diff --git a/mysql-test/suite/innodb/t/innodb-wl5522-zip.test b/mysql-test/suite/innodb/t/innodb-wl5522-zip.test index d139e0b700d..395e4def85d 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522-zip.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522-zip.test @@ -7,6 +7,8 @@ # allow test to run only when innodb-page-size=16 --source include/have_innodb_16k.inc +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); + --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings diff --git a/mysql-test/suite/innodb/t/innodb-wl5522.test b/mysql-test/suite/innodb/t/innodb-wl5522.test index c9e7748cb47..b04c726b74a 100644 --- a/mysql-test/suite/innodb/t/innodb-wl5522.test +++ b/mysql-test/suite/innodb/t/innodb-wl5522.test @@ -3,6 +3,8 @@ -- source include/have_innodb.inc +call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT."); + --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings diff --git a/mysql-test/suite/innodb/t/innodb.test b/mysql-test/suite/innodb/t/innodb.test index 2e7306c8e29..e456d48b5c2 100644 --- a/mysql-test/suite/innodb/t/innodb.test +++ b/mysql-test/suite/innodb/t/innodb.test @@ -31,8 +31,15 @@ set optimizer_switch = 'mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; # the test to be run multiple times without restarting the mysqld server. 
# See Bug#43309 Test main.innodb can't be run twice -- disable_query_log +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +call mtr.add_suppression("\\[ERROR\\] InnoDB: in ALTER TABLE `test`.`t1`"); +call mtr.add_suppression("\\[ERROR\\] InnoDB: in RENAME TABLE table `test`.`t1`"); + +SET @innodb_thread_sleep_delay_orig = @@innodb_thread_sleep_delay; + SET @innodb_thread_concurrency_orig = @@innodb_thread_concurrency; +--disable_warnings SET @innodb_rows_deleted_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_deleted'); SET @innodb_rows_inserted_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted'); SET @innodb_rows_updated_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated'); @@ -41,6 +48,7 @@ SET @innodb_row_lock_current_waits_orig = (SELECT variable_value FROM informatio SET @innodb_row_lock_time_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time'); SET @innodb_row_lock_time_max_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max'); SET @innodb_row_lock_time_avg_orig = (SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg'); +--enable_warnings -- enable_query_log --disable_warnings @@ -677,6 +685,8 @@ drop table t1; # Test of multi-table-delete # +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; + CREATE TABLE t1 ( number bigint(20) NOT NULL default '0', cname char(15) NOT NULL default '', @@ -720,6 +730,7 @@ select * from t1; select * from t2; select * from t2; drop table t1,t2; +SET sql_mode = default; # # A simple test with some isolation levels @@ -1056,18 +1067,84 @@ UPDATE t1 SET c1 = 'other' WHERE c1 = 'old'; DROP TABLE 
t2,t1; # -# test for recursion depth limit +# test for FK cascade depth limit # +call mtr.add_suppression("Cannot delete/update rows with cascading foreign key constraints that exceed max depth of 255. Please drop excessive foreign constraints and try again"); + create table t1( id int primary key, pid int, index(pid), foreign key(pid) references t1(id) on delete cascade) engine=innodb; -insert into t1 values(0,0),(1,0),(2,1),(3,2),(4,3),(5,4),(6,5),(7,6), - (8,7),(9,8),(10,9),(11,10),(12,11),(13,12),(14,13),(15,14); --- error 1451 +insert into t1 values +( 0, 0), ( 1, 0), ( 2, 1), ( 3, 2), +( 4, 3), ( 5, 4), ( 6, 5), ( 7, 6), +( 8, 7), ( 9, 8), ( 10, 9), ( 11, 10), +( 12, 11), ( 13, 12), ( 14, 13), ( 15, 14), +( 16, 15), ( 17, 16), ( 18, 17), ( 19, 18), +( 20, 19), ( 21, 20), ( 22, 21), ( 23, 22), +( 24, 23), ( 25, 24), ( 26, 25), ( 27, 26), +( 28, 27), ( 29, 28), ( 30, 29), ( 31, 30), +( 32, 31), ( 33, 32), ( 34, 33), ( 35, 34), +( 36, 35), ( 37, 36), ( 38, 37), ( 39, 38), +( 40, 39), ( 41, 40), ( 42, 41), ( 43, 42), +( 44, 43), ( 45, 44), ( 46, 45), ( 47, 46), +( 48, 47), ( 49, 48), ( 50, 49), ( 51, 50), +( 52, 51), ( 53, 52), ( 54, 53), ( 55, 54), +( 56, 55), ( 57, 56), ( 58, 57), ( 59, 58), +( 60, 59), ( 61, 60), ( 62, 61), ( 63, 62), +( 64, 63), ( 65, 64), ( 66, 65), ( 67, 66), +( 68, 67), ( 69, 68), ( 70, 69), ( 71, 70), +( 72, 71), ( 73, 72), ( 74, 73), ( 75, 74), +( 76, 75), ( 77, 76), ( 78, 77), ( 79, 78), +( 80, 79), ( 81, 80), ( 82, 81), ( 83, 82), +( 84, 83), ( 85, 84), ( 86, 85), ( 87, 86), +( 88, 87), ( 89, 88), ( 90, 89), ( 91, 90), +( 92, 91), ( 93, 92), ( 94, 93), ( 95, 94), +( 96, 95), ( 97, 96), ( 98, 97), ( 99, 98), +(100, 99), (101, 100), (102, 101), (103, 102), +(104, 103), (105, 104), (106, 105), (107, 106), +(108, 107), (109, 108), (110, 109), (111, 110), +(112, 111), (113, 112), (114, 113), (115, 114), +(116, 115), (117, 116), (118, 117), (119, 118), +(120, 119), (121, 120), (122, 121), (123, 122), +(124, 123), (125, 124), (126, 125), 
(127, 126), +(128, 127), (129, 128), (130, 129), (131, 130), +(132, 131), (133, 132), (134, 133), (135, 134), +(136, 135), (137, 136), (138, 137), (139, 138), +(140, 139), (141, 140), (142, 141), (143, 142), +(144, 143), (145, 144), (146, 145), (147, 146), +(148, 147), (149, 148), (150, 149), (151, 150), +(152, 151), (153, 152), (154, 153), (155, 154), +(156, 155), (157, 156), (158, 157), (159, 158), +(160, 159), (161, 160), (162, 161), (163, 162), +(164, 163), (165, 164), (166, 165), (167, 166), +(168, 167), (169, 168), (170, 169), (171, 170), +(172, 171), (173, 172), (174, 173), (175, 174), +(176, 175), (177, 176), (178, 177), (179, 178), +(180, 179), (181, 180), (182, 181), (183, 182), +(184, 183), (185, 184), (186, 185), (187, 186), +(188, 187), (189, 188), (190, 189), (191, 190), +(192, 191), (193, 192), (194, 193), (195, 194), +(196, 195), (197, 196), (198, 197), (199, 198), +(200, 199), (201, 200), (202, 201), (203, 202), +(204, 203), (205, 204), (206, 205), (207, 206), +(208, 207), (209, 208), (210, 209), (211, 210), +(212, 211), (213, 212), (214, 213), (215, 214), +(216, 215), (217, 216), (218, 217), (219, 218), +(220, 219), (221, 220), (222, 221), (223, 222), +(224, 223), (225, 224), (226, 225), (227, 226), +(228, 227), (229, 228), (230, 229), (231, 230), +(232, 231), (233, 232), (234, 233), (235, 234), +(236, 235), (237, 236), (238, 237), (239, 238), +(240, 239), (241, 240), (242, 241), (243, 242), +(244, 243), (245, 244), (246, 245), (247, 246), +(248, 247), (249, 248), (250, 249), (251, 250), +(252, 251), (253, 252), (254, 253), (255, 254); +--error ER_GET_ERRMSG,ER_ROW_IS_REFERENCED_2 delete from t1 where id=0; -delete from t1 where id=15; +delete from t1 where id=255; +--error 0,ER_ROW_IS_REFERENCED_2 delete from t1 where id=0; drop table t1; @@ -1279,6 +1356,9 @@ drop table t1; create table t1 (a int not null, b int not null, c blob not null, d int not null, e int, primary key (a,b,c(255),d)) engine=innodb; insert into t1 values 
(2,2,"b",2,2),(1,1,"a",1,1),(3,3,"ab",3,3); +-- disable_result_log +analyze table t1; +-- enable_result_log select * from t1 order by a,b,c,d; explain select * from t1 order by a,b,c,d; drop table t1; @@ -1342,10 +1422,12 @@ source include/varchar.inc; # # Some errors/warnings on create # - +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; # Embedded server doesn't chdir to data directory --replace_result $MYSQLTEST_VARDIR . master-data/ '' +SET GLOBAL innodb_large_prefix=OFF; create table t1 (v varchar(65530), key(v)); +SET GLOBAL innodb_large_prefix=default; drop table t1; create table t1 (v varchar(65536)); show create table t1; @@ -1353,8 +1435,8 @@ drop table t1; create table t1 (v varchar(65530) character set utf8); show create table t1; drop table t1; - -eval set storage_engine=$default; +SET sql_mode = default; +eval set default_storage_engine=$default; # InnoDB specific varchar tests create table t1 (v varchar(16384)) engine=innodb; @@ -1459,7 +1541,7 @@ CREATE TABLE t1 id INT PRIMARY KEY ) ENGINE=InnoDB; ---error 1005,1005 +--error ER_CANNOT_ADD_FOREIGN,1005 CREATE TEMPORARY TABLE t2 ( id INT NOT NULL PRIMARY KEY, @@ -1500,6 +1582,8 @@ show create table t9; drop table t1, t2, t3, t4, t5, t6, t7, t8, t9; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +SET GLOBAL innodb_large_prefix=OFF; # these should have their index length trimmed create table t1 (col1 varchar(768), index(col1)) character set = latin1 engine = innodb; @@ -1509,25 +1593,28 @@ create table t3 (col1 text, index(col1(768))) character set = latin1 engine = innodb; create table t4 (col1 blob, index(col1(768))) character set = latin1 engine = innodb; +SET GLOBAL innodb_large_prefix=default; show create table t1; drop table t1, t2, t3, t4; # these should be refused ---error 1071 +set global innodb_large_prefix=OFF; +--error ER_TOO_LONG_KEY create table t1 (col1 varchar(768) primary key) character set = latin1 engine = innodb; ---error 1071 +--error ER_TOO_LONG_KEY create table t2 (col1 varbinary(768) primary 
key) character set = latin1 engine = innodb; ---error 1071 +--error ER_TOO_LONG_KEY create table t3 (col1 text, primary key(col1(768))) character set = latin1 engine = innodb; ---error 1071 +--error ER_TOO_LONG_KEY create table t4 (col1 blob, primary key(col1(768))) character set = latin1 engine = innodb; - +SET sql_mode = default; +set global innodb_large_prefix=default; # # Test improved foreign key error messages (bug #3443) # @@ -1549,10 +1636,10 @@ INSERT INTO t2 VALUES(2); INSERT INTO t1 VALUES(1); INSERT INTO t2 VALUES(1); ---error 1451 +--error ER_ROW_IS_REFERENCED_2 DELETE FROM t1 WHERE id = 1; ---error 1451 +--error ER_ROW_IS_REFERENCED_2, 1217 DROP TABLE t1; SET FOREIGN_KEY_CHECKS=0; @@ -1678,11 +1765,11 @@ insert into t2 values(2); insert into t4 values(2); insert into t2 values(1); insert into t4 values(1); --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 update t1 set a=2; -- error 1452 update t2 set a=2; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 update t3 set a=2; -- error 1452 update t4 set a=2; @@ -1719,7 +1806,7 @@ create table t1 (a varchar(255) character set utf8, # test the padding of BINARY types and collations (Bug #14189) - +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; create table t1 (s1 varbinary(2),primary key (s1)) engine=innodb; create table t2 (s1 binary(2),primary key (s1)) engine=innodb; create table t3 (s1 varchar(2) binary,primary key (s1)) engine=innodb; @@ -1751,11 +1838,11 @@ insert into t2 values(0x41); select hex(s1) from t2; update t1 set s1=0x123456 where a=2; select hex(s1) from t2; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 update t1 set s1=0x12 where a=1; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 update t1 set s1=0x12345678 where a=1; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 update t1 set s1=0x123457 where a=1; update t1 set s1=0x1220 where a=1; select hex(s1) from t2; @@ -1763,11 +1850,11 @@ update t1 set s1=0x1200 where a=1; select hex(s1) from t2; update t1 set s1=0x4200 where a=1; select hex(s1) 
from t2; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 delete from t1 where a=1; delete from t1 where a=2; update t2 set s1=0x4120; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 delete from t1; delete from t1 where a!=3; select a,hex(s1) from t1; @@ -1786,7 +1873,7 @@ select hex(s1) from t2; update t1 set s1=0x12 where a=2; select hex(s1) from t2; delete from t1 where a=1; --- error 1451 +-- error ER_ROW_IS_REFERENCED_2 delete from t1 where a=2; select a,hex(s1) from t1; select hex(s1) from t2; @@ -1803,7 +1890,7 @@ ALTER TABLE t2 ADD CONSTRAINT t2_ibfk_0 FOREIGN KEY (a) REFERENCES t1(a); ALTER TABLE t2 DROP FOREIGN KEY t2_ibfk_0; SHOW CREATE TABLE t2; DROP TABLE t2,t1; - +SET sql_mode = default; # # Test case for bug #16229: MySQL/InnoDB uses full explicit table locks in trigger processing # @@ -2344,8 +2431,9 @@ DROP TABLE t1,t2; # # Bug #21101 (Prints wrong error message if max row size is too large) # -set innodb_strict_mode=on; ---error 1118 +SET innodb_strict_mode=ON; +--replace_result 8126 {checked_valid} 4030 {checked_valid} 1982 {checked_valid} +--error ER_TOO_BIG_ROWSIZE CREATE TABLE t1 ( c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255), c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255), @@ -2356,6 +2444,7 @@ CREATE TABLE t1 ( c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255), c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255) ) ENGINE = InnoDB; +SET innodb_strict_mode=OFF; # # Bug #31860 InnoDB assumes AUTOINC values can only be positive. 
@@ -2512,6 +2601,7 @@ DROP TABLE bug35537; DISCONNECT c1; CONNECTION default; +SET GLOBAL innodb_thread_sleep_delay = @innodb_thread_sleep_delay_orig; SET GLOBAL innodb_thread_concurrency = @innodb_thread_concurrency_orig; -- enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb_blob_unrecoverable_crash.test b/mysql-test/suite/innodb/t/innodb_blob_unrecoverable_crash.test deleted file mode 100644 index 8553d97bd9e..00000000000 --- a/mysql-test/suite/innodb/t/innodb_blob_unrecoverable_crash.test +++ /dev/null @@ -1,52 +0,0 @@ ---source include/not_embedded.inc ---source include/not_crashrep.inc ---source include/have_innodb.inc - -call mtr.add_suppression("InnoDB: The total blob data length"); - -let $old_max_allowed_packet = `select @@max_allowed_packet`; -SET GLOBAL max_allowed_packet = 100*1024*1024; - -connect(big_packets,localhost,root,,); -connection big_packets; - -CREATE TABLE t1 (a BIGINT PRIMARY KEY, b LONGBLOB) ENGINE=InnoDB; - -# Insert a few rows (it doesn't really matter how many). These transactions -# are committed once they are acked, so they should not be lost. -INSERT INTO t1 (a, b) VALUES (1, '1'); -INSERT INTO t1 (a, b) VALUES (2, '2'); -INSERT INTO t1 (a, b) VALUES (3, '3'); -INSERT INTO t1 (a, b) VALUES (4, '4'); -INSERT INTO t1 (a, b) VALUES (5, '5'); - -# The BLOB insert will fail, and should disappear. However all data committed -# up to this point should not be lost. -start transaction; ---replace_regex /\(> [0-9]*\)/(> ####)/ ---error ER_TOO_BIG_ROWSIZE -INSERT INTO t1 (a, b) VALUES (6, REPEAT('a', 20*1024*1024)); - -connection default; - -# We expect a restart. ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect - ---echo # Quick shutdown and restart server ---shutdown_server 0 - -# Wait for the server to come back up, and reconnect. ---enable_reconnect ---source include/wait_until_connected_again.inc - -connection default; - -# We should see (1,2,3,4,5) here. -SELECT a FROM t1; - -# Clean up. 
-DROP TABLE t1; - ---disable_query_log -eval set global max_allowed_packet = $old_max_allowed_packet; ---enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb_bug12400341.test b/mysql-test/suite/innodb/t/innodb_bug12400341.test index 235ece04a8c..9a96f29fc3b 100644 --- a/mysql-test/suite/innodb/t/innodb_bug12400341.test +++ b/mysql-test/suite/innodb/t/innodb_bug12400341.test @@ -14,10 +14,8 @@ if (`select count(*)=0 from information_schema.global_variables where variable_n # undo slots of the previous test might exist still --source include/not_windows.inc -# Previous undo slots cause unnecessary failures ---source include/not_windows.inc - call mtr.add_suppression("InnoDB: Warning: cannot find a free slot for an undo log. Do you have too*"); +call mtr.add_suppression("\\[Warning\\] InnoDB: Cannot find a free slot for an undo log. Do you have too"); --disable_query_log set @old_innodb_trx_rseg_n_slots_debug = @@innodb_trx_rseg_n_slots_debug; diff --git a/mysql-test/suite/innodb/t/innodb_bug12902967.test b/mysql-test/suite/innodb/t/innodb_bug12902967.test index 8e1b8199524..1b5df7fa165 100644 --- a/mysql-test/suite/innodb/t/innodb_bug12902967.test +++ b/mysql-test/suite/innodb/t/innodb_bug12902967.test @@ -9,6 +9,8 @@ --source include/have_innodb.inc --source include/not_embedded.inc +call mtr.add_suppression("In ALTER TABLE .* has or is referenced in foreign key constraints which are not compatible with the new table definition."); + let error_log= $MYSQLTEST_VARDIR/log/mysqld.1.err; --source include/restart_mysqld.inc @@ -20,11 +22,4 @@ create table t1 (f1 integer primary key) engine innodb; --replace_regex /'\.\/test\/#sql-[0-9a-f_]*'/'#sql-temporary'/ --error ER_ERROR_ON_RENAME alter table t1 add constraint c1 foreign key (f1) references t1(f1); ---source include/restart_mysqld.inc -perl; -$file = $ENV{error_log}; -open (FILE, '<', $file) or die "can't open(< $file): $!\n"; -print ((grep { /^InnoDB:/ and not /aio/i and not /io_setup\(\) attempt [0-9]+ 
failed/ } )[-2..-1]); -EOF - drop table t1; diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491-master.opt b/mysql-test/suite/innodb/t/innodb_bug14147491-master.opt index 6b82baca147..410738202bd 100644 --- a/mysql-test/suite/innodb/t/innodb_bug14147491-master.opt +++ b/mysql-test/suite/innodb/t/innodb_bug14147491-master.opt @@ -1 +1,4 @@ ---innodb_file_per_table=1 --skip-stack-trace --skip-core-file +--innodb_file_per_table=1 +--skip-stack-trace +--skip-core-file +--loose-innodb_buffer_pool_load_at_startup=OFF diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491.test b/mysql-test/suite/innodb/t/innodb_bug14147491.test index 16e88826c85..c73571af6dd 100644 --- a/mysql-test/suite/innodb/t/innodb_bug14147491.test +++ b/mysql-test/suite/innodb/t/innodb_bug14147491.test @@ -1,41 +1,29 @@ # # Test opening a corrupted table. # - --- source include/not_encrypted.inc - -call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*"); - -# Don't test under valgrind, memory leaks will occur +# Valgrind can hang or return spurious messages on DBUG_SUICIDE source include/not_valgrind.inc; # Avoid CrashReporter popup on Mac source include/not_crashrep.inc; -# Don't test under embedded +# Restarting is not supported under embedded source include/not_embedded.inc; # Require InnoDB source include/have_innodb.inc; # Require Debug for SET DEBUG source include/have_debug.inc; +# Not encrypted tables +source include/not_encrypted.inc; # Test could open crash reporter on Windows # if compiler set up source include/not_windows.inc; -CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); -CALL mtr.add_suppression("InnoDB: Warning: database page corruption or a failed"); -CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed"); -CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*"); -CALL mtr.add_suppression("InnoDB: You may have to recover from a 
backup."); -CALL mtr.add_suppression("InnoDB: It is also possible that your operatingsystem has corrupted its own file cache."); -CALL mtr.add_suppression("InnoDB: and rebooting your computer removes the error."); -CALL mtr.add_suppression("InnoDB: If the corrupt page is an index page you can also try to"); -CALL mtr.add_suppression("InnoDB: fix the corruption by dumping, dropping, and reimporting"); -CALL mtr.add_suppression("InnoDB: the corrupt table. You can use CHECK"); -CALL mtr.add_suppression("InnoDB: TABLE to scan your table for corruption."); -CALL mtr.add_suppression("InnoDB: See also .* about forcing recovery."); +--disable_query_log +CALL mtr.add_suppression("\\[ERROR\\] \\[FATAL\\] InnoDB: Unable to read page \\[page id: space=.*, page number=.*\\] into the buffer pool after 100 attempts"); +CALL mtr.add_suppression("\\[ERROR\\] InnoDB: Database page corruption on disk or a failed"); +--enable_query_log ---echo # Create and populate the table to be corrupted -CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=InnoDB; +CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=InnoDB; INSERT INTO t1 (b) VALUES ('corrupt me'); --disable_query_log --let $i = 10 @@ -50,17 +38,7 @@ INSERT INTO t1 (b) VALUES ('corrupt me'); let $MYSQLD_DATADIR=`select @@datadir`; let t1_IBD = $MYSQLD_DATADIR/test/t1.ibd; ---echo # Write file to make mysql-test-run.pl expect the "crash", but don't ---echo # start it until it's told to ---exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect - ---echo # We give 30 seconds to do a clean shutdown because we do not want ---echo # to redo apply the pages of t1.ibd at the time of recovery. ---echo # We want SQL to initiate the first access to t1.ibd. -shutdown_server 30; - ---echo # Wait until disconnected. 
---source include/wait_until_disconnected.inc +--source include/shutdown_mysqld.inc --echo # Backup the t1.ibd before corrupting --copy_file $t1_IBD $MYSQLD_DATADIR/test/t1.ibd.backup @@ -92,10 +70,7 @@ while ($len = sysread IBD_FILE, $chunk, 1024) close IBD_FILE; EOF ---echo # Write file to make mysql-test-run.pl start up the server again ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc SET DEBUG_DBUG = '+d,innodb_page_corruption_retries'; @@ -117,10 +92,7 @@ SLEEP 1; --remove_file $MYSQLD_DATADIR/test/t1.ibd --move_file $MYSQLD_DATADIR/test/t1.ibd.backup $MYSQLD_DATADIR/test/t1.ibd ---echo # Write file to make mysql-test-run.pl start up the server again ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc # Note SET DEBUG = '-d,innodb_page_corruption_retries' is not required # because the session information is lost after server restart diff --git a/mysql-test/suite/innodb/t/innodb_bug30423.test b/mysql-test/suite/innodb/t/innodb_bug30423.test index bbb6f1155ff..de53a935156 100644 --- a/mysql-test/suite/innodb/t/innodb_bug30423.test +++ b/mysql-test/suite/innodb/t/innodb_bug30423.test @@ -139,6 +139,7 @@ analyze table bug30243_3; # Following query plan shows that we get the correct rows per # unique value (should be approximately 1 row per value) +-- replace_column 9 ROWS explain SELECT COUNT(*), 0 FROM bug30243_1 orgs LEFT JOIN bug30243_3 sa_opportunities @@ -159,6 +160,7 @@ analyze table bug30243_3; # Following query plan shows that we get the correct rows per # unique value (~1) +-- replace_column 9 ROWS explain SELECT COUNT(*), 0 FROM bug30243_1 orgs LEFT JOIN bug30243_3 sa_opportunities diff --git a/mysql-test/suite/innodb/t/innodb_bug34053.test b/mysql-test/suite/innodb/t/innodb_bug34053.test index 56c26acb632..d93d5100d81 100644 
--- a/mysql-test/suite/innodb/t/innodb_bug34053.test +++ b/mysql-test/suite/innodb/t/innodb_bug34053.test @@ -5,7 +5,7 @@ -- source include/not_embedded.inc -- source include/have_innodb.inc -SET storage_engine=InnoDB; +SET default_storage_engine=InnoDB; # we do not really care about what gets printed, we are only # interested in getting success or failure according to our @@ -20,8 +20,12 @@ FLUSH PRIVILEGES; -- connection con1 -- error ER_SPECIFIC_ACCESS_DENIED_ERROR +SET GLOBAL innodb_status_output=ON; +-- error ER_SPECIFIC_ACCESS_DENIED_ERROR +SET GLOBAL innodb_status_output_locks=ON; + CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB; -# this should only fail with UNIV_MEM_DEBUG +DROP TABLE innodb_monitor; CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB; DROP TABLE innodb_mem_validate; CREATE TABLE innodb_sql (a INT) ENGINE=INNODB; @@ -36,16 +40,18 @@ CREATE TABLE nnodb_monitor (a INT) ENGINE=INNODB; DROP TABLE nnodb_monitor; -- connection default -CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB; -CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB; +SET GLOBAL innodb_status_output=ON; +SET GLOBAL innodb_status_output_locks=ON; -- connection con1 -- error ER_SPECIFIC_ACCESS_DENIED_ERROR -DROP TABLE innodb_monitor; -DROP TABLE innodb_mem_validate; +SET GLOBAL innodb_status_output=OFF; +-- error ER_SPECIFIC_ACCESS_DENIED_ERROR +SET GLOBAL innodb_status_output_locks=OFF; -- connection default -DROP TABLE innodb_monitor; +SET GLOBAL innodb_status_output=OFF; +SET GLOBAL innodb_status_output_locks=OFF; DROP USER 'shane'@'localhost'; -- disconnect con1 diff --git a/mysql-test/suite/innodb/t/innodb_bug34300.test b/mysql-test/suite/innodb/t/innodb_bug34300.test index 13c708b48d6..8c73af13bc1 100644 --- a/mysql-test/suite/innodb/t/innodb_bug34300.test +++ b/mysql-test/suite/innodb/t/innodb_bug34300.test @@ -1,35 +1,26 @@ -# -# Bug#34300 Tinyblob & tinytext fields currupted after export/import and alter in 5.1 -# http://bugs.mysql.com/34300 -# +--echo # 
+--echo # Bug#34300 Tinyblob & tinytext fields currupted after export/import and alter in 5.1 +--echo # -- source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.22-MariaDB-72.0" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in XtraDB as of 5.6.22-MariaDB-72.0 or earlier -} - -- disable_query_log --- disable_result_log - -call mtr.add_suppression("InnoDB: The total blob data length"); +call mtr.add_suppression("InnoDB: Warning: a long semaphore wait:"); +call mtr.add_suppression("The age of the last checkpoint is"); # set packet size and reconnect let $max_packet=`select @@global.max_allowed_packet`; +-- enable_query_log + SET @@global.max_allowed_packet=16777216; --connect (newconn, localhost, root,,) ---enable_result_log - CREATE TABLE bug34300 ( f4 TINYTEXT, f6 MEDIUMTEXT, f8 TINYBLOB ) ENGINE=InnoDB; ---replace_regex /\(> [0-9]*\)/(> ####)/ ---error ER_TOO_BIG_ROWSIZE INSERT INTO bug34300 VALUES ('xxx', repeat('a', 8459264), 'zzz'); SELECT f4, f8 FROM bug34300; @@ -38,5 +29,10 @@ ALTER TABLE bug34300 ADD COLUMN (f10 INT); SELECT f4, f8 FROM bug34300; +--echo # Cleanup DROP TABLE bug34300; + +-- disable_query_log EVAL SET @@global.max_allowed_packet=$max_packet; +-- enable_query_log + diff --git a/mysql-test/suite/innodb/t/innodb_bug60049-master.opt b/mysql-test/suite/innodb/t/innodb_bug60049-master.opt index 741d8685459..22a5d4ed221 100644 --- a/mysql-test/suite/innodb/t/innodb_bug60049-master.opt +++ b/mysql-test/suite/innodb/t/innodb_bug60049-master.opt @@ -1 +1 @@ ---loose-innodb-fast-shutdown=0 +--innodb_fast_shutdown=0 diff --git a/mysql-test/suite/innodb/t/innodb_bug60049.test b/mysql-test/suite/innodb/t/innodb_bug60049.test index b1d56d16a5e..cb05ca297ea 100644 --- a/mysql-test/suite/innodb/t/innodb_bug60049.test +++ b/mysql-test/suite/innodb/t/innodb_bug60049.test @@ -5,12 +5,11 @@ -- source include/not_embedded.inc -- source include/have_innodb.inc -- source include/have_innodb_16k.inc --- source 
include/not_encrypted.inc - -call mtr.add_suppression('InnoDB: Error: Table "mysql"."innodb_(table|index)_stats" not found'); -call mtr.add_suppression('InnoDB: Error: Fetch of persistent statistics requested'); -- disable_query_log +call mtr.add_suppression('\\[ERROR\\] InnoDB: Table `mysql`.`innodb_(table|index)_stats` not found'); +call mtr.add_suppression('\\[ERROR\\] InnoDB: Fetch of persistent statistics requested for table `mysql`.`gtid_executed`'); + let $create1 = query_get_value(SHOW CREATE TABLE mysql.innodb_table_stats, Create Table, 1); let $create2 = query_get_value(SHOW CREATE TABLE mysql.innodb_index_stats, Create Table, 1); DROP TABLE mysql.innodb_index_stats; @@ -23,10 +22,7 @@ DROP TABLE u; SELECT @@innodb_fast_shutdown; let $MYSQLD_DATADIR=`select @@datadir`; -# Shut down the server --- exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --- shutdown_server 30 --- source include/wait_until_disconnected.inc +--source include/shutdown_mysqld.inc # Check the tail of ID_IND (SYS_TABLES.ID) let IBDATA1=$MYSQLD_DATADIR/ibdata1; @@ -45,10 +41,7 @@ close(FILE); print unpack("H*", $_), "\n"; EOF -# Restart the server. 
--- exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --- enable_reconnect --- source include/wait_until_connected_again.inc +--source include/start_mysqld.inc -- disable_query_log USE mysql; diff --git a/mysql-test/suite/innodb/t/innodb_corrupt_bit.test b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test index ee04e8d66fc..52a318f0fa8 100644 --- a/mysql-test/suite/innodb/t/innodb_corrupt_bit.test +++ b/mysql-test/suite/innodb/t/innodb_corrupt_bit.test @@ -2,32 +2,28 @@ # Test for persistent corrupt bit for corrupted index and table # -- source include/have_innodb.inc -#-- source include/have_innodb_16k.inc --- source include/not_embedded.inc +--source include/not_embedded.inc # This test needs debug server -- source include/have_debug.inc --- disable_query_log -call mtr.add_suppression("Flagged corruption of idx.*in"); +--disable_query_log +call mtr.add_suppression("Flagged corruption of.* in table .* in .*"); +--enable_query_log set names utf8; -SET UNIQUE_CHECKS=0; - CREATE TABLE corrupt_bit_test_ā( a INT AUTO_INCREMENT PRIMARY KEY, b CHAR(100), c INT, z INT, INDEX idx(b)) -ENGINE=InnoDB; +ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1); -# This is the first unique index we intend to corrupt CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b); -# This is the second unique index we intend to corrupt CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b); SELECT * FROM corrupt_bit_test_ā; @@ -37,9 +33,9 @@ INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā; select count(*) from corrupt_bit_test_ā; # This will flag all secondary indexes corrupted -SET SESSION debug_dbug="+d,dict_set_index_corrupted"; +SET SESSION debug="+d,dict_set_index_corrupted"; check table corrupt_bit_test_ā; -SET SESSION debug_dbug=""; +SET SESSION debug="-d,dict_set_index_corrupted"; # Cannot create new indexes while corrupted indexes exist --error ER_INDEX_CORRUPT @@ -79,8 +75,6 @@ set names utf8; -- error ER_INDEX_CORRUPT 
select z from corrupt_bit_test_ā; -show create table corrupt_bit_test_ā; - # Drop the corrupted index drop index idxē on corrupt_bit_test_ā; @@ -90,13 +84,13 @@ CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); --error ER_INDEX_CORRUPT CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); -show create table corrupt_bit_test_ā; drop index idx on corrupt_bit_test_ā; # Now that there exist no corrupted indexes, we can create new indexes. CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c); CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z); + # Now select back to normal select z from corrupt_bit_test_ā limit 10; diff --git a/mysql-test/suite/innodb/t/innodb_gis.test b/mysql-test/suite/innodb/t/innodb_gis.test index 1adb14ea482..45d66d95002 100644 --- a/mysql-test/suite/innodb/t/innodb_gis.test +++ b/mysql-test/suite/innodb/t/innodb_gis.test @@ -6,5 +6,8 @@ SET storage_engine=innodb; # # Bug #15680 (SPATIAL key in innodb) # ---error ER_TABLE_CANT_HANDLE_SPKEYS +# MySQL 5.7 Introduces SPATIAL keys for InnoDB +#--error ER_TABLE_CANT_HANDLE_SPKEYS create table t1 (g geometry not null, spatial gk(g)) engine=innodb; +drop table t1; + diff --git a/mysql-test/suite/innodb/t/innodb_mysql-master.opt b/mysql-test/suite/innodb/t/innodb_mysql-master.opt index a177f285d66..a1ee2c096cf 100644 --- a/mysql-test/suite/innodb/t/innodb_mysql-master.opt +++ b/mysql-test/suite/innodb/t/innodb_mysql-master.opt @@ -1 +1,3 @@ ---loose-innodb-lock-wait-timeout=2 --default-storage-engine=MyISAM +--loose-innodb-lock-wait-timeout=2 +--default-storage-engine=MyISAM +--loose-innodb-large-prefix=off diff --git a/mysql-test/suite/innodb/t/innodb_simulate_comp_failures.test b/mysql-test/suite/innodb/t/innodb_simulate_comp_failures.test index cf22935fad6..5a4978c9b37 100644 --- a/mysql-test/suite/innodb/t/innodb_simulate_comp_failures.test +++ b/mysql-test/suite/innodb/t/innodb_simulate_comp_failures.test @@ -1,6 +1,7 @@ --source include/big_test.inc # test takes too long with valgrind --source include/not_valgrind.inc 
+--source include/have_debug.inc --let $num_inserts = 1500 --let $num_ops = 3500 --source suite/innodb/include/innodb_simulate_comp_failures.inc diff --git a/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test b/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test index 1677a092e0c..79a16d36917 100644 --- a/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test +++ b/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test @@ -1,3 +1,6 @@ +--source include/have_debug.inc +--source include/not_valgrind.inc + --let $num_inserts = 1000 --let $num_ops = 30 --source suite/innodb/include/innodb_simulate_comp_failures.inc diff --git a/mysql-test/suite/innodb/t/innodb_skip_innodb_is_tables.test b/mysql-test/suite/innodb/t/innodb_skip_innodb_is_tables.test index 73cd8a26f6f..01ced047302 100644 --- a/mysql-test/suite/innodb/t/innodb_skip_innodb_is_tables.test +++ b/mysql-test/suite/innodb/t/innodb_skip_innodb_is_tables.test @@ -29,6 +29,7 @@ select * from information_schema.innodb_sys_foreign; select * from information_schema.innodb_sys_foreign_cols; select * from information_schema.innodb_sys_tablespaces; select * from information_schema.innodb_sys_datafiles; +--error 0,1109 select * from information_schema.innodb_changed_pages; select * from information_schema.innodb_tablespaces_encryption; select * from information_schema.innodb_tablespaces_scrubbing; diff --git a/mysql-test/suite/innodb/t/innodb_stats_create_on_corrupted.test b/mysql-test/suite/innodb/t/innodb_stats_create_on_corrupted.test index c932e45591d..5d36cfdcbb9 100644 --- a/mysql-test/suite/innodb/t/innodb_stats_create_on_corrupted.test +++ b/mysql-test/suite/innodb/t/innodb_stats_create_on_corrupted.test @@ -10,8 +10,8 @@ -- source include/have_innodb_16k.inc -- source include/not_embedded.inc -call mtr.add_suppression("InnoDB: Error: Table \"mysql\".\"innodb_index_stats\" not found"); -call mtr.add_suppression("InnoDB: Error: Fetch of persistent statistics 
requested for table"); +call mtr.add_suppression("InnoDB: Table .*innodb_index_stats.* not found"); +call mtr.add_suppression("InnoDB: Fetch of persistent statistics requested for table .*"); -- vertical_results diff --git a/mysql-test/suite/innodb/t/innodb_stats_fetch_corrupted.test b/mysql-test/suite/innodb/t/innodb_stats_fetch_corrupted.test index 4a3f7527c09..1603f3cd764 100644 --- a/mysql-test/suite/innodb/t/innodb_stats_fetch_corrupted.test +++ b/mysql-test/suite/innodb/t/innodb_stats_fetch_corrupted.test @@ -8,8 +8,8 @@ # test with 16k page size. -- source include/have_innodb_16k.inc -call mtr.add_suppression("InnoDB: Error: Table \"mysql\".\"innodb_index_stats\" not found"); -call mtr.add_suppression("InnoDB: Error: Fetch of persistent statistics requested for table"); +call mtr.add_suppression("InnoDB: Table `mysql`.`innodb_index_stats` not found"); +call mtr.add_suppression("InnoDB: Fetch of persistent statistics requested for table.*"); -- vertical_results diff --git a/mysql-test/suite/innodb/t/innodb_sys_semaphore_waits.test b/mysql-test/suite/innodb/t/innodb_sys_semaphore_waits.test index 53d43e6b711..e7acb98b0d0 100644 --- a/mysql-test/suite/innodb/t/innodb_sys_semaphore_waits.test +++ b/mysql-test/suite/innodb/t/innodb_sys_semaphore_waits.test @@ -77,10 +77,10 @@ let $counter= 80; let $mysql_errno= 0; while (!$mysql_errno) { - --error 0,1040,1053,2002,2003,2006,2013 + --error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013 show status; - --error 0,1040,1053,2002,2003,2006,2013 + --error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013 select * from information_schema.innodb_sys_semaphore_waits; dec $counter; diff --git a/mysql-test/suite/innodb/t/strict_mode.test b/mysql-test/suite/innodb/t/strict_mode.test index 86b56a09c0e..48fc1ef7881 100644 --- a/mysql-test/suite/innodb/t/strict_mode.test +++ b/mysql-test/suite/innodb/t/strict_mode.test @@ -5,6 +5,8 @@ --echo # INNODB_STRICT_MODE = 1 --echo # +call mtr.add_suppression("InnoDB: 
Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); + set innodb_strict_mode = 0; create table t1 (id int auto_increment primary key, diff --git a/mysql-test/suite/innodb_fts/r/fulltext.result b/mysql-test/suite/innodb_fts/r/fulltext.result index 2f23a484508..f096f1c7dd5 100644 --- a/mysql-test/suite/innodb_fts/r/fulltext.result +++ b/mysql-test/suite/innodb_fts/r/fulltext.result @@ -8,8 +8,8 @@ INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'), ANALYZE TABLE t1; SHOW INDEX FROM t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t1 1 a 1 a NULL 5 NULL NULL YES FULLTEXT -t1 1 a 2 b NULL 5 NULL NULL YES FULLTEXT +t1 1 a 1 a NULL NULL NULL NULL YES FULLTEXT +t1 1 a 2 b NULL NULL NULL NULL YES FULLTEXT select * from t1 where MATCH(a,b) AGAINST ("collections"); a b Full-text indexes are called collections @@ -235,7 +235,7 @@ id show keys from t2; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment t2 1 tig 1 ticket A 3 NULL NULL YES BTREE -t2 1 tix 1 inhalt NULL 3 NULL NULL YES FULLTEXT +t2 1 tix 1 inhalt NULL NULL NULL NULL YES FULLTEXT show create table t2; Table Create Table t2 CREATE TABLE `t2` ( diff --git a/mysql-test/suite/innodb_fts/r/innodb-fts-fic.result b/mysql-test/suite/innodb_fts/r/innodb-fts-fic.result index e5df6ca8b05..731abad9198 100644 --- a/mysql-test/suite/innodb_fts/r/innodb-fts-fic.result +++ b/mysql-test/suite/innodb_fts/r/innodb-fts-fic.result @@ -1,3 +1,5 @@ +call mtr.add_suppression("\\[Warning\\] InnoDB: A new Doc ID must be supplied while updating FTS indexed columns."); +call mtr.add_suppression("\\[Warning\\] InnoDB: FTS Doc ID must be larger than [0-9]+ for table `test`.`articles`"); CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, 
title VARCHAR(200), diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result index a74d6094a7f..b76784d4ffd 100644 --- a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result +++ b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result @@ -1,5 +1,6 @@ -drop table if exists t2,t1; set names utf8; +call mtr.add_suppression("\\[Warning\\] InnoDB: A new Doc ID must be supplied while updating FTS indexed columns."); +call mtr.add_suppression("\\[Warning\\] InnoDB: FTS Doc ID must be larger than [0-9]+ for table `test`.`t1`"); CREATE TABLE t1 ( id1 INT , a1 VARCHAR(200) , @@ -34,46 +35,52 @@ INSERT INTO t2 (a2,b2) VALUES ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a2`) REFERENCES `t1` (`a1`) ON UPDATE CASCADE) DELETE FROM t1; ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a2`) REFERENCES `t1` (`a1`) ON UPDATE CASCADE) -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ORDER BY id1; id1 1 3 -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ORDER BY id2; id2 1 3 -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id1; id1 1 -3 -6 2 +3 4 5 -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; +6 +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id2; id2 1 -3 -6 2 +3 4 5 -SELECT id1 FROM t1 WHERE MATCH (a1,b1) 
AGAINST ('tutorial' WITH QUERY EXPANSION) ; +6 +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id1; id1 1 +2 3 +4 5 6 -2 -4 -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id2; id2 1 +2 3 +4 5 6 -2 -4 SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('"dbms database"@4' IN BOOLEAN MODE) ; id1 1 @@ -93,23 +100,23 @@ SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN id2 3 6 -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('+update +cascade' IN BOOLEAN MODE) ; +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('+update +cascade' IN BOOLEAN MODE) ORDER BY id1; id1 -4 -2 -6 1 -5 +2 3 -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('+update +cascade' IN BOOLEAN MODE) ; +4 +5 +6 +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('+update +cascade' IN BOOLEAN MODE) ORDER BY id2; id2 -4 -2 -6 1 -5 +2 3 -SELECT id2 FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%'; +4 +5 +6 +SELECT id2 FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%' ORDER BY id2; id2 1 2 @@ -221,8 +228,7 @@ id1 a1 b1 SELECT * FROM t2 WHERE a2 LIKE '%tutorial%'; id2 a2 b2 DROP TABLE t2 , t1; -DROP TABLE IF EXISTS t2,t1; -SET NAMES utf8; +call mtr.add_suppression("\\[ERROR\\] InnoDB: FTS Doc ID must be larger than 3 for table `test`.`t2`"); CREATE TABLE t1 ( id1 INT , a1 VARCHAR(200) , @@ -258,34 +264,34 @@ INSERT INTO t2 (a2,b2) VALUES ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a2`) REFERENCES `t1` (`a1`) ON UPDATE CASCADE) DELETE FROM t1; ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails (`test`.`t2`, CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`a2`) REFERENCES `t1` (`a1`) ON UPDATE CASCADE) -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ORDER BY id1; id1 
a1 b1 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 3 Optimizing MySQL In this tutorial we will show ... -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ; +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ORDER BY id2; id2 a2 b2 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 3 Optimizing MySQL In this tutorial we will show ... -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; -id1 a1 b1 -3 Optimizing MySQL In this tutorial we will show ... -1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... -2 How To Use MySQL Well After you went through a ... -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; -id2 a2 b2 -3 Optimizing MySQL In this tutorial we will show ... -1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... -2 How To Use MySQL Well After you went through a ... -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id1; id1 a1 b1 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... -3 Optimizing MySQL In this tutorial we will show ... 2 How To Use MySQL Well After you went through a ... -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ; +3 Optimizing MySQL In this tutorial we will show ... +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id2; id2 a2 b2 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... -3 Optimizing MySQL In this tutorial we will show ... 2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id1; +id1 a1 b1 +1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... +2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... 
+SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id2; +id2 a2 b2 +1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... +2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('"dbms database"@4' IN BOOLEAN MODE) ; id1 a1 b1 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... @@ -308,15 +314,15 @@ SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('"database comparison"@02' IN BOOL id1 a1 b1 SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('"database comparison"@02' IN BOOLEAN MODE) ; id2 a2 b2 -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY id1; id1 a1 b1 -4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... -2 How To Use MySQL Well After you went through a ... -6 MySQL Security When configured properly, MySQL ... 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... -5 MySQL vs. YourSQL In the following database comparison ... +2 How To Use MySQL Well After you went through a ... 3 Optimizing MySQL In this tutorial we will show ... -SELECT * FROM t2; +4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... +5 MySQL vs. YourSQL In the following database comparison ... +6 MySQL Security When configured properly, MySQL ... +SELECT * FROM t2 ORDER BY id2; id2 a2 b2 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 2 How To Use MySQL Well After you went through a ... @@ -335,23 +341,23 @@ SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN B id2 a2 b2 3 changing column - on UPDATE cascade In this tutorial we will show ... 6 changing column - on UPDATE cascade When configured properly, MySQL ... 
-SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ORDER BY id1; id1 a1 b1 -4 changing column - on UPDATE cascade to check foreign constraint -2 changing column - on UPDATE cascade to check foreign constraint -6 changing column - on UPDATE cascade to check foreign constraint 1 changing column - on UPDATE cascade to check foreign constraint -5 changing column - on UPDATE cascade to check foreign constraint +2 changing column - on UPDATE cascade to check foreign constraint 3 changing column - on UPDATE cascade to check foreign constraint -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ; +4 changing column - on UPDATE cascade to check foreign constraint +5 changing column - on UPDATE cascade to check foreign constraint +6 changing column - on UPDATE cascade to check foreign constraint +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ORDER BY id2; id2 a2 b2 -4 changing column - on UPDATE cascade 1. Never run mysqld as root. 2. ... -2 changing column - on UPDATE cascade After you went through a ... -6 changing column - on UPDATE cascade When configured properly, MySQL ... 1 changing column - on UPDATE cascade DBMS stands for DataBase VÐƷWİ... -5 changing column - on UPDATE cascade In the following database comparison ... +2 changing column - on UPDATE cascade After you went through a ... 3 changing column - on UPDATE cascade In this tutorial we will show ... -SELECT * FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%'; +4 changing column - on UPDATE cascade 1. Never run mysqld as root. 2. ... +5 changing column - on UPDATE cascade In the following database comparison ... +6 changing column - on UPDATE cascade When configured properly, MySQL ... +SELECT * FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%' ORDER BY id2; id2 a2 b2 1 changing column - on UPDATE cascade DBMS stands for DataBase VÐƷWİ... 
2 changing column - on UPDATE cascade After you went through a ... @@ -456,10 +462,7 @@ ROLLBACK; SELECT * FROM t2 WHERE MATCH(s2) AGAINST ('Lollipops'); s1 s2 DROP TABLE t2 , t1; -set global innodb_file_format="Barracuda"; set global innodb_file_per_table=1; -set global innodb_large_prefix=1; -set names utf8; CREATE TABLE t1 ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, a VARCHAR(200), @@ -481,13 +484,16 @@ t1 CREATE TABLE `t1` ( PRIMARY KEY (`id`), FULLTEXT KEY `idx` (`a`,`b`) ) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPRESSED +SELECT count(*) FROM information_schema.innodb_sys_tables WHERE name LIKE "%FTS_%" AND space !=0; +count(*) +11 INSERT INTO t1 (a,b) VALUES ('1001 MySQL Tricks','1. Never run mysqld as root. 2. ...'), ('MySQL vs. YourSQL','In the following database comparison ...'), ('MySQL Security','When configured properly, MySQL ...'); ANALYZE TABLE t1; SELECT * FROM t1 WHERE MATCH (a,b) -AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); +AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; id a b 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 3 Optimizing MySQL In this tutorial we will show ... @@ -499,14 +505,14 @@ ERROR 42000: syntax error, unexpected '-' select * from t1 where MATCH(a,b) AGAINST("+Mysql +(tricks never)" IN BOOLEAN MODE); id a b 4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... -select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE) ORDER BY id; id a b -6 MySQL Security When configured properly, MySQL ... 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 2 How To Use MySQL Well After you went through a ... 3 Optimizing MySQL In this tutorial we will show ... 5 MySQL vs. YourSQL In the following database comparison ... -select *, MATCH(a,b) AGAINST("mysql stands" IN BOOLEAN MODE) as x from t1; +6 MySQL Security When configured properly, MySQL ... 
+select *, MATCH(a,b) AGAINST("mysql stands" IN BOOLEAN MODE) as x from t1 ORDER BY id; id a b x 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 0.6055193543434143 2 How To Use MySQL Well After you went through a ... 0.000000001885928302414186 @@ -519,18 +525,18 @@ id a b 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... select * from t1 where MATCH a,b AGAINST ('"security mysql"' IN BOOLEAN MODE); id a b -select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION); +select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION) ORDER BY id; id a b 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... +2 How To Use MySQL Well After you went through a ... 3 Optimizing MySQL In this tutorial we will show ... +4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... 5 MySQL vs. YourSQL In the following database comparison ... 6 MySQL Security When configured properly, MySQL ... -2 How To Use MySQL Well After you went through a ... -4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... ALTER TABLE t1 DROP INDEX idx; CREATE FULLTEXT INDEX idx on t1 (a,b); SELECT * FROM t1 WHERE MATCH (a,b) -AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); +AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; id a b 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 3 Optimizing MySQL In this tutorial we will show ... @@ -543,14 +549,14 @@ id a b select * from t1 where MATCH(a,b) AGAINST("+Mysql +(tricks never)" IN BOOLEAN MODE); id a b 4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... -select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE) ORDER BY id; id a b -6 MySQL Security When configured properly, MySQL ... 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 2 How To Use MySQL Well After you went through a ... 3 Optimizing MySQL In this tutorial we will show ... 5 MySQL vs. YourSQL In the following database comparison ... 
-select *, MATCH(a,b) AGAINST("mysql VÐƷWİ" IN BOOLEAN MODE) as x from t1; +6 MySQL Security When configured properly, MySQL ... +select *, MATCH(a,b) AGAINST("mysql VÐƷWİ" IN BOOLEAN MODE) as x from t1 ORDER BY id; id a b x 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... 0.6055193543434143 2 How To Use MySQL Well After you went through a ... 0.000000001885928302414186 @@ -560,14 +566,14 @@ id a b x 6 MySQL Security When configured properly, MySQL ... 0.000000003771856604828372 select * from t1 where MATCH a,b AGAINST ('"security mysql"' IN BOOLEAN MODE); id a b -select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION); +select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION) ORDER BY id; id a b 1 MySQL Tutorial DBMS stands for DataBase VÐƷWİ... +2 How To Use MySQL Well After you went through a ... 3 Optimizing MySQL In this tutorial we will show ... +4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... 5 MySQL vs. YourSQL In the following database comparison ... 6 MySQL Security When configured properly, MySQL ... -2 How To Use MySQL Well After you went through a ... -4 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... 
INSERT INTO t1 (a,b) VALUES ('test query expansion','for database ...'); INSERT INTO t1 (a,b) VALUES ('test proximity search, test, proximity and phrase', @@ -589,14 +595,14 @@ AGAINST ('"proximity search"@1' IN BOOLEAN MODE); id a b SELECT * FROM t1 WHERE MATCH (a,b) -AGAINST ('"proximity search"@3' IN BOOLEAN MODE); +AGAINST ('"proximity search"@3' IN BOOLEAN MODE) ORDER BY id; id a b 8 test proximity search, test, proximity and phrase search, with proximity innodb 9 test proximity fts search, test, proximity and phrase search, with proximity innodb 10 test more proximity fts search, test, more proximity and phrase search, with proximity innodb SELECT * FROM t1 WHERE MATCH (a,b) -AGAINST ('"test proximity"@5' IN BOOLEAN MODE); +AGAINST ('"test proximity"@5' IN BOOLEAN MODE) ORDER BY id; id a b 8 test proximity search, test, proximity and phrase search, with proximity innodb 9 test proximity fts search, test, proximity and phrase search, with proximity innodb @@ -624,7 +630,7 @@ select * from t1 where MATCH(a,b) AGAINST("+VÐƷWİ" IN BOOLEAN MODE); id a b 1 MYSQL TUTORIAL dbms stands for database vðʒwi... SELECT * FROM t1 WHERE MATCH (a,b) -AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); +AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; id a b 1 MYSQL TUTORIAL dbms stands for database vðʒwi... 3 OPTIMIZING MYSQL in this tutorial we will show ... @@ -633,7 +639,7 @@ DELETE FROM t1 WHERE MATCH (a,b) AGAINST ('"proximity search"@14' IN BOOLEAN MOD SELECT * FROM t1 WHERE MATCH (a,b) AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); id a b -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY id; id a b 2 HOW TO USE MYSQL WELL after you went through a ... 4 1001 MYSQL TRICKS 1. never run mysqld as root. 2. ... @@ -641,7 +647,7 @@ id a b 6 MYSQL SECURITY when configured properly, mysql ... 7 TEST QUERY EXPANSION for database ... 
DROP TABLE t1; -set names utf8; +SET GLOBAL innodb_file_per_table=1; CREATE TABLE t1 ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, a VARCHAR(200), @@ -662,14 +668,14 @@ INSERT INTO t1 (a,b) VALUES CREATE FULLTEXT INDEX idx on t1 (a,b); Warnings: Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("вредит χωρὶς"); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("вредит χωρὶς") ORDER BY id; id a b 1 Я могу есть стекло оно мне не вредит 3 Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("оно" WITH QUERY EXPANSION); id a b 1 Я могу есть стекло оно мне не вредит -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE) ORDER BY id; id a b 1 Я могу есть стекло оно мне не вредит 2 Мога да ям стъкло то не ми вреди @@ -731,7 +737,7 @@ id a b 7 Pchnąć w tę łódź jeża lub osiem skrzyń fig SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE); id a b -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("фальшив*" IN BOOLEAN MODE); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("фальшив*" IN BOOLEAN MODE) ORDER BY id; id a b 1 В чащах юга жил-был цитрус? Да но фальшивый экземпляр! ёъ 2 В чащах юга жил-был цитрус? Да но фальшивый экземпляр! ёъ @@ -742,7 +748,7 @@ WHERE MATCH (a,b) AGAINST ('"łódź jeża"@2' IN BOOLEAN MODE); id a b 7 Pchnąć w tę łódź jeża lub osiem skrzyń fig -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY id; id a b 1 В чащах юга жил-был цитрус? Да но фальшивый экземпляр! ёъ 2 В чащах юга жил-был цитрус? Да но фальшивый экземпляр! 
ёъ diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result b/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result index 569de081762..c7c86290f3c 100644 --- a/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result +++ b/mysql-test/suite/innodb_fts/r/innodb_fts_plugin.result @@ -1,29 +1,198 @@ INSTALL PLUGIN simple_parser SONAME 'mypluglib'; +# Test Part 1: Grammar Test CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), -body TEXT, FULLTEXT (title) WITH PARSER simple_parser ) ENGINE=MyISAM; ALTER TABLE articles ENGINE=InnoDB; -ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table DROP TABLE articles; CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), body TEXT, +comment TEXT, FULLTEXT (title) WITH PARSER simple_parser ) ENGINE=InnoDB; -ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table +ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser; +CREATE FULLTEXT INDEX ft_index ON articles(comment) WITH PARSER simple_parser; +DROP TABLE articles; +# Test Part 2: Create Index Test(CREATE TABLE WITH FULLTEXT INDEX) CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), body TEXT, -FULLTEXT (title) +FULLTEXT (title, body) WITH PARSER simple_parser ) ENGINE=InnoDB; -ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser; -ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table -CREATE FULLTEXT INDEX ft_index ON articles(body) WITH PARSER simple_parser; -ERROR HY000: Cannot CREATE FULLTEXT INDEX WITH PARSER on InnoDB table +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), +('How To Use MySQL Well','After you went through a ...'), +('Optimizing MySQL','In this tutorial we will show ...'), +('1001 MySQL Tricks','How to use full-text search engine'), +('Go MySQL Tricks','How to use full text search 
engine'); +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('mysql'); +id title body +1 MySQL Tutorial DBMS stands for MySQL DataBase ... +2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... +4 1001 MySQL Tricks How to use full-text search engine +5 Go MySQL Tricks How to use full text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('will go'); +id title body +# Test plugin parser tokenizer difference +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full-text'); +id title body +4 1001 MySQL Tricks How to use full-text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full text'); +id title body +5 Go MySQL Tricks How to use full text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('"mysql database"' IN BOOLEAN MODE); +id title body +DROP TABLE articles; +# Test Part 3: Row Merge Create Index Test(ALTER TABLE ADD FULLTEXT INDEX) +CREATE TABLE articles ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), +body TEXT +) ENGINE=InnoDB; +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), +('How To Use MySQL Well','After you went through a ...'), +('Optimizing MySQL','In this tutorial we will show ...'), +('1001 MySQL Tricks','How to use full-text search engine'), +('Go MySQL Tricks','How to use full text search engine'); +ALTER TABLE articles ADD FULLTEXT INDEX (title, body) WITH PARSER simple_parser; +Warnings: +Warning 124 InnoDB rebuilding table to add column FTS_DOC_ID +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('mysql'); +id title body +1 MySQL Tutorial DBMS stands for MySQL DataBase ... +2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... 
+4 1001 MySQL Tricks How to use full-text search engine +5 Go MySQL Tricks How to use full text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('will go'); +id title body +# Test plugin parser tokenizer difference +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full-text'); +id title body +4 1001 MySQL Tricks How to use full-text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full text'); +id title body +5 Go MySQL Tricks How to use full text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full-text' WITH QUERY EXPANSION); +id title body +4 1001 MySQL Tricks How to use full-text search engine +5 Go MySQL Tricks How to use full text search engine +2 How To Use MySQL Well After you went through a ... +1 MySQL Tutorial DBMS stands for MySQL DataBase ... +3 Optimizing MySQL In this tutorial we will show ... +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full text' WITH QUERY EXPANSION); +id title body +5 Go MySQL Tricks How to use full text search engine +4 1001 MySQL Tricks How to use full-text search engine +2 How To Use MySQL Well After you went through a ... +1 MySQL Tutorial DBMS stands for MySQL DataBase ... +3 Optimizing MySQL In this tutorial we will show ... 
+SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('"mysql database"' IN BOOLEAN MODE); +id title body +DROP TABLE articles; +# Test Part 3 END +# Test Part 4:crash on commit(before/after) +CREATE TABLE articles ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), +body TEXT, +FULLTEXT (title, body) WITH PARSER simple_parser +) ENGINE=InnoDB; +BEGIN; +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), +('How To Use MySQL Well','After you went through a ...'), +('Optimizing MySQL','In this tutorial we will show ...'), +('1001 MySQL Tricks','How to use full-text search engine'), +('Go MySQL Tricks','How to use full text search engine'); +SELECT COUNT(*) FROM articles; +COUNT(*) +0 +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('mysql'); +id title body +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), +('How To Use MySQL Well','After you went through a ...'), +('Optimizing MySQL','In this tutorial we will show ...'), +('1001 MySQL Tricks','How to use full-text search engine'), +('Go MySQL Tricks','How to use full text search engine'); +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('Tricks'); +id title body +4 1001 MySQL Tricks How to use full-text search engine +5 Go MySQL Tricks How to use full text search engine +SELECT COUNT(*) FROM articles; +COUNT(*) +5 +DROP TABLE articles; +# Test Part 5: Test Uninstall Plugin After Index is Built +CREATE TABLE articles ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), +body TEXT, +FULLTEXT (title, body) WITH PARSER simple_parser +) ENGINE=InnoDB; +UNINSTALL PLUGIN simple_parser; +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands for MySQL DataBase ...'); +ERROR HY000: Plugin 'simple_parser' is not loaded +INSTALL PLUGIN simple_parser SONAME 'mypluglib'; +INSERT INTO articles (title, body) VALUES +('MySQL Tutorial','DBMS stands 
for MySQL DataBase ...'), +('How To Use MySQL Well','After you went through a ...'), +('Optimizing MySQL','In this tutorial we will show ...'), +('1001 MySQL Tricks','How to use full-text search engine'), +('Go MySQL Tricks','How to use full text search engine'); +UNINSTALL PLUGIN simple_parser; +Warnings: +Warning 1620 Plugin is busy and will be uninstalled on shutdown +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('mysql'); +id title body +1 MySQL Tutorial DBMS stands for MySQL DataBase ... +2 How To Use MySQL Well After you went through a ... +3 Optimizing MySQL In this tutorial we will show ... +4 1001 MySQL Tricks How to use full-text search engine +5 Go MySQL Tricks How to use full text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('will go'); +id title body +# Test plugin parser tokenizer difference +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full-text'); +id title body +4 1001 MySQL Tricks How to use full-text search engine +SELECT * FROM articles WHERE +MATCH(title, body) AGAINST('full text'); +id title body +5 Go MySQL Tricks How to use full text search engine +CREATE TABLE articles2 ( +id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, +title VARCHAR(200), +body TEXT, +FULLTEXT (title, body) WITH PARSER simple_parser +) ENGINE=InnoDB; +ERROR HY000: Function 'simple_parser' is not defined DROP TABLE articles; UNINSTALL PLUGIN simple_parser; +ERROR 42000: PLUGIN simple_parser does not exist diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_proximity.result b/mysql-test/suite/innodb_fts/r/innodb_fts_proximity.result index a61ff47c9a1..ca9a57dc3e7 100644 --- a/mysql-test/suite/innodb_fts/r/innodb_fts_proximity.result +++ b/mysql-test/suite/innodb_fts/r/innodb_fts_proximity.result @@ -128,7 +128,7 @@ WHERE MATCH (a,b) AGAINST ('"mysql use"@1' IN BOOLEAN MODE); id a b INSERT INTO t1 (a,b) VALUES ('XYZ, long blob', repeat("a", 9000)); -INSERT INTO t1 (a,b) VALUES (repeat("b", 9000), 'XYZ, long blob'); 
+INSERT IGNORE INTO t1 (a,b) VALUES (repeat("b", 9000), 'XYZ, long blob'); Warnings: Warning 1265 Data truncated for column 'a' at row 1 SELECT count(*) FROM t1 @@ -137,7 +137,6 @@ AGAINST ('"xyz blob"@3' IN BOOLEAN MODE); count(*) 2 DROP TABLE t1; -set global innodb_file_format="Barracuda"; set global innodb_file_per_table=1; CREATE TABLE t1 ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, @@ -225,5 +224,4 @@ AGAINST ('"very blob"@3' IN BOOLEAN MODE); count(*) 1 DROP TABLE t1; -SET GLOBAL innodb_file_format=Antelope; SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_fts/t/innodb-fts-fic.test b/mysql-test/suite/innodb_fts/t/innodb-fts-fic.test index 6570e6cf216..669aa69e835 100644 --- a/mysql-test/suite/innodb_fts/t/innodb-fts-fic.test +++ b/mysql-test/suite/innodb_fts/t/innodb-fts-fic.test @@ -2,6 +2,8 @@ -- source include/have_innodb.inc +call mtr.add_suppression("\\[Warning\\] InnoDB: A new Doc ID must be supplied while updating FTS indexed columns."); +call mtr.add_suppression("\\[Warning\\] InnoDB: FTS Doc ID must be larger than [0-9]+ for table `test`.`articles`"); # Create FTS table CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test index 6ef0452f4c6..3c794ec6440 100644 --- a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test +++ b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test @@ -1,19 +1,13 @@ +--source include/have_innodb.inc + #------------------------------------------------------------------------------ # FTS with FK and update cascade #------------------------------------------------------------------------------- ---source include/have_innodb.inc - -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - ---disable_warnings -drop table if exists t2,t1; 
---enable_warnings - set names utf8; +call mtr.add_suppression("\\[Warning\\] InnoDB: A new Doc ID must be supplied while updating FTS indexed columns."); +call mtr.add_suppression("\\[Warning\\] InnoDB: FTS Doc ID must be larger than [0-9]+ for table `test`.`t1`"); + # Create FTS table CREATE TABLE t1 ( id1 INT , @@ -63,14 +57,17 @@ INSERT INTO t2 (a2,b2) VALUES --error 1451 DELETE FROM t1; -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ; -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ; +ANALYZE TABLE t1; +ANALYZE TABLE t2; -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ORDER BY id1; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ORDER BY id2; -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ; -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ; +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id1; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id2; + +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id1; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id2; SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('"dbms database"@4' IN BOOLEAN MODE) ; @@ -89,10 +86,10 @@ SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; # it shows updated record -SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('+update +cascade' IN BOOLEAN MODE) ; +SELECT id1 FROM t1 WHERE MATCH (a1,b1) AGAINST ('+update +cascade' IN BOOLEAN MODE) ORDER BY id1; # InnoDB:Error child table does 
not show the expected record -SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('+update +cascade' IN BOOLEAN MODE) ; -SELECT id2 FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%'; +SELECT id2 FROM t2 WHERE MATCH (a2,b2) AGAINST ('+update +cascade' IN BOOLEAN MODE) ORDER BY id2; +SELECT id2 FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%' ORDER BY id2; DROP TABLE t2 , t1; @@ -220,11 +217,8 @@ DROP TABLE t2 , t1; #------------------------------------------------------------------------------ # FTS with FK+transactions and UPDATE casecade with transaction #------------------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t2,t1; ---enable_warnings -SET NAMES utf8; +call mtr.add_suppression("\\[ERROR\\] InnoDB: FTS Doc ID must be larger than 3 for table `test`.`t2`"); # Create FTS table CREATE TABLE t1 ( @@ -277,12 +271,12 @@ INSERT INTO t2 (a2,b2) VALUES DELETE FROM t1; # records expected -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ; -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ; -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ; -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial') ORDER BY id1; +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial') ORDER BY id2; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id1; +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ORDER BY id2; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id1; +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial' WITH QUERY EXPANSION) ORDER BY id2; SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST 
('"dbms database"@4' IN BOOLEAN MODE) ; SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('"dbms database"@4' IN BOOLEAN MODE) ; @@ -296,8 +290,8 @@ SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('root' WITH QUERY EXPANSION) ; SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('"database comparison"@02' IN BOOLEAN MODE) ; SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('"database comparison"@02' IN BOOLEAN MODE) ; -SELECT * FROM t1; -SELECT * FROM t2; +SELECT * FROM t1 ORDER BY id1; +SELECT * FROM t2 ORDER BY id2; COMMIT; @@ -312,9 +306,9 @@ SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN B SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('tutorial (+mysql -VÐƷWİ)' IN BOOLEAN MODE) ; # it shows updated record -SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ; -SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ; -SELECT * FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%'; +SELECT * FROM t1 WHERE MATCH (a1,b1) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ORDER BY id1; +SELECT * FROM t2 WHERE MATCH (a2,b2) AGAINST ('+UPDATE +cascade' IN BOOLEAN MODE) ORDER BY id2; +SELECT * FROM t2 WHERE a2 LIKE '%UPDATE CASCADE%' ORDER BY id2; DROP TABLE t2 , t1; @@ -434,18 +428,9 @@ DROP TABLE t2 , t1; #------------------------------------------------------------------------------ # Save innodb variables ---disable_query_log -let $innodb_file_format_orig=`select @@innodb_file_format`; let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; -let $innodb_file_format_max_orig=`select @@innodb_file_format_max`; -let $innodb_large_prefix_orig=`select @@innodb_large_prefix`; ---enable_query_log -# Set Innodb file format as feature works for Barracuda file format -set global innodb_file_format="Barracuda"; set global innodb_file_per_table=1; -set global innodb_large_prefix=1; -set names utf8; # Create FTS table CREATE TABLE t1 ( @@ -464,6 +449,9 @@ INSERT INTO t1 (a,b) VALUES ALTER TABLE t1 ADD FULLTEXT INDEX 
idx (a,b); EVAL SHOW CREATE TABLE t1; +# Check whether individual space id created for AUX tables +SELECT count(*) FROM information_schema.innodb_sys_tables WHERE name LIKE "%FTS_%" AND space !=0; + # Insert rows INSERT INTO t1 (a,b) VALUES ('1001 MySQL Tricks','1. Never run mysqld as root. 2. ...'), @@ -476,20 +464,20 @@ ANALYZE TABLE t1; # Select word "tutorial" in the table SELECT * FROM t1 WHERE MATCH (a,b) - AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); + AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; # boolean mode select * from t1 where MATCH(a,b) AGAINST("+tutorial +VÐƷWİ" IN BOOLEAN MODE); --error ER_PARSE_ERROR select * from t1 where MATCH(a,b) AGAINST("+-VÐƷWİ" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+Mysql +(tricks never)" IN BOOLEAN MODE); -select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE); -select *, MATCH(a,b) AGAINST("mysql stands" IN BOOLEAN MODE) as x from t1; +select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE) ORDER BY id; +select *, MATCH(a,b) AGAINST("mysql stands" IN BOOLEAN MODE) as x from t1 ORDER BY id; select * from t1 where MATCH a,b AGAINST ("+database* +VÐƷW*" IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"security mysql"' IN BOOLEAN MODE); # query expansion -select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION); +select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION) ORDER BY id; # Drop index ALTER TABLE t1 DROP INDEX idx; @@ -505,20 +493,20 @@ ANALYZE TABLE t1; # Select word "tutorial" in the table SELECT * FROM t1 WHERE MATCH (a,b) - AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); + AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; # boolean mode select * from t1 where MATCH(a,b) AGAINST("+tutorial +VÐƷWİ" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+dbms" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+Mysql +(tricks never)" IN BOOLEAN MODE); 
-select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE); -select *, MATCH(a,b) AGAINST("mysql VÐƷWİ" IN BOOLEAN MODE) as x from t1; +select * from t1 where MATCH(a,b) AGAINST("+mysql -(tricks never)" IN BOOLEAN MODE) ORDER BY id; +select *, MATCH(a,b) AGAINST("mysql VÐƷWİ" IN BOOLEAN MODE) as x from t1 ORDER BY id; # Innodb:Assert eval0eval.c line 148 #select * from t1 where MATCH a,b AGAINST ("+database* +VÐƷWİ*" IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"security mysql"' IN BOOLEAN MODE); # query expansion -select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION); +select * from t1 where MATCH(a,b) AGAINST ("VÐƷWİ" WITH QUERY EXPANSION) ORDER BY id; # insert for proximity search @@ -549,12 +537,12 @@ SELECT * FROM t1 # This give you all three documents SELECT * FROM t1 WHERE MATCH (a,b) - AGAINST ('"proximity search"@3' IN BOOLEAN MODE); + AGAINST ('"proximity search"@3' IN BOOLEAN MODE) ORDER BY id; # Similar boundary testing for the words SELECT * FROM t1 WHERE MATCH (a,b) - AGAINST ('"test proximity"@5' IN BOOLEAN MODE); + AGAINST ('"test proximity"@5' IN BOOLEAN MODE) ORDER BY id; # Test with more word The last document will return, please notice there # is no ordering requirement for proximity search. 
@@ -579,7 +567,7 @@ select * from t1 where MATCH(a,b) AGAINST("+tutorial +dbms" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("+VÐƷWİ" IN BOOLEAN MODE); SELECT * FROM t1 WHERE MATCH (a,b) - AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); + AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE) ORDER BY id; DELETE FROM t1 WHERE MATCH (a,b) AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); DELETE FROM t1 WHERE MATCH (a,b) AGAINST ('"proximity search"@14' IN BOOLEAN MODE); @@ -588,20 +576,14 @@ DELETE FROM t1 WHERE MATCH (a,b) AGAINST ('"proximity search"@14' IN BOOLEAN MOD SELECT * FROM t1 WHERE MATCH (a,b) AGAINST ('Tutorial' IN NATURAL LANGUAGE MODE); -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY id; DROP TABLE t1; ---disable_query_log -eval SET GLOBAL innodb_file_format=$innodb_file_format_orig; eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; -eval SET GLOBAL innodb_file_format_max=$innodb_file_format_max_orig; -eval SET GLOBAL innodb_large_prefix=$innodb_large_prefix_orig; ---enable_query_log #------------------------------------------------------------------------------ # FTS index with utf8 character testcase #------------------------------------------------------------------------------ -set names utf8; # Create FTS table EVAL CREATE TABLE t1 ( @@ -631,10 +613,10 @@ INSERT INTO t1 (a,b) VALUES CREATE FULLTEXT INDEX idx on t1 (a,b); # FTS Queries -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("вредит χωρὶς"); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("вредит χωρὶς") ORDER BY id; SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("оно" WITH QUERY EXPANSION); -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE) ORDER BY id; SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("+γυαλιὰ +tutorial" IN BOOLEAN MODE); SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("+tutorial +(Мога τίποτα)" IN BOOLEAN MODE); @@ -682,14 +664,14 @@ DELETE FROM t1 WHERE MATCH(a,b) AGAINST("+Sævör +úlpan" IN 
BOOLEAN MODE); SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("あさきゆめみじ ゑひもせず"); SELECT * FROM t1 WHERE MATCH(a,b) AGAINST ("łódź osiem"); SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("вред*" IN BOOLEAN MODE); -SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("фальшив*" IN BOOLEAN MODE); +SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("фальшив*" IN BOOLEAN MODE) ORDER BY id; SELECT * FROM t1 WHERE MATCH(a,b) AGAINST("+Sævör +úlpan" IN BOOLEAN MODE); SELECT * FROM t1 WHERE MATCH (a,b) AGAINST ('"łódź jeża"@2' IN BOOLEAN MODE); -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY id; DROP TABLE t1; # This is to test the update operation on FTS indexed and non-indexed diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test b/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test index e800faed0f5..9cc1afd4e60 100644 --- a/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test +++ b/mysql-test/suite/innodb_fts/t/innodb_fts_plugin.test @@ -1,45 +1,213 @@ ---source include/have_simple_parser.inc --source include/have_innodb.inc +--source include/have_simple_parser.inc +# Restart is not supported in embedded +--source include/not_embedded.inc # Install fts parser plugin INSTALL PLUGIN simple_parser SONAME 'mypluglib'; +-- echo # Test Part 1: Grammar Test # Create a myisam table and alter it to innodb table CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), - body TEXT, FULLTEXT (title) WITH PARSER simple_parser ) ENGINE=MyISAM; ---error ER_INNODB_NO_FT_USES_PARSER + ALTER TABLE articles ENGINE=InnoDB; DROP TABLE articles; # Create a table having a full text index with parser ---error ER_INNODB_NO_FT_USES_PARSER CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), body TEXT, + comment TEXT, FULLTEXT (title) WITH PARSER simple_parser ) ENGINE=InnoDB; +# Alter table to add a full text index with parser +ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser; + +# Create a full text index 
with parser +CREATE FULLTEXT INDEX ft_index ON articles(comment) WITH PARSER simple_parser; + +DROP TABLE articles; + +-- echo # Test Part 2: Create Index Test(CREATE TABLE WITH FULLTEXT INDEX) CREATE TABLE articles ( id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, title VARCHAR(200), body TEXT, - FULLTEXT (title) + FULLTEXT (title, body) WITH PARSER simple_parser ) ENGINE=InnoDB; -# Alter table to add a full text index with parser ---error ER_INNODB_NO_FT_USES_PARSER -ALTER TABLE articles ADD FULLTEXT INDEX (body) WITH PARSER simple_parser; +INSERT INTO articles (title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), + ('How To Use MySQL Well','After you went through a ...'), + ('Optimizing MySQL','In this tutorial we will show ...'), + ('1001 MySQL Tricks','How to use full-text search engine'), + ('Go MySQL Tricks','How to use full text search engine'); -# Create a full text index with parser ---error ER_INNODB_NO_FT_USES_PARSER -CREATE FULLTEXT INDEX ft_index ON articles(body) WITH PARSER simple_parser; +# Simple term search +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('mysql'); + +# Test stopword and word len less than fts_min_token_size +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('will go'); + +-- echo # Test plugin parser tokenizer difference +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full-text'); + +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full text'); + +# No result here, we get '"mysql' 'database"' by simple parser +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('"mysql database"' IN BOOLEAN MODE); + +DROP TABLE articles; + +-- echo # Test Part 3: Row Merge Create Index Test(ALTER TABLE ADD FULLTEXT INDEX) +CREATE TABLE articles ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), + body TEXT + ) ENGINE=InnoDB; + +INSERT INTO articles (title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), + ('How To Use 
MySQL Well','After you went through a ...'), + ('Optimizing MySQL','In this tutorial we will show ...'), + ('1001 MySQL Tricks','How to use full-text search engine'), + ('Go MySQL Tricks','How to use full text search engine'); + +# Create fulltext index +ALTER TABLE articles ADD FULLTEXT INDEX (title, body) WITH PARSER simple_parser; + +# Simple term search +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('mysql'); + +# Test stopword and word len less than fts_min_token_size +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('will go'); + +-- echo # Test plugin parser tokenizer difference +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full-text'); + +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full text'); + +# Test query expansion +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full-text' WITH QUERY EXPANSION); + +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full text' WITH QUERY EXPANSION); + +# No result here, we get '"mysql' 'database"' by simple parser +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('"mysql database"' IN BOOLEAN MODE); + +DROP TABLE articles; +-- echo # Test Part 3 END + +-- echo # Test Part 4:crash on commit(before/after) +CREATE TABLE articles ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), + body TEXT, + FULLTEXT (title, body) WITH PARSER simple_parser +) ENGINE=InnoDB; + +BEGIN; +INSERT INTO articles (title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), + ('How To Use MySQL Well','After you went through a ...'), + ('Optimizing MySQL','In this tutorial we will show ...'), + ('1001 MySQL Tricks','How to use full-text search engine'), + ('Go MySQL Tricks','How to use full text search engine'); + +--source include/restart_mysqld.inc + +SELECT COUNT(*) FROM articles; +# Simple term search - no records expected +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('mysql'); + +INSERT INTO articles 
(title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), + ('How To Use MySQL Well','After you went through a ...'), + ('Optimizing MySQL','In this tutorial we will show ...'), + ('1001 MySQL Tricks','How to use full-text search engine'), + ('Go MySQL Tricks','How to use full text search engine'); + +--source include/restart_mysqld.inc + +# Simple term search - 4 records expected +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('Tricks'); +SELECT COUNT(*) FROM articles; +DROP TABLE articles; + +-- echo # Test Part 5: Test Uninstall Plugin After Index is Built +# Note: this test should be the last one because we uninstall plugin +CREATE TABLE articles ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), + body TEXT, + FULLTEXT (title, body) WITH PARSER simple_parser + ) ENGINE=InnoDB; + +# Uninstall plugin +UNINSTALL PLUGIN simple_parser; + +-- error ER_PLUGIN_IS_NOT_LOADED +INSERT INTO articles (title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'); + +# Reinstall plugin +INSTALL PLUGIN simple_parser SONAME 'mypluglib'; + +INSERT INTO articles (title, body) VALUES + ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), + ('How To Use MySQL Well','After you went through a ...'), + ('Optimizing MySQL','In this tutorial we will show ...'), + ('1001 MySQL Tricks','How to use full-text search engine'), + ('Go MySQL Tricks','How to use full text search engine'); + +# Get warning here +UNINSTALL PLUGIN simple_parser; + +# Simple term search +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('mysql'); + +# Test stopword and word len less than fts_min_token_size +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('will go'); + +-- echo # Test plugin parser tokenizer difference +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full-text'); + +SELECT * FROM articles WHERE + MATCH(title, body) AGAINST('full text'); + +-- error ER_FUNCTION_NOT_DEFINED +CREATE TABLE 
articles2 ( + id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY, + title VARCHAR(200), + body TEXT, + FULLTEXT (title, body) WITH PARSER simple_parser + ) ENGINE=InnoDB; DROP TABLE articles; # Uninstall plugin +-- error ER_SP_DOES_NOT_EXIST UNINSTALL PLUGIN simple_parser; diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_proximity.test b/mysql-test/suite/innodb_fts/t/innodb_fts_proximity.test index b2ac81e2840..20eee3fac23 100644 --- a/mysql-test/suite/innodb_fts/t/innodb_fts_proximity.test +++ b/mysql-test/suite/innodb_fts/t/innodb_fts_proximity.test @@ -1,19 +1,14 @@ +--source include/have_innodb.inc + # This is the DDL function tests for innodb FTS # Functional testing with FTS proximity search using '@' # and try search default words ---source include/have_innodb.inc - -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} --disable_warnings drop table if exists t1; --enable_warnings --disable_query_log -let $innodb_file_format_orig = `select @@innodb_file_format`; let $innodb_file_per_table_orig = `select @@innodb_file_per_table`; --enable_query_log @@ -156,7 +151,7 @@ SELECT * FROM t1 INSERT INTO t1 (a,b) VALUES ('XYZ, long blob', repeat("a", 9000)); -INSERT INTO t1 (a,b) VALUES (repeat("b", 9000), 'XYZ, long blob'); +INSERT IGNORE INTO t1 (a,b) VALUES (repeat("b", 9000), 'XYZ, long blob'); # 2 rows match SELECT count(*) FROM t1 @@ -165,7 +160,6 @@ SELECT count(*) FROM t1 DROP TABLE t1; -set global innodb_file_format="Barracuda"; set global innodb_file_per_table=1; # Test fts with externally stored long column @@ -263,5 +257,4 @@ SELECT count(*) FROM t1 DROP TABLE t1; -eval SET GLOBAL innodb_file_format=$innodb_file_format_orig; eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; diff --git a/mysql-test/suite/innodb_zip/include/have_innodb_zip.inc b/mysql-test/suite/innodb_zip/include/have_innodb_zip.inc new file mode 100644 index 
00000000000..6af83d51304 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/have_innodb_zip.inc @@ -0,0 +1,4 @@ +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value <= 16384`) +{ + --skip Test with InnoDB zip requires page size not greater than 16k. +} diff --git a/mysql-test/suite/innodb_zip/include/have_no_undo_tablespaces.inc b/mysql-test/suite/innodb_zip/include/have_no_undo_tablespaces.inc new file mode 100644 index 00000000000..4c163e7c1b0 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/have_no_undo_tablespaces.inc @@ -0,0 +1,4 @@ +if (`select count(*) = 0 from information_schema.global_variables where variable_name like 'innodb_undo_tablespaces' and variable_value = 0`) +{ + --skip Test requires innodb_undo_tablespaces=0 +} diff --git a/mysql-test/suite/innodb_zip/include/innodb-wl6045.inc b/mysql-test/suite/innodb_zip/include/innodb-wl6045.inc new file mode 100644 index 00000000000..26ce7e72983 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb-wl6045.inc @@ -0,0 +1,20 @@ +--echo ===> Testing size=$size +--disable_warnings +--eval CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$size +--enable_warnings + +insert into t1 values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); + +--source include/shutdown_mysqld.inc + +--exec $INNOCHECKSUM $MYSQLD_DATADIR/test/t1.ibd +--exec $INNOCHECKSUM --write=crc32 $MYSQLD_DATADIR/test/t1.ibd +--exec $INNOCHECKSUM --strict-check=crc32 $MYSQLD_DATADIR/test/t1.ibd +--exec $INNOCHECKSUM --write=none $MYSQLD_DATADIR/test/t1.ibd +--exec $INNOCHECKSUM --strict-check=none $MYSQLD_DATADIR/test/t1.ibd + +--source include/start_mysqld.inc +select * from t1; +drop table t1; diff --git a/mysql-test/suite/innodb_zip/include/innodb_create_tab_indx.inc b/mysql-test/suite/innodb_zip/include/innodb_create_tab_indx.inc new file mode 
100644 index 00000000000..413a026265e --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_create_tab_indx.inc @@ -0,0 +1,16 @@ +--echo # Create table & Index + +eval CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$block_size; + + +let $i = 9; +while ($i) { + +eval CREATE INDEX idx$i ON tab5(col_$i(10)); +dec $i; +} + + diff --git a/mysql-test/suite/innodb_zip/include/innodb_dml_ops.inc b/mysql-test/suite/innodb_zip/include/innodb_dml_ops.inc new file mode 100644 index 00000000000..4908dfb6bee --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_dml_ops.inc @@ -0,0 +1,82 @@ +delimiter |; +create procedure populate_t1() +begin + declare i int default 1; + while (i <= 200) do + insert into t1 values (i, 'a', 'b'); + set i = i + 1; + end while; +end| +create procedure populate_t1_small() +begin + declare i int default 1; + while (i <= 20) do + insert into t1 values (i, 'c', 'd'); + set i = i + 1; + end while; +end| +create procedure populate_t1_small2() +begin + declare i int default 30; + while (i <= 50) do + insert into t1 values (i, 'e', 'f'); + set i = i + 1; + end while; +end| +delimiter ;| +# +begin; +select count(*) from t1; +call populate_t1(); +select count(*) from t1; +select * from t1 limit 10; +rollback; +select count(*) from t1; +# +begin; +call populate_t1(); +select count(*) from t1; +commit; +select count(*) from t1; +# +truncate table t1; +select count(*) from t1; +# +call populate_t1_small(); +select count(*) from t1; +rollback; +select count(*) from t1; +truncate table t1; +# +call populate_t1(); +select count(*) from t1; +delete from t1 where keyc <= 60; +select count(*) from t1; +call populate_t1_small(); +select count(*) from t1; +select * from t1 limit 10; +begin; +call populate_t1_small2(); +select count(*) from t1; +select * from t1 where keyc > 30 limit 10; 
+rollback; +select count(*) from t1; +select * from t1 where keyc > 30 limit 10; +# +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +rollback; +begin; +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +rollback; +select * from t1 limit 10; +commit; +select * from t1 limit 10; +# +insert into t2 select * from t1 where keyc < 2101; +select count(*) from t2; +# +drop procedure populate_t1; +drop procedure populate_t1_small; +drop procedure populate_t1_small2; diff --git a/mysql-test/suite/innodb_zip/include/innodb_fetch_records.inc b/mysql-test/suite/innodb_zip/include/innodb_fetch_records.inc new file mode 100644 index 00000000000..5e55293c18c --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_fetch_records.inc @@ -0,0 +1,7 @@ +--echo =============== +--echo Fetch Records +--echo =============== + +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; diff --git a/mysql-test/suite/innodb_zip/include/innodb_load_data.inc b/mysql-test/suite/innodb_zip/include/innodb_load_data.inc new file mode 100644 index 00000000000..1bcb30131ac --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_load_data.inc @@ -0,0 +1,19 @@ +--echo # Load the data + +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); + +while ($i) { + +eval INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +dec $i; +} +commit; diff --git a/mysql-test/suite/innodb_zip/include/innodb_stats_comp_index.inc b/mysql-test/suite/innodb_zip/include/innodb_stats_comp_index.inc new file mode 100644 index 00000000000..d2fe05e5d13 --- /dev/null +++ 
b/mysql-test/suite/innodb_zip/include/innodb_stats_comp_index.inc @@ -0,0 +1,26 @@ +--echo # Check the stats of the table +--echo # Check the size of the ibd file + +-- echo # testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; + +perl; + my $dir = $ENV{'MYSQLD_DATADIR'}."test"; + my $size; + opendir(DIR, $dir) or die $!; + while (my $file = readdir(DIR)) + { + + next unless ($file =~ m/\.ibd$/); + $size = -s "$dir/$file"; + print "The size of the tab5.ibd file: $size\n"; + } + close(DIR); + exit(0) +EOF diff --git a/mysql-test/suite/innodb_zip/include/innodb_stats_restart.inc b/mysql-test/suite/innodb_zip/include/innodb_stats_restart.inc new file mode 100644 index 00000000000..13952459847 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_stats_restart.inc @@ -0,0 +1,12 @@ +--echo =============== +--echo After Restart Chekc the stats of the table +--echo =============== + +-- echo # testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; diff --git a/mysql-test/suite/innodb_zip/include/innodb_temp_table_dml.inc b/mysql-test/suite/innodb_zip/include/innodb_temp_table_dml.inc new file mode 100644 index 00000000000..42e0908f810 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_temp_table_dml.inc @@ -0,0 +1,40 @@ +# insert test +insert into t1 values (100, 1.1, 'pune'); +insert into t1 values (99, 1.2, 'mumbai'); +insert into t1 values (98, 1.3, 'jaipur'); +insert into t1 values (97, 1.4, 'delhi'); +insert into t1 values (96, 1.5, 'ahmedabad'); +select * from t1; +select * from t1 
where i = 98; +select * from t1 where i < 100; +# +# add index using alter table table +explain select * from t1 where f > 1.29999; +alter table t1 add index sec_index(f); +explain select * from t1 where f > 1.29999; +select * from t1 where f > 1.29999; +# +explain select * from t1 where i = 100; +alter table t1 add unique index pri_index(i); +explain select * from t1 where i = 100; +select * from t1 where i = 100; +# +# delete test +delete from t1 where i < 97; +select * from t1; +insert into t1 values (96, 1.5, 'kolkata'); +select * from t1; +# +# update test +update t1 set f = 1.44 where c = 'delhi'; +select * from t1; +# +# truncate table +truncate table t1; +insert into t1 values (100, 1.1, 'pune'); +insert into t1 values (99, 1.2, 'mumbai'); +insert into t1 values (98, 1.3, 'jaipur'); +insert into t1 values (97, 1.4, 'delhi'); +insert into t1 values (96, 1.5, 'ahmedabad'); +select * from t1; + diff --git a/mysql-test/suite/innodb_zip/include/innodb_wl6501_crash_stripped.inc b/mysql-test/suite/innodb_zip/include/innodb_wl6501_crash_stripped.inc new file mode 100644 index 00000000000..fcefd0cdf7e --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_wl6501_crash_stripped.inc @@ -0,0 +1,144 @@ +# +# WL#6501: make truncate table atomic +# + +--source include/have_innodb.inc +--source include/have_debug.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +# suppress expected warnings. 
+call mtr.add_suppression("The file '.*' already exists though the corresponding table did not exist in the InnoDB data dictionary"); +call mtr.add_suppression("Cannot create file '.*'"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'"); + +################################################################################ +# +# Will test following scenarios: +# 1. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# 2. Hit crash point after data is updated to system-table and in-memory dict. +# 3. Post truncate recovery, abruptly shutdown the server. +# On restart ensure table state is maintained. +# +################################################################################ + +#----------------------------------------------------------------------------- +# +# create test-bed +# + +let $WL6501_TMP_DIR = `select @@tmpdir`; +let $WL6501_DATA_DIR = `select @@datadir`; +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/my_restart.err; + +#----------------------------------------------------------------------------- +# +# 1. Hit crash point on completing drop of all indexes before creation of index +# is commenced. +# +--echo "1. Hit crash point on completing drop of all indexes before creation" +--echo " of index is commenced." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +set innodb_strict_mode=off; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc + +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 2. Hit crash point after data is updated to system-table and in-memory dict. +# +--echo "2. Hit crash point after data is updated to system-table and" +--echo " in-memory dict." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +set innodb_strict_mode=off; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +select * from t where f < 2.5; +drop table t; + +#----------------------------------------------------------------------------- +# +# 3. Post truncate recovery, abruptly shutdown the server. +# On restart ensure table state is maintained. +# +--echo "3. Post truncate recovery, abruptly shutdown the server." +--echo " On restart ensure table state is maintained." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +set innodb_strict_mode=off; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +check table t; +# +set session debug = "+d,ib_trunc_crash_after_redo_log_write_complete"; +--source include/expect_crash.inc +--error 2013 +truncate table t; +# +--source include/start_mysqld.inc +check table t; +select * from t; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +# +--source include/kill_and_restart_mysqld.inc +# +check table t; +select * from t; +select * from t where f < 2.5; +drop table t; diff --git a/mysql-test/suite/innodb_zip/include/innodb_wl6501_error.inc b/mysql-test/suite/innodb_zip/include/innodb_wl6501_error.inc new file mode 100644 index 00000000000..0939d452dae --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_wl6501_error.inc @@ -0,0 +1,234 @@ +# +# WL#6501: make truncate table atomic +# + +--source include/have_innodb.inc +--source include/have_debug.inc + +--disable_query_log +# suppress expected warnings +call mtr.add_suppression("Unable to truncate FTS index for table"); +call mtr.add_suppression("Unable to assign a new identifier to table " + "`.*`\.`.*` after truncating it"); +call mtr.add_suppression("Flagged corruption of .* in table " + "`.*`\.`.*` in TRUNCATE TABLE"); +call mtr.add_suppression("Parent table of FTS auxiliary table " + ".*\/.* not found"); +--enable_query_log +################################################################################ +# +# Will test following scenarios: +# 1. Error in assigning undo logs for truncate action. +# 2. Error while preparing for truncate. +# 3. Error while dropping/creating indexes. 
+# 4. Error while completing truncate of table involving FTS. +# 5. Error while updating sys-tables. +# +################################################################################ + +#----------------------------------------------------------------------------- +# +# create test-bed +# +let $per_table = `select @@innodb_file_per_table`; +let $format = `select @@innodb_file_format`; + +eval set global innodb_file_per_table = on; +let $WL6501_TMP_DIR = `select @@tmpdir`; +let $WL6501_DATA_DIR = `select @@datadir`; +set innodb_strict_mode=off; + +#----------------------------------------------------------------------------- +# +# 1. Error in assigning undo logs for truncate action. +# +--echo "1. Error in assigning undo logs for truncate action." +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +#check table t; +# +set session debug = "+d,ib_err_trunc_assigning_undo_log"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_assigning_undo_log"; +# +#check table t; +select * from t; +drop table t; + +#----------------------------------------------------------------------------- +# +# 2. Error while preparing for truncate. +# +--echo "2. Error while preparing for truncate." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +#check table t; +# +set session debug = "+d,ib_err_trunc_preparing_for_truncate"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_preparing_for_truncate"; +# +#check table t; +select * from t; +drop table t; + +#----------------------------------------------------------------------------- +# +# 3. Error while dropping/creating indexes +# +--echo "3. Error while dropping/creating indexes" +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +#check table t; +# +set session debug = "+d,ib_err_trunc_drop_index"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_drop_index"; +# +#check table t; +--error ER_TABLE_CORRUPT, 1030 +select * from t; +drop table t; +# +# +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; 
+#check table t; +# +set session debug = "+d,ib_err_trunc_create_index"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_create_index"; +# +#check table t; +--error ER_TABLE_CORRUPT, 1030 +select * from t; +drop table t; +# +# +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create temporary table t ( + i int, f float, c char, + primary key pk(i), unique findex(f), index ck(c)) + engine = innodb row_format = $wl6501_row_fmt + key_block_size = $wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +#check table t; +# +set session debug = "+d,ib_err_trunc_temp_recreate_index"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_temp_recreate_index"; +# +#check table t; +--error ER_TABLE_CORRUPT, 1030 +select * from t; +drop table t; + +#----------------------------------------------------------------------------- +# +# 4. Error while completing truncate of table involving FTS. +# +--echo "4. Error while completing truncate of table involving FTS." 
+eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t (i int, f float, c char(100), + primary key pk(i), index fk(f), fulltext index ck(c)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'mysql is now oracle company'), + (2, 2.2, 'innodb is part of mysql'), + (3, 3.3, 'innodb is default storage engine of mysql'); +select * from t; +#check table t; +# +set session debug = "+d,ib_err_trunc_during_fts_trunc"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_during_fts_trunc"; +# +#check table t; +--error ER_TABLE_CORRUPT, 1030 +select * from t; +drop table t; + +#----------------------------------------------------------------------------- +# +# 5. Error while updating sys-tables. +# +--echo "5. Error while updating sys-tables." +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--disable_warnings +eval create $wl6501_temp table t (i int, f float, c char(100), + primary key pk(i), index fk(f), fulltext index ck(c)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +--enable_warnings +insert into t values (1, 1.1, 'mysql is now oracle company'), + (2, 2.2, 'innodb is part of mysql'), + (3, 3.3, 'innodb is default storage engine of mysql'); +select * from t order by i; +#check table t; +# +set session debug = "+d,ib_err_trunc_during_sys_table_update"; +--error ER_GET_ERRNO +truncate table t; +set session debug = "-d,ib_err_trunc_during_sys_table_update"; +# +#check table t; +--error ER_TABLE_CORRUPT, 1030 +select * from t order by i; +drop table t; + +#----------------------------------------------------------------------------- +# +# remove test-bed +# +eval set global innodb_file_format = $format; +eval set global innodb_file_per_table = 
$per_table; diff --git a/mysql-test/suite/innodb_zip/include/innodb_wl6501_scale.inc b/mysql-test/suite/innodb_zip/include/innodb_wl6501_scale.inc new file mode 100644 index 00000000000..67569d3dae9 --- /dev/null +++ b/mysql-test/suite/innodb_zip/include/innodb_wl6501_scale.inc @@ -0,0 +1,113 @@ +# +# load tables with some significant amount of data and then truncate it. +# + +#----------------------------------------------------------------------------- +# +# create test-bed +# +let $per_table = `select @@innodb_file_per_table`; +let $format = `select @@innodb_file_format`; + +let $WL6501_TMP_DIR = `select @@tmpdir`; +let $WL6501_DATA_DIR = `select @@datadir`; +set innodb_strict_mode=OFF; + +#----------------------------------------------------------------------------- +# +# create procedure to load data +# +delimiter |; +create procedure populate() +begin + declare i int default 1; + while (i <= 5000) do + insert into t1 values (i, 'a', 'b'); + insert into t2 values (i, 'a', 'b'); + insert into t3 values (i, 'a', 'b'); + set i = i + 1; + end while; +end| +create procedure populate_small() +begin + declare i int default 10001; + while (i <= 12000) do + insert into t1 values (i, 'c', 'd'); + insert into t2 values (i, 'a', 'b'); + insert into t3 values (i, 'a', 'b'); + set i = i + 1; + end while; +end| +delimiter ;| + +#----------------------------------------------------------------------------- +# +# create and load the tables. 
+# +eval set global innodb_file_per_table = $wl6501_file_per_table; +eval set global innodb_file_format = $wl6501_file_format; +--replace_regex /[0-9]+/NUMBER/ +eval create table t1 + (i int, c1 char(100), c2 char(100), + index c1_idx(c1)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +eval create table t2 + (i int, c1 char(100), c2 char(100), + index c1_idx(c1)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +eval create temporary table t3 + (i int, c1 char(100), c2 char(100), + index c1_idx(c1)) + engine=innodb row_format=$wl6501_row_fmt + key_block_size=$wl6501_kbs; +# +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +begin; +call populate(); +commit; +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +# +truncate table t1; +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +# +call populate_small(); +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +# +truncate table t2; +truncate table t3; +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +# +call populate_small(); +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +# +drop table t1; +drop table t2; +drop table t3; + +#----------------------------------------------------------------------------- +# +# drop the procedure +# +drop procedure populate; +drop procedure populate_small; + +#----------------------------------------------------------------------------- +# +# remove test-bed +# +eval set global innodb_file_format = $format; +eval set global innodb_file_per_table = $per_table; diff --git a/mysql-test/suite/innodb_zip/r/16k.result b/mysql-test/suite/innodb_zip/r/16k.result new file mode 100644 index 00000000000..fbdd44f328f --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/16k.result @@ -0,0 +1,721 @@ +SET default_storage_engine=InnoDB; +# Test 1) Show the page size from Information Schema 
+SELECT variable_value FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_page_size'; +variable_value +16384 +# Test 2) The number of buffer pool pages is dependent upon the page size. +# Test 3) Query some information_shema tables that are dependent upon +# the page size. +SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; +table_name n_cols table_flags index_name root_page type n_fields merge_threshold +mysql/innodb_index_stats 11 33 PRIMARY 3 3 4 50 +mysql/innodb_table_stats 9 33 PRIMARY 3 3 2 50 +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; +SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +table_name n_cols table_flags index_name root_page type n_fields merge_threshold +test/t1 5 0 PRIMARY 3 3 1 50 +test/t2 5 1 PRIMARY 3 3 1 50 +test/t3 5 41 PRIMARY 3 3 1 50 +test/t4 5 33 PRIMARY 3 3 1 50 +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t1 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t1.ibd +test/t2 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t2.ibd +test/t3 Single DEFAULT 8192 Compressed MYSQLD_DATADIR/test/t3.ibd +test/t4 Single DEFAULT 0 Dynamic 
MYSQLD_DATADIR/test/t4.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t1.ibd +test/t2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t2.ibd +test/t3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t3.ibd +test/t4 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4.ibd +DROP TABLE t1, t2, t3, t4; +# Test 4) The maximum row size is dependent upon the page size. +# Redundant: 8123, Compact: 8126. +# Compressed: 8126, Dynamic: 8126. +# Each row format has its own amount of overhead that +# varies depending on number of fields and other overhead. +SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(211) +) ROW_FORMAT=redundant; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), 
+c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(212) +) ROW_FORMAT=redundant; +ERROR 42000: Row size too large (> 8123). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) +) ROW_FORMAT=compact; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) +) ROW_FORMAT=compact; +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
+CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(157) +) ROW_FORMAT=compressed; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(160) +) ROW_FORMAT=compressed; +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) +) ROW_FORMAT=dynamic; +ERROR 42000: Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+CREATE TABLE t1 (a varchar(255) character set utf8, +b varchar(255) character set utf8, +c varchar(255) character set utf8, +d varchar(255) character set utf8, +e varchar(4) character set utf8, +PRIMARY KEY (a,b,c,d,e)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(255) character set utf8, +b varchar(255) character set utf8, +c varchar(255) character set utf8, +d varchar(255) character set utf8, +e varchar(5) character set utf8, +PRIMARY KEY (a,b,c,d,e)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 3072 bytes +CREATE TABLE t1 (a varchar(255) character set utf8, +b varchar(255) character set utf8, +c varchar(255) character set utf8, +d varchar(255) character set utf8, +e varchar(255) character set utf8, +f varchar(4) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e,f)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(255) character set utf8, +b varchar(255) character set utf8, +c varchar(255) character set utf8, +d varchar(255) character set utf8, +e varchar(255) character set utf8, +f varchar(5) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e,f)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 3072 bytes +# Test 5) Make sure that KEY_BLOCK_SIZE=16, 8, 4, 2 & 1 +# are all accepted. 
+SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=16 +ALTER TABLE t1 KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=8 +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED 
key_block_size=16 +ALTER TABLE t1 KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=8 +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED key_block_size=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +# are rejected when innodb_file_per_table=OFF +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table OFF +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. 
+Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1005 Can't create table `test`.`t4` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1005 Can't create table `test`.`t5` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +# Test 7) This series of tests were moved from innodb-index to here +# because the second alter table t1 assumes a 16k page size. +# Moving the test allows the rest of innodb-index to be run on all +# page sizes. 
The previously disabled portions of this test were +# moved as well. +CREATE TABLE t2(d varchar(17) PRIMARY KEY) ENGINE=innodb DEFAULT CHARSET=utf8; +CREATE TABLE t3(a int PRIMARY KEY) ENGINE=innodb; +INSERT INTO t3 VALUES (22),(44),(33),(55),(66); +INSERT INTO t2 VALUES ('jejdkrun87'),('adfd72nh9k'), +('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik'); +CREATE TABLE t1(a int, b blob, c text, d text NOT NULL) +ENGINE=innodb DEFAULT CHARSET=utf8 STATS_PERSISTENT=0; +INSERT INTO t1 +SELECT a,LEFT(REPEAT(d,100*a),65535),REPEAT(d,20*a),d FROM t2,t3 order by a, d; +DROP TABLE t2, t3; +SELECT COUNT(*) FROM t1 WHERE a=44; +COUNT(*) +5 +SELECT a, +LENGTH(b),b=LEFT(REPEAT(d,100*a),65535),LENGTH(c),c=REPEAT(d,20*a),d FROM t1 +ORDER BY 1, 2, 3, 4, 5, 6; +a LENGTH(b) b=LEFT(REPEAT(d,100*a),65535) LENGTH(c) c=REPEAT(d,20*a) d +22 22000 1 4400 1 adfd72nh9k +22 22000 1 4400 1 jejdkrun87 +22 26400 1 5280 1 adfdpplkeock +22 28600 1 5720 1 adfdijnmnb78k +22 35200 1 7040 1 adfdijn0loKNHJik +33 33000 1 6600 1 adfd72nh9k +33 33000 1 6600 1 jejdkrun87 +33 39600 1 7920 1 adfdpplkeock +33 42900 1 8580 1 adfdijnmnb78k +33 52800 1 10560 1 adfdijn0loKNHJik +44 44000 1 8800 1 adfd72nh9k +44 44000 1 8800 1 jejdkrun87 +44 52800 1 10560 1 adfdpplkeock +44 57200 1 11440 1 adfdijnmnb78k +44 65535 1 14080 1 adfdijn0loKNHJik +55 55000 1 11000 1 adfd72nh9k +55 55000 1 11000 1 jejdkrun87 +55 65535 1 13200 1 adfdpplkeock +55 65535 1 14300 1 adfdijnmnb78k +55 65535 1 17600 1 adfdijn0loKNHJik +66 65535 1 13200 1 adfd72nh9k +66 65535 1 13200 1 jejdkrun87 +66 65535 1 15840 1 adfdpplkeock +66 65535 1 17160 1 adfdijnmnb78k +66 65535 1 21120 1 adfdijn0loKNHJik +ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +DELETE FROM t1 WHERE d='null'; +ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +ERROR 23000: Duplicate entry '22' for key 'PRIMARY' +DELETE FROM t1 WHERE a%2; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +ALTER 
TABLE t1 ADD PRIMARY KEY (a,b(255),c(255)), ADD KEY (b(767)); +SELECT COUNT(*) FROM t1 WHERE a=44; +COUNT(*) +5 +SELECT a, +LENGTH(b), b=LEFT(REPEAT(d,100*a), 65535),LENGTH(c), c=REPEAT(d,20*a), d FROM t1; +a LENGTH(b) b=LEFT(REPEAT(d,100*a), 65535) LENGTH(c) c=REPEAT(d,20*a) d +22 22000 1 4400 1 adfd72nh9k +22 35200 1 7040 1 adfdijn0loKNHJik +22 28600 1 5720 1 adfdijnmnb78k +22 26400 1 5280 1 adfdpplkeock +22 22000 1 4400 1 jejdkrun87 +44 44000 1 8800 1 adfd72nh9k +44 65535 1 14080 1 adfdijn0loKNHJik +44 57200 1 11440 1 adfdijnmnb78k +44 52800 1 10560 1 adfdpplkeock +44 44000 1 8800 1 jejdkrun87 +66 65535 1 13200 1 adfd72nh9k +66 65535 1 21120 1 adfdijn0loKNHJik +66 65535 1 17160 1 adfdijnmnb78k +66 65535 1 15840 1 adfdpplkeock +66 65535 1 13200 1 jejdkrun87 +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` blob NOT NULL, + `c` text NOT NULL, + `d` text NOT NULL, + PRIMARY KEY (`a`,`b`(255),`c`(255)), + KEY `b` (`b`(767)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 STATS_PERSISTENT=0 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +EXPLAIN SELECT * FROM t1 WHERE b LIKE 'adfd%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL b NULL NULL NULL 15 Using where +DROP TABLE t1; +# Test 8) Test creating a table that could lead to undo log overflow. 
+CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, +h blob,i blob,j blob,k blob,l blob,m blob,n blob, +o blob,p blob,q blob,r blob,s blob,t blob,u blob) +ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, +k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; +CREATE INDEX t1a ON t1 (a(767)); +CREATE INDEX t1b ON t1 (b(767)); +CREATE INDEX t1c ON t1 (c(767)); +CREATE INDEX t1d ON t1 (d(767)); +CREATE INDEX t1e ON t1 (e(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, +k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; +CREATE INDEX t1f ON t1 (f(767)); +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, +k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +ERROR HY000: Undo log record is too big. 
+BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, +n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; +CREATE INDEX t1g ON t1 (g(767)); +UPDATE t1 SET g=@e; +CREATE INDEX t1h ON t1 (h(767)); +UPDATE t1 SET h=@e; +CREATE INDEX t1i ON t1 (i(767)); +UPDATE t1 SET i=@e; +CREATE INDEX t1j ON t1 (j(767)); +UPDATE t1 SET j=@e; +CREATE INDEX t1k ON t1 (k(767)); +UPDATE t1 SET k=@e; +CREATE INDEX t1l ON t1 (l(767)); +UPDATE t1 SET l=@e; +CREATE INDEX t1m ON t1 (m(767)); +UPDATE t1 SET m=@e; +CREATE INDEX t1n ON t1 (n(767)); +UPDATE t1 SET n=@e; +CREATE INDEX t1o ON t1 (o(767)); +UPDATE t1 SET o=@e; +CREATE INDEX t1p ON t1 (p(767)); +UPDATE t1 SET p=@e; +CREATE INDEX t1q ON t1 (q(767)); +UPDATE t1 SET q=@e; +CREATE INDEX t1r ON t1 (r(767)); +UPDATE t1 SET r=@e; +CREATE INDEX t1s ON t1 (s(767)); +UPDATE t1 SET s=@e; +CREATE INDEX t1t ON t1 (t(767)); +UPDATE t1 SET t=@e; +ERROR HY000: Undo log record is too big. +CREATE INDEX t1u ON t1 (u(767)); +CREATE INDEX t1ut ON t1 (u(767), t(767)); +CREATE INDEX t1st ON t1 (s(767), t(767)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` blob DEFAULT NULL, + `b` blob DEFAULT NULL, + `c` blob DEFAULT NULL, + `d` blob DEFAULT NULL, + `e` blob DEFAULT NULL, + `f` blob DEFAULT NULL, + `g` blob DEFAULT NULL, + `h` blob DEFAULT NULL, + `i` blob DEFAULT NULL, + `j` blob DEFAULT NULL, + `k` blob DEFAULT NULL, + `l` blob DEFAULT NULL, + `m` blob DEFAULT NULL, + `n` blob DEFAULT NULL, + `o` blob DEFAULT NULL, + `p` blob DEFAULT NULL, + `q` blob DEFAULT NULL, + `r` blob DEFAULT NULL, + `s` blob DEFAULT NULL, + `t` blob DEFAULT NULL, + `u` blob DEFAULT NULL, + KEY `t1a` (`a`(767)), + KEY `t1b` (`b`(767)), + KEY `t1c` (`c`(767)), + KEY `t1d` (`d`(767)), + KEY `t1e` (`e`(767)), + KEY `t1f` (`f`(767)), + KEY `t1g` (`g`(767)), + KEY `t1h` (`h`(767)), + KEY `t1i` (`i`(767)), + KEY `t1j` (`j`(767)), + KEY `t1k` (`k`(767)), + KEY `t1l` (`l`(767)), + KEY `t1m` (`m`(767)), + KEY 
`t1n` (`n`(767)), + KEY `t1o` (`o`(767)), + KEY `t1p` (`p`(767)), + KEY `t1q` (`q`(767)), + KEY `t1r` (`r`(767)), + KEY `t1s` (`s`(767)), + KEY `t1t` (`t`(767)), + KEY `t1u` (`u`(767)), + KEY `t1ut` (`u`(767),`t`(767)), + KEY `t1st` (`s`(767),`t`(767)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +DROP TABLE t1; +# Bug #12429576 - Test an assertion failure on purge. +CREATE TABLE t1_purge ( +A int, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t1_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); +CREATE TABLE t2_purge ( +A int PRIMARY KEY, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, I blob, +J blob, K blob, L blob, +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t2_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), +REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); +CREATE TABLE t3_purge ( +A int, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t3_purge SELECT * FROM t1_purge; +CREATE TABLE t4_purge ( +A int PRIMARY KEY, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), I varchar(800), +J varchar(800), K varchar(800), L varchar(800), +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO t4_purge SELECT * FROM t2_purge; +DELETE FROM t1_purge; +DELETE FROM t2_purge; +DELETE FROM t3_purge; +DELETE FROM t4_purge; +SET @r=REPEAT('a',500); +CREATE TABLE t12637786(a int, +v1 varchar(500), v2 varchar(500), v3 varchar(500), +v4 varchar(500), v5 
varchar(500), v6 varchar(500), +v7 varchar(500), v8 varchar(500), v9 varchar(500), +v10 varchar(500), v11 varchar(500), v12 varchar(500), +v13 varchar(500), v14 varchar(500), v15 varchar(500), +v16 varchar(500), v17 varchar(500), v18 varchar(500) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +CREATE INDEX idx1 ON t12637786(a,v1); +INSERT INTO t12637786 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +UPDATE t12637786 SET a=1000; +DELETE FROM t12637786; +# Bug#12963823 - Test that the purge thread does not crash when +CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, +i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) +ENGINE=innodb ROW_FORMAT=dynamic; +SET @r = REPEAT('a', 767); +INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); +CREATE INDEX ndx_a ON t12963823 (a(500)); +CREATE INDEX ndx_b ON t12963823 (b(500)); +CREATE INDEX ndx_c ON t12963823 (c(500)); +CREATE INDEX ndx_d ON t12963823 (d(500)); +CREATE INDEX ndx_e ON t12963823 (e(500)); +CREATE INDEX ndx_f ON t12963823 (f(500)); +CREATE INDEX ndx_k ON t12963823 (k(500)); +CREATE INDEX ndx_l ON t12963823 (l(500)); +SET @r = REPEAT('b', 500); +UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; +UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; +UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; +UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; +ALTER TABLE t12963823 DROP INDEX ndx_a; +ALTER TABLE t12963823 DROP INDEX ndx_b; +CREATE INDEX ndx_g ON t12963823 (g(500)); +CREATE INDEX ndx_h ON t12963823 (h(500)); +CREATE INDEX ndx_i ON t12963823 (i(500)); +CREATE INDEX ndx_j ON t12963823 (j(500)); +CREATE INDEX ndx_m ON t12963823 (m(500)); +CREATE INDEX ndx_n ON t12963823 (n(500)); +CREATE INDEX ndx_o ON t12963823 (o(500)); +CREATE INDEX ndx_p ON t12963823 (p(500)); +SHOW CREATE TABLE t12963823; +Table Create Table +t12963823 CREATE TABLE `t12963823` ( + `a` blob DEFAULT NULL, + `b` blob DEFAULT NULL, + `c` blob DEFAULT NULL, + `d` blob DEFAULT NULL, + `e` blob DEFAULT NULL, 
+ `f` blob DEFAULT NULL, + `g` blob DEFAULT NULL, + `h` blob DEFAULT NULL, + `i` blob DEFAULT NULL, + `j` blob DEFAULT NULL, + `k` blob DEFAULT NULL, + `l` blob DEFAULT NULL, + `m` blob DEFAULT NULL, + `n` blob DEFAULT NULL, + `o` blob DEFAULT NULL, + `p` blob DEFAULT NULL, + KEY `ndx_c` (`c`(500)), + KEY `ndx_d` (`d`(500)), + KEY `ndx_e` (`e`(500)), + KEY `ndx_f` (`f`(500)), + KEY `ndx_k` (`k`(500)), + KEY `ndx_l` (`l`(500)), + KEY `ndx_g` (`g`(500)), + KEY `ndx_h` (`h`(500)), + KEY `ndx_i` (`i`(500)), + KEY `ndx_j` (`j`(500)), + KEY `ndx_m` (`m`(500)), + KEY `ndx_n` (`n`(500)), + KEY `ndx_o` (`o`(500)), + KEY `ndx_p` (`p`(500)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +# Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE +SET SESSION innodb_strict_mode = ON; +CREATE TABLE bug12547647( +a int NOT NULL, b blob NOT NULL, c text, +PRIMARY KEY (b(10), a), INDEX (c(767)), INDEX(b(767)) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO bug12547647 VALUES (5,REPEAT('khdfo5AlOq',1900),REPEAT('g',7751)); +COMMIT; +UPDATE bug12547647 SET c = REPEAT('b',16928); +ERROR HY000: Undo log record is too big. +SHOW WARNINGS; +Level Code Message +Error 1713 Undo log record is too big. +DROP TABLE bug12547647; +SET SESSION innodb_strict_mode = off; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+DROP TABLE t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> 8126). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; +# +# Bug#56862 Execution of a query that uses index merge returns a wrong result +# +CREATE TABLE t1 ( +pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, +a int, +b int, +INDEX idx(a)) +ENGINE=INNODB; +INSERT INTO t1(a,b) VALUES +(11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), +(3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), +(6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), +(13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); +set @optimizer_switch_saved=@@optimizer_switch; +SET SESSION optimizer_switch='derived_merge=off'; +SET SESSION sort_buffer_size = 1024*36; +EXPLAIN +SELECT COUNT(*) FROM +(SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL 
NULL NULL 1537 +2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 1537 Using sort_union(idx,PRIMARY); Using where +SELECT COUNT(*) FROM +(SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +COUNT(*) +1537 +set @@optimizer_switch=@optimizer_switch_saved; +SET SESSION sort_buffer_size = DEFAULT; +DROP TABLE t1; +DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge; +DROP TABLE t12637786; +DROP TABLE t12963823; diff --git a/mysql-test/suite/innodb_zip/r/4k.result b/mysql-test/suite/innodb_zip/r/4k.result new file mode 100644 index 00000000000..721943e7f5a --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/4k.result @@ -0,0 +1,442 @@ +SET default_storage_engine=InnoDB; +# Test 1) Show the page size from Information Schema +SELECT variable_value FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_page_size'; +variable_value +4096 +# Test 2) The number of buffer pool pages is dependent upon the page size. +SELECT variable_value FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; +variable_value +{checked_valid} +# Test 3) Query some information_shema tables that are dependent upon +# the page size. 
+SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; +table_name n_cols table_flags index_name root_page type n_fields merge_threshold +mysql/engine_cost 9 33 PRIMARY 3 3 3 50 +mysql/gtid_executed 6 33 PRIMARY 3 3 2 50 +mysql/help_category 7 33 PRIMARY 3 3 1 50 +mysql/help_category 7 33 name 4 2 1 50 +mysql/help_keyword 5 33 PRIMARY 3 3 1 50 +mysql/help_keyword 5 33 name 4 2 1 50 +mysql/help_relation 5 33 PRIMARY 3 3 2 50 +mysql/help_topic 9 33 PRIMARY 3 3 1 50 +mysql/help_topic 9 33 name 4 2 1 50 +mysql/innodb_index_stats 11 33 PRIMARY 3 3 4 50 +mysql/innodb_table_stats 9 33 PRIMARY 3 3 2 50 +mysql/plugin 5 33 PRIMARY 3 3 1 50 +mysql/servers 12 33 PRIMARY 3 3 1 50 +mysql/server_cost 7 33 PRIMARY 3 3 1 50 +mysql/slave_master_info 28 33 PRIMARY 3 3 1 50 +mysql/slave_relay_log_info 12 33 PRIMARY 3 3 1 50 +mysql/slave_worker_info 16 33 PRIMARY 3 3 2 50 +mysql/time_zone 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_leap_second 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_name 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_transition 6 33 PRIMARY 3 3 2 50 +mysql/time_zone_transition_type 8 33 PRIMARY 3 3 2 50 +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; +SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +table_name n_cols table_flags index_name 
root_page type n_fields merge_threshold +test/t1 5 0 PRIMARY 3 3 1 50 +test/t2 5 1 PRIMARY 3 3 1 50 +test/t3 5 37 PRIMARY 3 3 1 50 +test/t4 5 33 PRIMARY 3 3 1 50 +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t1 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t1.ibd +test/t2 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t2.ibd +test/t3 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t3.ibd +test/t4 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t1.ibd +test/t2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t2.ibd +test/t3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t3.ibd +test/t4 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4.ibd +DROP TABLE t1, t2, t3, t4; +# Test 4) The maximum row size is dependent upon the page size. +# Redundant: 1979, Compact: 1982. +# Compressed: 1982, Dynamic: 1982. +# Each row format has its own amount of overhead that +# varies depending on number of fields and other overhead. +SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) +) ROW_FORMAT=redundant; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) +) ROW_FORMAT=redundant; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
+CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=compact; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=compact; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) +) ROW_FORMAT=compressed; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) +) ROW_FORMAT=compressed; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=dynamic; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+CREATE TABLE t1 (a varchar(64) character set utf8, +b varchar(64) character set utf8, +c varchar(64) character set utf8, +d varchar(64) character set utf8, +PRIMARY KEY (a,b,c,d)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(64) character set utf8, +b varchar(64) character set utf8, +c varchar(64) character set utf8, +d varchar(65) character set utf8, +PRIMARY KEY (a,b,c,d)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 768 bytes +CREATE TABLE t1 (a varchar(64) character set utf8, +b varchar(64) character set utf8, +c varchar(64) character set utf8, +d varchar(64) character set utf8, +e varchar(64) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(64) character set utf8, +b varchar(64) character set utf8, +c varchar(64) character set utf8, +d varchar(64) character set utf8, +e varchar(65) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 768 bytes +# Test 5) Make sure that KEY_BLOCK_SIZE=4, 2 & 1 are all +# accepted and that KEY_BLOCK_SIZE=16 & 8 are rejected +# in strict mode and converted to 4 in non-strict mode. +SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't1' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. +Error 1031 Table storage engine for 't1' doesn't have this option +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +ERROR HY000: Table storage engine for 't1' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. 
+Error 1031 Table storage engine for 't1' doesn't have this option +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=16 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=8. 
+SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=8 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +# are both rejected when innodb_file_per_table=OFF +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table OFF +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +ERROR HY000: Table storage engine for 't4' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. 
+Error 1031 Table storage engine for 't4' doesn't have this option +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't5' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Error 1031 Table storage engine for 't5' doesn't have this option +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +ERROR HY000: Table storage engine for 't4' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=8 cannot be larger than 4. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1031 Table storage engine for 't4' doesn't have this option +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't5' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 4. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1031 Table storage engine for 't5' doesn't have this option +SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +# Test 7) Not included here; 16k only +# Test 8) Test creating a table that could lead to undo log overflow. 
+CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, +h blob,i blob,j blob,k blob,l blob,m blob,n blob, +o blob,p blob,q blob,r blob,s blob,t blob,u blob) +ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, +k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; +CREATE INDEX t1a ON t1 (a(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, +k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; +CREATE INDEX t1b ON t1 (b(767)); +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, +k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +ERROR HY000: Undo log record is too big. +BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, +n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; +CREATE INDEX t1c ON t1 (c(767)); +UPDATE t1 SET c=@e; +CREATE INDEX t1d ON t1 (d(767)); +UPDATE t1 SET d=@e; +ERROR HY000: Undo log record is too big. 
+CREATE INDEX t1e ON t1 (e(767)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` blob, + `b` blob, + `c` blob, + `d` blob, + `e` blob, + `f` blob, + `g` blob, + `h` blob, + `i` blob, + `j` blob, + `k` blob, + `l` blob, + `m` blob, + `n` blob, + `o` blob, + `p` blob, + `q` blob, + `r` blob, + `s` blob, + `t` blob, + `u` blob, + KEY `t1a` (`a`(767)), + KEY `t1b` (`b`(767)), + KEY `t1c` (`c`(767)), + KEY `t1d` (`d`(767)), + KEY `t1e` (`e`(767)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +DROP TABLE t1; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( +pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), +pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), +pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), +pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), +sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), +sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), +sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), +sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, 
+@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; +CREATE TABLE t1( +pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), +pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), +pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), +pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), +sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), +sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), +sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), +sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; +SET SESSION innodb_strict_mode = off; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767))) +ENGINE=InnoDB 
ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/8k.result b/mysql-test/suite/innodb_zip/r/8k.result new file mode 100644 index 00000000000..dc2b5ca1363 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/8k.result @@ -0,0 +1,473 @@ +SET default_storage_engine=InnoDB; +# Test 1) Show the page size from Information Schema +SELECT variable_value FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_page_size'; +variable_value +8192 +# Test 2) The number of buffer pool pages is dependent upon the page size. +SELECT variable_value FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; +variable_value +{checked_valid} +# Test 3) Query some information_shema tables that are dependent upon +# the page size. 
+SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; +table_name n_cols table_flags index_name root_page type n_fields merge_threshold +mysql/engine_cost 9 33 PRIMARY 3 3 3 50 +mysql/gtid_executed 6 33 PRIMARY 3 3 2 50 +mysql/help_category 7 33 PRIMARY 3 3 1 50 +mysql/help_category 7 33 name 4 2 1 50 +mysql/help_keyword 5 33 PRIMARY 3 3 1 50 +mysql/help_keyword 5 33 name 4 2 1 50 +mysql/help_relation 5 33 PRIMARY 3 3 2 50 +mysql/help_topic 9 33 PRIMARY 3 3 1 50 +mysql/help_topic 9 33 name 4 2 1 50 +mysql/innodb_index_stats 11 33 PRIMARY 3 3 4 50 +mysql/innodb_table_stats 9 33 PRIMARY 3 3 2 50 +mysql/plugin 5 33 PRIMARY 3 3 1 50 +mysql/servers 12 33 PRIMARY 3 3 1 50 +mysql/server_cost 7 33 PRIMARY 3 3 1 50 +mysql/slave_master_info 28 33 PRIMARY 3 3 1 50 +mysql/slave_relay_log_info 12 33 PRIMARY 3 3 1 50 +mysql/slave_worker_info 16 33 PRIMARY 3 3 2 50 +mysql/time_zone 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_leap_second 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_name 5 33 PRIMARY 3 3 1 50 +mysql/time_zone_transition 6 33 PRIMARY 3 3 2 50 +mysql/time_zone_transition_type 8 33 PRIMARY 3 3 2 50 +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; +SELECT t.name table_name, t.n_cols, t.flag table_flags, +i.name index_name, i.page_no root_page, i.type, +i.n_fields, i.merge_threshold +FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, +INFORMATION_SCHEMA.INNODB_SYS_INDEXES i +WHERE t.table_id = i.table_id +AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +table_name n_cols table_flags index_name 
root_page type n_fields merge_threshold +test/t1 5 0 PRIMARY 3 3 1 50 +test/t2 5 1 PRIMARY 3 3 1 50 +test/t3 5 39 PRIMARY 3 3 1 50 +test/t4 5 33 PRIMARY 3 3 1 50 +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t1 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t1.ibd +test/t2 Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t2.ibd +test/t3 Single DEFAULT 4096 Compressed MYSQLD_DATADIR/test/t3.ibd +test/t4 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t1.ibd +test/t2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t2.ibd +test/t3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t3.ibd +test/t4 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4.ibd +DROP TABLE t1, t2, t3, t4; +# Test 4) The maximum row size is dependent upon the page size. +# Redundant: 4027, Compact: 4030. +# Compressed: 4030, Dynamic: 4030. +# Each row format has its own amount of overhead that +# varies depending on number of fields and other overhead. 
+SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) +) ROW_FORMAT=redundant; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) +) ROW_FORMAT=redundant; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) +) ROW_FORMAT=compact; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=compact; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED may help. In current row format, BLOB prefix of 768 bytes is stored inline. 
+CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) +) ROW_FORMAT=compressed; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) +) ROW_FORMAT=compressed; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=dynamic; +ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+CREATE TABLE t1 (a varchar(128) character set utf8, +b varchar(128) character set utf8, +c varchar(128) character set utf8, +d varchar(128) character set utf8, +PRIMARY KEY (a,b,c,d)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(128) character set utf8, +b varchar(128) character set utf8, +c varchar(128) character set utf8, +d varchar(129) character set utf8, +PRIMARY KEY (a,b,c,d)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 1536 bytes +CREATE TABLE t1 (a varchar(128) character set utf8, +b varchar(128) character set utf8, +c varchar(128) character set utf8, +d varchar(128) character set utf8, +e varchar(128) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e)) +ENGINE=innodb; +DROP TABLE t1; +CREATE TABLE t1 (a varchar(128) character set utf8, +b varchar(128) character set utf8, +c varchar(128) character set utf8, +d varchar(128) character set utf8, +e varchar(129) character set utf8, +PRIMARY KEY (a), KEY (b,c,d,e)) +ENGINE=innodb; +ERROR 42000: Specified key was too long; max key length is 1536 bytes +# Test 5) Make sure that KEY_BLOCK_SIZE=8, 4, 2 & 1 are all +# accepted and that KEY_BLOCK_SIZE=16 is rejected in +# strict mode and converted to 8 in non-strict mode. +SET SESSION innodb_strict_mode = ON; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't1' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. 
+Error 1031 Table storage engine for 't1' doesn't have this option +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=8 +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. 
+SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=16 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=8 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=4 +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED KEY_BLOCK_SIZE=1 +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT table_name, row_format, create_options +FROM information_schema.tables WHERE table_name = 't1'; +table_name row_format create_options +t1 Compressed row_format=COMPRESSED +DROP TABLE t1; +# Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +# are rejected when innodb_file_per_table=OFF +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table OFF +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +ERROR HY000: Table storage 
engine for 't4' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Error 1031 Table storage engine for 't4' doesn't have this option +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't5' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Error 1031 Table storage engine for 't5' doesn't have this option +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +ERROR HY000: Table storage engine for 't4' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1031 Table storage engine for 't4' doesn't have this option +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +ERROR HY000: Table storage engine for 't5' doesn't have this option +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE=16 cannot be larger than 8. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1031 Table storage engine for 't5' doesn't have this option +SET GLOBAL innodb_file_format = `Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +# Test 7) Not included here; 16k only +# Test 8) Test creating a table that could lead to undo log overflow. 
+CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, +h blob,i blob,j blob,k blob,l blob,m blob,n blob, +o blob,p blob,q blob,r blob,s blob,t blob,u blob) +ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, +k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; +CREATE INDEX t1a ON t1 (a(767)); +CREATE INDEX t1b ON t1 (b(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, +k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; +CREATE INDEX t1c ON t1 (c(767)); +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, +k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +ERROR HY000: Undo log record is too big. +BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, +n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; +CREATE INDEX t1d ON t1 (d(767)); +UPDATE t1 SET d=@e; +CREATE INDEX t1e ON t1 (e(767)); +UPDATE t1 SET e=@e; +CREATE INDEX t1f ON t1 (f(767)); +UPDATE t1 SET f=@e; +CREATE INDEX t1g ON t1 (g(767)); +UPDATE t1 SET g=@e; +CREATE INDEX t1h ON t1 (h(767)); +UPDATE t1 SET h=@e; +CREATE INDEX t1i ON t1 (i(767)); +UPDATE t1 SET i=@e; +CREATE INDEX t1k ON t1 (j(767)); +CREATE INDEX t1j ON t1 (j(500)); +UPDATE t1 SET j=@e; +ERROR HY000: Undo log record is too big. 
+SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` blob, + `b` blob, + `c` blob, + `d` blob, + `e` blob, + `f` blob, + `g` blob, + `h` blob, + `i` blob, + `j` blob, + `k` blob, + `l` blob, + `m` blob, + `n` blob, + `o` blob, + `p` blob, + `q` blob, + `r` blob, + `s` blob, + `t` blob, + `u` blob, + KEY `t1a` (`a`(767)), + KEY `t1b` (`b`(767)), + KEY `t1c` (`c`(767)), + KEY `t1d` (`d`(767)), + KEY `t1e` (`e`(767)), + KEY `t1f` (`f`(767)), + KEY `t1g` (`g`(767)), + KEY `t1h` (`h`(767)), + KEY `t1i` (`i`(767)), + KEY `t1k` (`j`(767)), + KEY `t1j` (`j`(500)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +DROP TABLE t1; +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( +pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), +pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), +pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), +pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), +sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), +sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), +sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), +sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 
96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; +CREATE TABLE t1( +pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), +pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), +pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), +pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), +sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), +sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), +sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), +sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), +PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, +pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), +KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, +sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) +ROW_FORMAT=Compressed KEY_BLOCK_SIZE=8 ENGINE=InnoDB; +SET @r = repeat('a', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, +@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; +SET SESSION innodb_strict_mode = off; +CREATE TABLE t1( 
+c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. +DROP TABLE t1; +CREATE TABLE t1( +c text NOT NULL, d text NOT NULL, +PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +Warnings: +Warning 139 Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. In current row format, BLOB prefix of 0 bytes is stored inline. 
+DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/bug36169.result b/mysql-test/suite/innodb_zip/r/bug36169.result new file mode 100644 index 00000000000..7e165e0f7d4 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/bug36169.result @@ -0,0 +1 @@ +SET GLOBAL innodb_file_per_table=ON; diff --git a/mysql-test/suite/innodb_zip/r/bug36172.result b/mysql-test/suite/innodb_zip/r/bug36172.result new file mode 100644 index 00000000000..23c5b0cc2f7 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/bug36172.result @@ -0,0 +1 @@ +SET default_storage_engine=InnoDB; diff --git a/mysql-test/suite/innodb_zip/r/bug52745.result b/mysql-test/suite/innodb_zip/r/bug52745.result new file mode 100644 index 00000000000..20605eb274d --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/bug52745.result @@ -0,0 +1,129 @@ +SET GLOBAL innodb_file_per_table=on; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +CREATE TABLE bug52745 ( +a2 int(10) unsigned DEFAULT NULL, +col37 time DEFAULT NULL, +col38 char(229) CHARACTER SET utf8 DEFAULT NULL, +col39 text, +col40 timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, +col41 int(10) unsigned DEFAULT NULL, +col42 varchar(248) CHARACTER SET utf8 DEFAULT NULL, +col43 smallint(5) unsigned zerofill DEFAULT NULL, +col44 varchar(150) CHARACTER SET utf8 DEFAULT NULL, +col45 float unsigned zerofill DEFAULT NULL, +col46 binary(1) DEFAULT NULL, +col47 tinyint(4) DEFAULT NULL, +col48 tinyint(1) DEFAULT NULL, +col49 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', +col50 binary(1) DEFAULT NULL, +col51 double unsigned zerofill DEFAULT NULL, +col52 int(10) unsigned DEFAULT NULL, +col53 time DEFAULT NULL, +col54 double unsigned DEFAULT NULL, +col55 time DEFAULT NULL, +col56 mediumtext CHARACTER SET latin2, +col57 blob, +col58 decimal(52,16) unsigned 
zerofill NOT NULL DEFAULT '000000000000000000000000000000000000.0000000000000000', +col59 binary(1) DEFAULT NULL, +col60 longblob, +col61 time DEFAULT NULL, +col62 longtext CHARACTER SET utf8 COLLATE utf8_persian_ci, +col63 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', +col64 int(10) unsigned DEFAULT NULL, +col65 date DEFAULT NULL, +col66 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', +col67 binary(1) DEFAULT NULL, +col68 tinyblob, +col69 date DEFAULT NULL, +col70 tinyint(3) unsigned zerofill DEFAULT NULL, +col71 varchar(44) CHARACTER SET utf8 DEFAULT NULL, +col72 datetime DEFAULT NULL, +col73 smallint(5) unsigned zerofill DEFAULT NULL, +col74 longblob, +col75 bit(34) DEFAULT NULL, +col76 float unsigned zerofill DEFAULT NULL, +col77 year(4) DEFAULT NULL, +col78 tinyint(3) unsigned DEFAULT NULL, +col79 set('msfheowh','tbpxbgf','by','wahnrjw','myqfasxz','rsokyumrt') CHARACTER SET latin2 DEFAULT NULL, +col80 datetime DEFAULT NULL, +col81 smallint(6) DEFAULT NULL, +col82 enum('xtaurnqfqz','rifrse','kuzwpbvb','niisabk','zxavro','rbvasv','','uulrfaove','','') DEFAULT NULL, +col83 bigint(20) unsigned zerofill DEFAULT NULL, +col84 float unsigned zerofill DEFAULT NULL, +col85 double DEFAULT NULL, +col86 enum('ylannv','','vlkhycqc','snke','cxifustp','xiaxaswzp','oxl') CHARACTER SET latin1 COLLATE latin1_german2_ci DEFAULT NULL, +col87 varbinary(221) DEFAULT NULL, +col88 double unsigned DEFAULT NULL, +col89 float unsigned zerofill DEFAULT NULL, +col90 tinyblob +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +Warnings: +Note 1291 Column 'col82' has duplicated value '' in ENUM +Note 1291 Column 'col82' has duplicated value '' in ENUM +SET sql_mode = default; +INSERT IGNORE INTO bug52745 SET +col40='0000-00-00 00:00:00', +col51=16547, +col53='7711484', +col54=-28604, +col55='7112612', +col56='wakefulness\'', +col57=repeat('absorbefacient\'',106), +col58=11027, +col59='AM09gW7', +col60=repeat('Noelani\'',16), +col61='2520576', 
+col62='substitutiv', +col63='19950106155112', +col64=-12038, +col65='86238806', +col66='19600719080256', +col68=repeat('Sagittarius\'',54), +col69='38943902', +col70=1232, +col71='Elora\'', +col74=repeat('zipp',11), +col75='0', +col76=23254, +col78=13247, +col79='56219', +col80='20500609035724', +col81=11632, +col82=7, +col84=-23863, +col85=6341, +col87='HZdkf.4 s7t,5Rmq 8so fmr,ruGLUG25TrtI.yQ 2SuHq0ML7rw7.4 b2yf2E5TJxOtBBZImezDnzpj,uPYfznnEUDN1e9aQoO 2DsplB7TFWy oQJ br HLF :F,eQ p4i1oWsr lL3PG,hjCz6hYqN h1QTjLCjrv:QCdSzpYBibJAtZCxLOk3l6Blsh.W', +col88=16894, +col89=6161, +col90=repeat('gale',48); +Warnings: +Warning 1265 Data truncated for column 'col53' at row 1 +Warning 1264 Out of range value for column 'col54' at row 1 +Warning 1265 Data truncated for column 'col59' at row 1 +Warning 1265 Data truncated for column 'col61' at row 1 +Warning 1264 Out of range value for column 'col64' at row 1 +Warning 1265 Data truncated for column 'col65' at row 1 +Warning 1264 Out of range value for column 'col66' at row 1 +Warning 1265 Data truncated for column 'col68' at row 1 +Warning 1265 Data truncated for column 'col69' at row 1 +Warning 1264 Out of range value for column 'col70' at row 1 +Warning 1264 Out of range value for column 'col78' at row 1 +Warning 1265 Data truncated for column 'col79' at row 1 +Warning 1264 Out of range value for column 'col84' at row 1 +SHOW WARNINGS; +Level Code Message +Warning 1265 Data truncated for column 'col53' at row 1 +Warning 1264 Out of range value for column 'col54' at row 1 +Warning 1265 Data truncated for column 'col59' at row 1 +Warning 1265 Data truncated for column 'col61' at row 1 +Warning 1264 Out of range value for column 'col64' at row 1 +Warning 1265 Data truncated for column 'col65' at row 1 +Warning 1264 Out of range value for column 'col66' at row 1 +Warning 1265 Data truncated for column 'col68' at row 1 +Warning 1265 Data truncated for column 'col69' at row 1 +Warning 1264 Out of range value for column 'col70' at 
row 1 +Warning 1264 Out of range value for column 'col78' at row 1 +Warning 1265 Data truncated for column 'col79' at row 1 +Warning 1264 Out of range value for column 'col84' at row 1 +DROP TABLE bug52745; +SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_zip/r/bug53591.result b/mysql-test/suite/innodb_zip/r/bug53591.result new file mode 100644 index 00000000000..e14a1942750 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/bug53591.result @@ -0,0 +1,13 @@ +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_strict_mode=on; +set old_alter_table=0; +CREATE TABLE bug53591(a text charset utf8 not null) +ENGINE=InnoDB KEY_BLOCK_SIZE=1; +ALTER TABLE bug53591 ADD PRIMARY KEY(a(220)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is {checked_valid}. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +SHOW WARNINGS; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is {checked_valid}. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs +DROP TABLE bug53591; +SET GLOBAL innodb_file_per_table=1; +SET GLOBAL innodb_strict_mode=DEFAULT; diff --git a/mysql-test/suite/innodb_zip/r/bug56680.result b/mysql-test/suite/innodb_zip/r/bug56680.result new file mode 100644 index 00000000000..40660f435fb --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/bug56680.result @@ -0,0 +1,120 @@ +SET GLOBAL tx_isolation='REPEATABLE-READ'; +SET GLOBAL innodb_file_per_table=on; +CREATE TABLE bug56680( +a INT AUTO_INCREMENT PRIMARY KEY, +b CHAR(1), +c INT, +INDEX(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; +INSERT INTO bug56680 VALUES(0,'x',1); +BEGIN; +SELECT b FROM bug56680; +b +x +connect con1,localhost,root,,; +connection con1; +BEGIN; +UPDATE bug56680 SET b='X'; +connection default; +SELECT b FROM bug56680; +b +x +SELECT * FROM bug56680; +a b c +1 x 1 +connection con1; +ROLLBACK; +disconnect con1; +connection default; +SELECT b FROM bug56680; +b +x +SET GLOBAL tx_isolation='READ-UNCOMMITTED'; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +BEGIN; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +connect con1,localhost,root,,; +connection con1; +BEGIN; +DELETE FROM bug56680 WHERE a=1; +INSERT INTO bug56680 VALUES(1,'X',1); +SELECT b FROM bug56680 LIMIT 3; +b +X +x +x +connection default; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CHECK TABLE bug56680; +Table Op Msg_type Msg_text +test.bug56680 check status OK +connection con1; +ROLLBACK; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CHECK 
TABLE bug56680; +Table Op Msg_type Msg_text +test.bug56680 check status OK +connection default; +disconnect con1; +SELECT b FROM bug56680 LIMIT 2; +b +x +x +CREATE TABLE bug56680_2( +a INT AUTO_INCREMENT PRIMARY KEY, +b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci, +c INT, +INDEX(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; +INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680; +BEGIN; +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +HEX(b) +DF +DF +DELETE FROM bug56680_2 WHERE a=1; +INSERT INTO bug56680_2 VALUES(1,'SS',1); +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +HEX(b) +5353 +DF +DF +CHECK TABLE bug56680_2; +Table Op Msg_type Msg_text +test.bug56680_2 check status OK +ALTER TABLE bug56680_2 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +HEX(b) +5353 +DF +DELETE FROM bug56680_2 WHERE a=1; +INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1); +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +HEX(b) +DF +DF +DF +CHECK TABLE bug56680_2; +Table Op Msg_type Msg_text +test.bug56680_2 check status OK +DROP TABLE bug56680_2; +DROP TABLE bug56680; diff --git a/mysql-test/suite/innodb_zip/r/cmp_drop_table.result b/mysql-test/suite/innodb_zip/r/cmp_drop_table.result new file mode 100644 index 00000000000..c1743cac2e1 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/cmp_drop_table.result @@ -0,0 +1,13 @@ +set global innodb_file_per_table=on; +create table t1(a text) engine=innodb key_block_size=8; +SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0; +page_size +8192 +drop table t1; +SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0; +page_size +8192 +create table t2(a text) engine=innodb; +SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0; +page_size +drop table t2; diff --git a/mysql-test/suite/innodb_zip/r/cmp_per_index.result b/mysql-test/suite/innodb_zip/r/cmp_per_index.result new file mode 100644 index 00000000000..5b899e9ff71 --- /dev/null +++ 
b/mysql-test/suite/innodb_zip/r/cmp_per_index.result @@ -0,0 +1,94 @@ +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SELECT * FROM information_schema.innodb_cmp_per_index; +CREATE TABLE t ( +a INT, +b VARCHAR(512), +c VARCHAR(16), +PRIMARY KEY (a), +INDEX (b(512)), +INDEX (c(16)) +) ENGINE=INNODB KEY_BLOCK_SIZE=2; +SELECT +database_name, +table_name, +index_name, +compress_ops, +compress_ops_ok, +uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; +database_name test +table_name t +index_name b +compress_ops 1 +compress_ops_ok 1 +uncompress_ops 0 +database_name test +table_name t +index_name c +compress_ops 1 +compress_ops_ok 1 +uncompress_ops 0 +database_name test +table_name t +index_name PRIMARY +compress_ops 1 +compress_ops_ok 1 +uncompress_ops 0 +BEGIN; +COMMIT; +ALTER TABLE t DROP INDEX c; +GRANT USAGE ON *.* TO 'tuser01'@'localhost' IDENTIFIED BY 'cDJvI9s_Uq'; +Warnings: +Level Warning +Code 1287 +Message Using GRANT for creating new user is deprecated and will be removed in future release. Create new user with CREATE USER statement. 
+FLUSH PRIVILEGES; +SELECT * FROM information_schema.innodb_cmp_per_index; +ERROR 42000: Access denied; you need (at least one of) the PROCESS privilege(s) for this operation +DROP USER 'tuser01'@'localhost'; +SELECT +database_name, +table_name, +index_name, +CASE WHEN compress_ops=47 and @@innodb_compression_level IN (4,8,9) THEN 65 +ELSE compress_ops END as compress_ops, +CASE WHEN compress_ops_ok=47 and @@innodb_compression_level IN (4,8,9) THEN 65 +ELSE compress_ops_ok END as compress_ops_ok, +uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; +database_name test +table_name t +index_name b +compress_ops 43 +compress_ops_ok 43 +uncompress_ops 0 +database_name test +table_name t +index_name PRIMARY +compress_ops 65 +compress_ops_ok 65 +uncompress_ops 0 +# restart +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SELECT COUNT(*) FROM t; +COUNT(*) 128 +SELECT +database_name, +table_name, +index_name, +compress_ops, +compress_ops_ok, +CASE WHEN uncompress_ops=6 and @@innodb_compression_level IN (4,8,9) THEN 9 +ELSE uncompress_ops END as uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; +database_name test +table_name t +index_name PRIMARY +compress_ops 0 +compress_ops_ok 0 +uncompress_ops 9 +DROP TABLE t; +SET GLOBAL innodb_cmp_per_index_enabled=default; diff --git a/mysql-test/suite/innodb_zip/r/create_options.result b/mysql-test/suite/innodb_zip/r/create_options.result new file mode 100644 index 00000000000..2d80894c8cd --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/create_options.result @@ -0,0 +1,839 @@ +SET default_storage_engine=InnoDB; +SET GLOBAL innodb_file_per_table=ON; +SET SESSION innodb_strict_mode = ON; +# Test 1) StrictMode=ON, CREATE and ALTER with each ROW_FORMAT & KEY_BLOCK_SIZE=0 +# KEY_BLOCK_SIZE=0 means 'no KEY_BLOCK_SIZE is specified' +# 'FIXED' is sent to InnoDB since it is used by MyISAM. 
+# But it is an invalid mode in InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=FIXED; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: invalid ROW_FORMAT specifier. +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: invalid ROW_FORMAT specifier. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +# Test 2) StrictMode=ON, CREATE with each ROW_FORMAT & a valid non-zero KEY_BLOCK_SIZE +# KEY_BLOCK_SIZE is incompatible with COMPACT, REDUNDANT, & DYNAMIC +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=1; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE. +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE. +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE. 
+Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +# Test 3) StrictMode=ON, ALTER with each ROW_FORMAT & a valid non-zero KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: invalid ROW_FORMAT specifier. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=2; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +# Test 4) StrictMode=ON, CREATE with ROW_FORMAT=COMPACT, ALTER with a valid non-zero KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +# Test 5) StrictMode=ON, CREATE with a valid KEY_BLOCK_SIZE +# ALTER with each ROW_FORMAT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=2; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) DEFAULT NULL, + `f1` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2 +ALTER TABLE t1 ROW_FORMAT=COMPACT; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = COMPACT with KEY_BLOCK_SIZE. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = REDUNDANT with KEY_BLOCK_SIZE. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: cannot specify ROW_FORMAT = DYNAMIC with KEY_BLOCK_SIZE. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +# Test 6) StrictMode=ON, CREATE with an invalid KEY_BLOCK_SIZE. +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=9; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: invalid KEY_BLOCK_SIZE = 9. 
Valid values are [1, 2, 4, 8, 16] +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +# Test 7) StrictMode=ON, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and +# and a valid non-zero KEY_BLOCK_SIZE are rejected with Antelope +# and that they can be set to default values during strict mode. +SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=4; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. 
+Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f1 INT; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4. +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) DEFAULT NULL, + `f1` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +# Test 8) StrictMode=ON, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and +# and a valid non-zero KEY_BLOCK_SIZE are rejected with +# innodb_file_per_table=OFF and that they can be set to default +# values during strict mode. +SET GLOBAL innodb_file_per_table=OFF; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=1; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. 
+Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; +Got one of the listed errors +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table. +Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options") +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Error 1478 Table storage engine 'InnoDB' does not support the create option 'KEY_BLOCK_SIZE' +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table. 
+Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +ALTER TABLE t1 ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +SET GLOBAL innodb_file_per_table=ON; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SET GLOBAL innodb_file_per_table=ON; +################################################## +SET SESSION innodb_strict_mode = OFF; +# Test 9) StrictMode=OFF, CREATE and ALTER with each ROW_FORMAT & KEY_BLOCK_SIZE=0 +# KEY_BLOCK_SIZE=0 means 'no KEY_BLOCK_SIZE is specified' +# 'FIXED' is sent to InnoDB since it is used by MyISAM. +# It is an invalid mode in InnoDB, use COMPACT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=FIXED; +Warnings: +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=FIXED +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; +Warnings: +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=FIXED +# Test 10) StrictMode=OFF, CREATE with each ROW_FORMAT & a valid KEY_BLOCK_SIZE +# KEY_BLOCK_SIZE is ignored with COMPACT, REDUNDANT, & DYNAMIC +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=1; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT key_block_size=1 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4 unless ROW_FORMAT=COMPRESSED. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC key_block_size=4 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +# Test 11) StrictMode=OFF, ALTER with each ROW_FORMAT & a valid KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=FIXED key_block_size=1 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=2; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC key_block_size=4 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +# Test 12) StrictMode=OFF, CREATE with ROW_FORMAT=COMPACT, ALTER with a valid KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT key_block_size=2 +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT key_block_size=2 +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=2 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC key_block_size=2 +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=4 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed key_block_size=2 +# Test 13) StrictMode=OFF, CREATE with a valid KEY_BLOCK_SIZE +# ALTER with each ROW_FORMAT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1 +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) DEFAULT NULL, + `f1` int(11) 
DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1 +ALTER TABLE t1 ROW_FORMAT=COMPACT; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Redundant row_format=REDUNDANT key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. 
+SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compact row_format=COMPACT +# Test 14) StrictMode=OFF, CREATE with an invalid KEY_BLOCK_SIZE, +# it defaults to half of the page size. +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=15; +Warnings: +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=15. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=15. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic key_block_size=15 +# Test 15) StrictMode=OFF, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and a +valid KEY_BLOCK_SIZE are remembered but not used when ROW_FORMAT +is reverted to Antelope and then used again when ROW_FORMAT=Barracuda. 
+DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f1 INT; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1. +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SHOW WARNINGS; +Level Code Message +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1. +Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=COMPRESSED key_block_size=1 +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=1 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +# Test 16) StrictMode=OFF, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and a +valid KEY_BLOCK_SIZE are remembered but not used when innodb_file_per_table=OFF +and then used again when innodb_file_per_table=ON. 
+DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +SET GLOBAL innodb_file_per_table=ON; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Compressed row_format=COMPRESSED key_block_size=2 +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +SET GLOBAL innodb_file_per_table=ON; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +Level Code Message +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +TABLE_NAME ROW_FORMAT CREATE_OPTIONS +t1 Dynamic row_format=DYNAMIC +# Cleanup +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/index_large_prefix.result b/mysql-test/suite/innodb_zip/r/index_large_prefix.result new file mode 100644 index 00000000000..7285f4dfad7 --- /dev/null +++ 
b/mysql-test/suite/innodb_zip/r/index_large_prefix.result @@ -0,0 +1,534 @@ +SET default_storage_engine=InnoDB; +set global innodb_file_per_table=1; +### Test 1 ### +create table worklog5743(a TEXT not null, primary key (a(1000))) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +insert into worklog5743 values(repeat("a", 20000)); +update worklog5743 set a = (repeat("b", 16000)); +create index idx on worklog5743(a(2000)); +show warnings; +Level Code Message +begin; +update worklog5743 set a = (repeat("x", 17000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +connect con1,localhost,root,,; +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +0 +select a = repeat("b", 16000) from worklog5743; +a = repeat("b", 16000) +1 +connect con2,localhost,root,,; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +1 +connection default; +rollback; +drop table worklog5743; +### Test 2 ### +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +create index idx on worklog5743(a1, a2(2000)); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 1000; +connection con1; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743 ref idx idx 5 const 1 +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +connection con2; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = 
repeat("a", 10000) +connection default; +rollback; +drop table worklog5743; +### Test 3 ### +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +create index idx on worklog5743(a1, a2(50)); +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 1000; +connection con1; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743 ref idx idx 5 const 1 +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +connection con2; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +connection default; +rollback; +drop table worklog5743; +### Test 4 ### +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; +create table worklog5743_8(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=8; +create table worklog5743_16(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=16; +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_1(a2(4000)); +Got one of the listed errors +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_1(a2(4000)); +Got one of the listed errors +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx3 on worklog5743_1(a2(436)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_1(a2(434)); +show warnings; +Level Code Message +create index idx5 on worklog5743_1(a1, a2(430)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET sql_mode= ''; +create index idx1 on worklog5743_2(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_2(a2(4000)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx3 on worklog5743_2(a2(948)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_2(a2(946)); +show warnings; +Level Code Message +create index idx5 on worklog5743_2(a1, a2(942)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_2(a1, a2(940)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_4(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_4(a2(4000)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx3 on worklog5743_4(a2(1972)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. 
The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_4(a2(1970)); +show warnings; +Level Code Message +create index idx5 on worklog5743_4(a1, a2(1966)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_4(a1, a2(1964)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_8(a2(1000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_8(a2(3073)); +Warnings: +Warning 1071 Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +create index idx3 on worklog5743_8(a2(3072)); +Warnings: +Note 1831 Duplicate index 'idx3' defined on the table 'test.worklog5743_8'. This is deprecated and will be disallowed in a future release. 
+show warnings; +Level Code Message +Note 1831 Duplicate index 'idx3' defined on the table 'test.worklog5743_8'. This is deprecated and will be disallowed in a future release. +create index idx4 on worklog5743_8(a1, a2(3069)); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx5 on worklog5743_8(a1, a2(3068)); +show warnings; +Level Code Message +create index idx6 on worklog5743_8(a1, a2(2000), a3(1069)); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx7 on worklog5743_8(a1, a2(2000), a3(1068)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_16(a2(1000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_16(a2(3073)); +Warnings: +Warning 1071 Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +create index idx3 on worklog5743_16(a2(3072)); +Warnings: +Note 1831 Duplicate index 'idx3' defined on the table 'test.worklog5743_16'. This is deprecated and will be disallowed in a future release. 
+show warnings; +Level Code Message +Note 1831 Duplicate index 'idx3' defined on the table 'test.worklog5743_16'. This is deprecated and will be disallowed in a future release. +create index idx4 on worklog5743_16(a1, a2(3069)); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx5 on worklog5743_16(a1, a2(3068)); +show warnings; +Level Code Message +create index idx6 on worklog5743_16(a1, a2(2000), a3(1069)); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx7 on worklog5743_16(a1, a2(2000), a3(1068)); +show warnings; +Level Code Message +set sql_mode= default; +insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); +insert into worklog5743_8 values(9, repeat("a", 10000), repeat("a", 10000)); +insert into worklog5743_16 values(9, repeat("a", 10000), repeat("a", 10000)); +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +insert into worklog5743_8 values(2, repeat("b", 10000), repeat("b", 10000)); +insert into worklog5743_16 values(2, repeat("b", 10000), repeat("b", 10000)); +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_8; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_16; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +begin; +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +update worklog5743_8 set a1 = 1000; +update worklog5743_16 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_8; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_16; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +connection con1; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743_1 ref idx6 idx6 5 const 1 +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743_2 ref idx6 idx6 5 const 1 +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE 
worklog5743_4 ref idx6 idx6 5 const 1 +explain select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743_8 ref idx5,idx7 idx5 5 const 1 +explain select a1, left(a2, 20) from worklog5743_16 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743_16 ref idx5,idx7 idx5 5 const 1 +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_16 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +connection con2; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_16 where a1 = 9; +a1 left(a2, 20) +connection default; +rollback; +drop table worklog5743_1; +drop table worklog5743_2; +drop table worklog5743_4; +drop table worklog5743_8; +drop table worklog5743_16; +### Test 5 ### +create table worklog5743(a1 int, +a2 varchar(20000), +a3 varchar(3073), +a4 varchar(3072), +a5 varchar(3069), +a6 varchar(3068)) +ROW_FORMAT=DYNAMIC; +SET sql_mode=''; +create index idx1 on worklog5743(a2); +Warnings: +Warning 1071 Specified key was too long; max key length is 3072 bytes +create index idx2 on worklog5743(a3); +Warnings: +Warning 1071 Specified key was too 
long; max key length is 3072 bytes +create index idx3 on worklog5743(a4); +show warnings; +Level Code Message +SET sql_mode= default; +create index idx4 on worklog5743(a1, a2); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 3072 bytes +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx5 on worklog5743(a1, a5); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +create index idx6 on worklog5743(a1, a6); +show warnings; +Level Code Message +show create table worklog5743; +Table Create Table +worklog5743 CREATE TABLE `worklog5743` ( + `a1` int(11) DEFAULT NULL, + `a2` varchar(20000) DEFAULT NULL, + `a3` varchar(3073) DEFAULT NULL, + `a4` varchar(3072) DEFAULT NULL, + `a5` varchar(3069) DEFAULT NULL, + `a6` varchar(3068) DEFAULT NULL, + KEY `idx1` (`a2`(3072)), + KEY `idx2` (`a3`(3072)), + KEY `idx3` (`a4`), + KEY `idx6` (`a1`,`a6`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +insert into worklog5743 values(9, +repeat("a", 20000), repeat("a", 3073), +repeat("a", 3072), repeat("a", 3069), +repeat("a", 3068)); +begin; +update worklog5743 set a1 = 1000; +connection con1; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1 from worklog5743 where a1 = 9; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE worklog5743 ref idx6 idx6 5 const 1 Using index +select a1 from worklog5743 where a1 = 9; +a1 +9 +connection con2; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1 from worklog5743 where a1 = 9; +a1 +connection default; +rollback; +drop table worklog5743; +### Test 6 ### +create table worklog5743(a TEXT not null, primary key 
(a(1000))) +row_format=compact; +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create table worklog5743(a TEXT) +row_format=compact; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx on worklog5743(a(767)); +insert into worklog5743 values(repeat("a", 20000)); +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +connection con1; +select a = repeat("a", 20000) from worklog5743; +a = repeat("a", 20000) +1 +disconnect con1; +connection con2; +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 25000) from worklog5743; +a = repeat("x", 25000) +1 +1 +disconnect con2; +connection default; +rollback; +drop table worklog5743; +### Test 7 ### +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; +SET sql_mode=''; +create index idx1 on worklog5743(a(3073)); +Warnings: +Warning 1071 Specified key was too long; max key length is 3072 bytes +create index idx2 on worklog5743(a(3072)); +Warnings: +Note 1831 Duplicate index 'idx2' defined on the table 'test.worklog5743'. This is deprecated and will be disallowed in a future release. +show create table worklog5743; +Table Create Table +worklog5743 CREATE TABLE `worklog5743` ( + `a` text NOT NULL, + KEY `idx1` (`a`(3072)), + KEY `idx2` (`a`(3072)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +drop table worklog5743; +SET sql_mode= default; +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. 
+create index idx2 on worklog5743(a(767)); +drop table worklog5743; +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx2 on worklog5743(a(767)); +drop table worklog5743; +SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_zip/r/index_large_prefix_4k.result b/mysql-test/suite/innodb_zip/r/index_large_prefix_4k.result new file mode 100644 index 00000000000..f010c522614 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/index_large_prefix_4k.result @@ -0,0 +1,404 @@ +SET default_storage_engine=InnoDB; +set global innodb_file_per_table=1; +### Test 1 ### +create table worklog5743(a TEXT not null, primary key (a(768))) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +insert into worklog5743 values(repeat("a", 20000)); +update worklog5743 set a = (repeat("b", 16000)); +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. 
+create index idx on worklog5743(a(900)); +Warnings: +Warning 1071 Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 768 bytes +SET sql_mode= default; +begin; +update worklog5743 set a = (repeat("x", 17000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +0 +select a = repeat("b", 16000) from worklog5743; +a = repeat("b", 16000) +1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +1 +rollback; +drop table worklog5743; +### Test 2 ### +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +create index idx on worklog5743(a1, a2(750)); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 1111; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx idx 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1`,(`test`.`worklog5743`.`a2` = repeat('a',10000)) AS `a2 = repeat("a", 10000)` from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +rollback; +drop table worklog5743; +### Test 3 ### +create table 
worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +create index idx on worklog5743(a1, a2(50)); +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 2222; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx idx 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1`,(`test`.`worklog5743`.`a2` = repeat('a',10000)) AS `a2 = repeat("a", 10000)` from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +rollback; +drop table worklog5743; +### Test 4 ### +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_1(a2(4000)); +ERROR 42000: Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 767 bytes +create index idx3 on worklog5743_1(a2(436)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 1982. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 1982. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_1(a2(434)); +show warnings; +Level Code Message +create index idx5 on worklog5743_1(a1, a2(430)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 1982. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 1982. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; +Level Code Message +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. 
+create index idx1 on worklog5743_2(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 768 bytes +show create table worklog5743_2; +Table Create Table +worklog5743_2 CREATE TABLE `worklog5743_2` ( + `a1` int(11) DEFAULT NULL, + `a2` text NOT NULL, + KEY `idx1` (`a2`(768)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2 +create index idx3 on worklog5743_2(a2(769)); +Warnings: +Warning 1071 Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 768 bytes +create index idx4 on worklog5743_2(a2(768)); +Warnings: +Warning 1831 Duplicate index 'idx4' defined on the table 'test.worklog5743_2'. This is deprecated and will be disallowed in a future release. +show warnings; +Level Code Message +Warning 1831 Duplicate index 'idx4' defined on the table 'test.worklog5743_2'. This is deprecated and will be disallowed in a future release. +create index idx5 on worklog5743_2(a1, a2(765)); +ERROR 42000: Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 768 bytes +create index idx6 on worklog5743_2(a1, a2(764)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_4(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +show create table worklog5743_4; +Table Create Table +worklog5743_4 CREATE TABLE `worklog5743_4` ( + `a1` int(11) DEFAULT NULL, + `a2` text NOT NULL, + KEY `idx1` (`a2`(767)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=4 +create index idx3 on worklog5743_4(a2(769)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +create index idx4 on worklog5743_4(a2(768)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +create index idx5 on worklog5743_4(a1, a2(765)); +ERROR 42000: Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 768 bytes +create index idx6 on worklog5743_4(a1, a2(764)); +show warnings; +Level Code Message +SET sql_mode= default; +insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +begin; +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_1 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_1`.`a1` AS `a1`,left(`test`.`worklog5743_1`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_1` where (`test`.`worklog5743_1`.`a1` = 9) +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_2 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* 
select#1 */ select `test`.`worklog5743_2`.`a1` AS `a1`,left(`test`.`worklog5743_2`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_2` where (`test`.`worklog5743_2`.`a1` = 9) +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_4 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_4`.`a1` AS `a1`,left(`test`.`worklog5743_4`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_4` where (`test`.`worklog5743_4`.`a1` = 9) +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +rollback; +drop table worklog5743_1; +drop table worklog5743_2; +drop table worklog5743_4; +### Test 5 ### +create table worklog5743(a1 int, a2 varchar(20000)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 3072 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(3072)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +ERROR 42000: Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 768 bytes +drop table worklog5743; +create table 
worklog5743(a1 int, a2 varchar(769)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +ERROR 42000: Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 768 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(768)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 768)); +update worklog5743 set a1 = 3333; +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(765)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a1, a2); +ERROR 42000: Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 768 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(764)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a1, a2); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 764)); +begin; +update worklog5743 set a1 = 4444; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1 from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx1 idx1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1` from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1 from worklog5743 where a1 = 9; +a1 +9 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1 from worklog5743 where a1 = 9; +a1 +rollback; +drop table worklog5743; +### Test 6 ### +create table worklog5743(a TEXT not null, primary key (a(1000))); +ERROR 42000: Specified key was too long; max key length is 768 bytes +create table worklog5743(a TEXT) 
ROW_FORMAT=COMPACT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx on worklog5743(a(767)); +insert into worklog5743 values(repeat("a", 20000)); +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +select a = repeat("a", 20000) from worklog5743; +a = repeat("a", 20000) +1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 25000) from worklog5743; +a = repeat("x", 25000) +1 +1 +rollback; +drop table worklog5743; +### Test 7 ### +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. +create index idx1 on worklog5743(a(769)); +Warnings: +Warning 1071 Specified key was too long; max key length is 768 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 768 bytes +SET sql_mode= default; +create index idx2 on worklog5743(a(768)); +Warnings: +Warning 1831 Duplicate index 'idx2' defined on the table 'test.worklog5743'. This is deprecated and will be disallowed in a future release. +show warnings; +Level Code Message +Warning 1831 Duplicate index 'idx2' defined on the table 'test.worklog5743'. This is deprecated and will be disallowed in a future release. 
+show create table worklog5743; +Table Create Table +worklog5743 CREATE TABLE `worklog5743` ( + `a` text NOT NULL, + KEY `idx1` (`a`(768)), + KEY `idx2` (`a`(768)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +insert into worklog5743 values(repeat("a", 768)); +drop table worklog5743; +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx2 on worklog5743(a(767)); +drop table worklog5743; +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx2 on worklog5743(a(767)); +drop table worklog5743; +SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_zip/r/index_large_prefix_8k.result b/mysql-test/suite/innodb_zip/r/index_large_prefix_8k.result new file mode 100644 index 00000000000..3176ffe2204 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/index_large_prefix_8k.result @@ -0,0 +1,442 @@ +SET default_storage_engine=InnoDB; +set global innodb_file_per_table=1; +### Test 1 ### +create table worklog5743(a TEXT not null, primary key (a(1000))) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +insert into worklog5743 values(repeat("a", 20000)); +update worklog5743 set a = (repeat("b", 16000)); +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. 
+create index idx on worklog5743(a(2000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 1536 bytes +SET sql_mode= default; +begin; +update worklog5743 set a = (repeat("x", 17000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +0 +select a = repeat("b", 16000) from worklog5743; +a = repeat("b", 16000) +1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 17000) from worklog5743; +a = repeat("x", 17000) +1 +rollback; +drop table worklog5743; +### Test 2 ### +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +Level Code Message +create index idx on worklog5743(a1, a2(1250)); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 1000; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx idx 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1`,(`test`.`worklog5743`.`a2` = repeat('a',10000)) AS `a2 = repeat("a", 10000)` from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +rollback; +drop table worklog5743; +### Test 3 ### +create 
table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +create index idx on worklog5743(a1, a2(50)); +insert into worklog5743 values(9, repeat("a", 10000)); +begin; +update worklog5743 set a1 = 1000; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx idx 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1`,(`test`.`worklog5743`.`a2` = repeat('a',10000)) AS `a2 = repeat("a", 10000)` from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +9 1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +a1 a2 = repeat("a", 10000) +rollback; +drop table worklog5743; +### Test 4 ### +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; +create table worklog5743_8(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=8; +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_1(a2(4000)); +ERROR 42000: Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_1(a2(4000)); +ERROR 42000: Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 1536 bytes +create index idx3 on worklog5743_1(a2(436)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_1(a2(434)); +show warnings; +Level Code Message +create index idx5 on worklog5743_1(a1, a2(430)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. 
+create index idx1 on worklog5743_2(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_2(a2(4000)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 1536 bytes +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx3 on worklog5743_2(a2(948)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx4 on worklog5743_2(a2(946)); +show warnings; +Level Code Message +create index idx5 on worklog5743_2(a1, a2(942)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +show warnings; +Level Code Message +Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 4030. 
This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs +create index idx6 on worklog5743_2(a1, a2(940)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_4(a2(4000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx3 on worklog5743_4(a2(1537)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 1536 bytes +create index idx4 on worklog5743_4(a2(1536)); +Warnings: +Warning 1831 Duplicate index 'idx4' defined on the table 'test.worklog5743_4'. This is deprecated and will be disallowed in a future release. +show warnings; +Level Code Message +Warning 1831 Duplicate index 'idx4' defined on the table 'test.worklog5743_4'. This is deprecated and will be disallowed in a future release. +create index idx5 on worklog5743_4(a1, a2(1533)); +ERROR 42000: Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 1536 bytes +create index idx6 on worklog5743_4(a1, a2(1532)); +show warnings; +Level Code Message +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx1 on worklog5743_8(a2(1000)); +Warnings: +Warning 1071 Specified key was too long; max key length is 767 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 767 bytes +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create index idx2 on worklog5743_8(a2(3073)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 1536 bytes +create index idx3 on worklog5743_8(a2(3072)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Warning 1071 Specified key was too long; max key length is 1536 bytes +create index idx4 on worklog5743_8(a1, a2(1533)); +ERROR 42000: Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 1536 bytes +create index idx5 on worklog5743_8(a1, a2(1532)); +show warnings; +Level Code Message +SET sql_mode= default; +insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); +insert into worklog5743_8 values(9, repeat("a", 10000), repeat("a", 10000)); +set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +insert into worklog5743_8 values(2, repeat("b", 10000), repeat("b", 10000)); +set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_8; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +2 bbbbbbbbbbbbbbbbbbbb +begin; +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +update worklog5743_8 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_2; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_4; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select a1, left(a2, 20) from worklog5743_8; +a1 left(a2, 20) +1000 aaaaaaaaaaaaaaaaaaaa +1000 bbbbbbbbbbbbbbbbbbbb +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_1 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_1`.`a1` AS 
`a1`,left(`test`.`worklog5743_1`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_1` where (`test`.`worklog5743_1`.`a1` = 9) +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_2 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_2`.`a1` AS `a1`,left(`test`.`worklog5743_2`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_2` where (`test`.`worklog5743_2`.`a1` = 9) +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_4 NULL ref idx6 idx6 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_4`.`a1` AS `a1`,left(`test`.`worklog5743_4`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_4` where (`test`.`worklog5743_4`.`a1` = 9) +explain select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743_8 NULL ref idx5 idx5 5 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743_8`.`a1` AS `a1`,left(`test`.`worklog5743_8`.`a2`,20) AS `left(a2, 20)` from `test`.`worklog5743_8` where (`test`.`worklog5743_8`.`a1` = 9) +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +a1 left(a2, 20) +9 aaaaaaaaaaaaaaaaaaaa +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +a1 left(a2, 20) 
+select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +a1 left(a2, 20) +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +a1 left(a2, 20) +rollback; +drop table worklog5743_1; +drop table worklog5743_2; +drop table worklog5743_4; +drop table worklog5743_8; +### Test 5 ### +create table worklog5743(a1 int, a2 varchar(20000)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +ERROR 42000: Specified key was too long; max key length is 3072 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(1537)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +ERROR 42000: Specified key was too long; max key length is 1536 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(1536)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 1536)); +update worklog5743 set a1 = 1000; +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(1533)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a1, a2); +ERROR 42000: Specified key was too long; max key length is 1536 bytes +show warnings; +Level Code Message +Error 1071 Specified key was too long; max key length is 1536 bytes +drop table worklog5743; +create table worklog5743(a1 int, a2 varchar(1532)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a1, a2); +show warnings; +Level Code Message +insert into worklog5743 values(9, repeat("a", 1532)); +update worklog5743 set a1 = 1000; +begin; +update worklog5743 set a1 = 1000; +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +explain select a1 from worklog5743 where a1 = 9; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE worklog5743 NULL ref idx1 idx1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`worklog5743`.`a1` AS `a1` 
from `test`.`worklog5743` where (`test`.`worklog5743`.`a1` = 9) +select a1 from worklog5743 where a1 = 9; +a1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a1 from worklog5743 where a1 = 9; +a1 +rollback; +drop table worklog5743; +### Test 6 ### +create table worklog5743(a TEXT not null, primary key (a(1000))) +row_format=compact; +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create table worklog5743(a TEXT) row_format=compact; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx on worklog5743(a(767)); +insert into worklog5743 values(repeat("a", 20000)); +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); +select @@session.tx_isolation; +@@session.tx_isolation +REPEATABLE-READ +select a = repeat("a", 20000) from worklog5743; +a = repeat("a", 20000) +1 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +@@session.tx_isolation +READ-UNCOMMITTED +select a = repeat("x", 25000) from worklog5743; +a = repeat("x", 25000) +1 +1 +rollback; +drop table worklog5743; +### Test 7 ### +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; +SET sql_mode= ''; +Warnings: +Warning 3090 Changing sql mode 'NO_AUTO_CREATE_USER' is deprecated. It will be removed in a future release. 
+create index idx1 on worklog5743(a(3073)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +create index idx2 on worklog5743(a(3072)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1536 bytes +SET sql_mode= default; +show create table worklog5743; +Table Create Table +worklog5743 CREATE TABLE `worklog5743` ( + `a` text NOT NULL, + KEY `idx1` (`a`(1536)), + KEY `idx2` (`a`(1536)) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +drop table worklog5743; +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx2 on worklog5743(a(767)); +drop table worklog5743; +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +create index idx on worklog5743(a(768)); +ERROR HY000: Index column size too large. The maximum column size is 767 bytes. +create index idx2 on worklog5743(a(767)); +drop table worklog5743; +SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_zip/r/innochecksum.result b/mysql-test/suite/innodb_zip/r/innochecksum.result new file mode 100644 index 00000000000..694de4d9c83 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/innochecksum.result @@ -0,0 +1,82 @@ +# Set the environmental variables +call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); +call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed"); +SET GLOBAL innodb_file_per_table=on; +CREATE TABLE tab1(c1 INT PRIMARY KEY,c2 VARCHAR(20)) ENGINE=InnoDB; +CREATE INDEX idx1 ON tab1(c2(10)); +INSERT INTO tab1 VALUES(1, 'Innochecksum InnoDB1'); +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into t1 values(1,"i"); +insert into t1 values(2,"am"); +insert into t1 values(3,"compressed table"); +# 
Shutdown the Server +# Server Default checksum = innodb +[1a]: check the innochecksum when file doesn't exists +[1b]: check the innochecksum without --strict-check +[2]: check the innochecksum with full form --strict-check=crc32 +[3]: check the innochecksum with short form -C crc32 +[4]: check the innochecksum with --no-check ignores algorithm check, warning is expected +[5]: check the innochecksum with short form --no-check ignores algorithm check, warning is expected +[6]: check the innochecksum with full form strict-check & no-check , an error is expected +[7]: check the innochecksum with short form strict-check & no-check , an error is expected +[8]: check the innochecksum with short & full form combination +# strict-check & no-check, an error is expected +[9]: check the innochecksum with full form --strict-check=innodb +[10]: check the innochecksum with full form --strict-check=none +# when server Default checksum=crc32 +[11]: check the innochecksum with short form -C innodb +# when server Default checksum=crc32 +[12]: check the innochecksum with short form -C none +# when server Default checksum=crc32 +[13]: check strict-check with invalid values +[14a]: when server default checksum=crc32 rewrite new checksum=crc32 with innochecksum +# Also check the long form of write option. +[14b]: when server default checksum=crc32 rewrite new checksum=innodb with innochecksum +# Also check the long form of write option. +# start the server with innodb_checksum_algorithm=InnoDB +# restart +INSERT INTO tab1 VALUES(2, 'Innochecksum CRC32'); +SELECT c1,c2 FROM tab1 order by c1,c2; +c1 c2 +1 Innochecksum InnoDB1 +2 Innochecksum CRC32 +# Stop the server +[15]: when server default checksum=crc32 rewrite new checksum=none with innochecksum +# Also check the short form of write option. 
+# Start the server with checksum algorithm=none +# restart +INSERT INTO tab1 VALUES(3, 'Innochecksum None'); +SELECT c1,c2 FROM tab1 order by c1,c2; +c1 c2 +1 Innochecksum InnoDB1 +2 Innochecksum CRC32 +3 Innochecksum None +DROP TABLE t1; +# Stop the server +[16]: rewrite into new checksum=crc32 with innochecksum +# Restart the DB server with innodb_checksum_algorithm=crc32 +# restart +SELECT * FROM tab1; +c1 c2 +1 Innochecksum InnoDB1 +2 Innochecksum CRC32 +3 Innochecksum None +DELETE FROM tab1 where c1=3; +SELECT c1,c2 FROM tab1 order by c1,c2; +c1 c2 +1 Innochecksum InnoDB1 +2 Innochecksum CRC32 +# Stop server +[17]: rewrite into new checksum=InnoDB +# Restart the DB server with innodb_checksum_algorithm=InnoDB +# restart +DELETE FROM tab1 where c1=2; +SELECT * FROM tab1; +c1 c2 +1 Innochecksum InnoDB1 +# Stop server +[18]:check Innochecksum with invalid write options +# Restart the server +# restart +DROP TABLE tab1; +SET GLOBAL innodb_file_per_table=default; diff --git a/mysql-test/suite/innodb_zip/r/innochecksum_2.result b/mysql-test/suite/innodb_zip/r/innochecksum_2.result new file mode 100644 index 00000000000..0b6adaa33a2 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/innochecksum_2.result @@ -0,0 +1,140 @@ +SET GLOBAL innodb_compression_level=0; +SELECT @@innodb_compression_level; +@@innodb_compression_level +0 +CREATE TABLE t1 (j LONGBLOB) ENGINE = InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +INSERT INTO t1 VALUES (repeat('abcdefghijklmnopqrstuvwxyz',200)); +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +INSERT INTO t1 SELECT * from t1; +# stop the server +[1]:# check the both short and long options for "help" +[2]:# Run the innochecksum when file isn't provided. 
+# It will print the innochecksum usage similar to --help option. +innochecksum Ver #.#.# +Copyright (c) YEAR, YEAR , Oracle and/or its affiliates. All rights reserved. + +Oracle is a registered trademark of Oracle Corporation and/or its +affiliates. Other names may be trademarks of their respective +owners. + +InnoDB offline file checksum utility. +Usage: innochecksum [-c] [-s ] [-e ] [-p ] [-v] [-a ] [-n] [-C ] [-w ] [-S] [-D ] [-l ] + -?, --help Displays this help and exits. + -I, --info Synonym for --help. + -V, --version Displays version information and exits. + -v, --verbose Verbose (prints progress every 5 seconds). + -c, --count Print the count of pages in the file and exits. + -s, --start-page=# Start on this page number (0 based). + -e, --end-page=# End at this page number (0 based). + -p, --page=# Check only this page (0 based). + -C, --strict-check=name + Specify the strict checksum algorithm by the user. + -n, --no-check Ignore the checksum verification. + -a, --allow-mismatches=# + Maximum checksum mismatch allowed. + -w, --write=name Rewrite the checksum algorithm by the user. + -S, --page-type-summary + Display a count of each page type in a tablespace. + -D, --page-type-dump=name + Dump the page type info for each page in a tablespace. + -l, --log=name log output. 
+ +Variables (--variable-name=value) +and boolean options {FALSE|TRUE} Value (after reading options) +--------------------------------- ---------------------------------------- +verbose FALSE +count FALSE +start-page 0 +end-page 0 +page 0 +strict-check crc32 +no-check FALSE +allow-mismatches 0 +write crc32 +page-type-summary FALSE +page-type-dump (No default value) +log (No default value) +[3]:# check the both short and long options for "count" and exit +Number of pages:# +Number of pages:# +[4]:# Print the version of innochecksum and exit +innochecksum Ver #.#.## Restart the DB server +# restart +DROP TABLE t1; +[5]:# Check the innochecksum for compressed table t1 with different key_block_size +# Test for KEY_BLOCK_SIZE=1 +===> Testing size=1 +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +insert into t1 values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); +# restart +select * from t1; +id msg +1 I +2 AM +3 COMPRESSED +drop table t1; +# Test for KEY_BLOCK_SIZE=2 +===> Testing size=2 +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +insert into t1 values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); +# restart +select * from t1; +id msg +1 I +2 AM +3 COMPRESSED +drop table t1; +# Test for for KEY_BLOCK_SIZE=4 +===> Testing size=4 +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into t1 values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); +# restart +select * from t1; +id msg +1 I +2 AM +3 COMPRESSED +drop table t1; +set innodb_strict_mode=off; +# Test for for KEY_BLOCK_SIZE=8 +===> Testing size=8 +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +insert into t1 
values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); +# restart +select * from t1; +id msg +1 I +2 AM +3 COMPRESSED +drop table t1; +set innodb_strict_mode=off; +# Test for KEY_BLOCK_SIZE=16 +===> Testing size=16 +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +insert into t1 values(1,"I"); +insert into t1 values(2,"AM"); +insert into t1 values(3,"COMPRESSED"); +# restart +select * from t1; +id msg +1 I +2 AM +3 COMPRESSED +drop table t1; +# Test[5] completed diff --git a/mysql-test/suite/innodb_zip/r/innochecksum_3.result b/mysql-test/suite/innodb_zip/r/innochecksum_3.result new file mode 100644 index 00000000000..85058c41e04 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/innochecksum_3.result @@ -0,0 +1,184 @@ +# Set the environmental variables +call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); +SET GLOBAL innodb_file_per_table=on; +[1]: Further Test are for rewrite checksum (innodb|crc32|none) for all ibd file & start the server. 
+CREATE TABLE tab1 (pk INTEGER NOT NULL PRIMARY KEY, +linestring_key GEOMETRY NOT NULL, +linestring_nokey GEOMETRY NOT NULL) +ENGINE=InnoDB ; +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (1, ST_GeomFromText('POINT(10 10) '), ST_GeomFromText('POINT(10 10) ')); +CREATE INDEX linestring_index ON tab1(linestring_nokey(5)); +ALTER TABLE tab1 ADD KEY (linestring_key(5)); +# create a compressed table +CREATE TABLE tab2(col_1 CHAR (255) , +col_2 VARCHAR (255), col_3 longtext, +col_4 longtext,col_5 longtext, +col_6 longtext , col_7 int ) +engine = innodb row_format=compressed key_block_size=4; +CREATE INDEX idx1 ON tab2(col_3(10)); +CREATE INDEX idx2 ON tab2(col_4(10)); +CREATE INDEX idx3 ON tab2(col_5(10)); +SET @col_1 = repeat('a', 5); +SET @col_2 = repeat('b', 20); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,5); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,4); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,3); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,2); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,1); +SELECT * FROM tab2 ORDER BY col_7; +# stop the server +[1(a)]: Rewrite into new checksum=InnoDB for all *.ibd file and ibdata1 +: start the server with innodb_checksum_algorithm=strict_innodb +# restart +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (2, ST_GeomFromText('LINESTRING(10 10,20 20,30 30)'), ST_GeomFromText('LINESTRING(10 10,20 20,30 30)')); +SET @col_1 = repeat('a', 5); +SET @col_2 = repeat('b', 20); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET 
@col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,6); +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; +SELECT * FROM tab2 ORDER BY col_7; +# stop the server +[1(b)]: Rewrite into new checksum=crc32 for all *.ibd file and ibdata1 +# start the server with innodb_checksum_algorithm=strict_crc32 +# restart +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (3, ST_GeomFromText('POLYGON((0 0,5 5,10 10,15 15,0 0),(10 10,20 20,30 30,40 40,10 10))'), +ST_GeomFromText('POLYGON((0 0,5 5,10 10,15 15,0 0),(10 10,20 20,30 30,40 40,10 10))')); +SET @col_1 = repeat('g', 5); +SET @col_2 = repeat('h', 20); +SET @col_3 = repeat('i', 100); +SET @col_4 = repeat('j', 100); +SET @col_5 = repeat('k', 100); +SET @col_6 = repeat('l', 100); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,7); +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; +SELECT * FROM tab2 ORDER BY col_7; +# stop the server +[1(c)]: Rewrite into new checksum=none for all *.ibd file and ibdata1 +# restart: --innodb_checksum_algorithm=strict_none --default_storage_engine=InnoDB +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (4, ST_GeomFromText('MULTIPOINT(0 0,5 5,10 10,20 20) '), ST_GeomFromText('MULTIPOINT(0 0,5 5,10 10,20 20) ')); +SET @col_1 = repeat('m', 5); +SET @col_2 = repeat('n', 20); +SET @col_3 = repeat('o', 100); +SET @col_4 = repeat('p', 100); +SET @col_5 = repeat('q', 100); +SET @col_6 = repeat('r', 100); +INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,8); +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; +SELECT * FROM tab2 ORDER BY col_7; +# stop the server +[2]: Check the page type summary with shortform for tab1.ibd 
+ +File::tab#.ibd +================PAGE TYPE SUMMARY============== +#PAGE_COUNT PAGE_TYPE +=============================================== + # Index page + # Undo log page + # Inode page + # Insert buffer free list page + # Freshly allocated page + # Insert buffer bitmap + # System page + # Transaction system page + # File Space Header + # Extent descriptor page + # BLOB page + # Compressed BLOB page + # Other type of page +=============================================== +Additional information: +Undo page type: # insert, # update, # other +Undo page state: # active, # cached, # to_free, # to_purge, # prepared, # other +[3]: Check the page type summary with longform for tab1.ibd + +File::tab#.ibd +================PAGE TYPE SUMMARY============== +#PAGE_COUNT PAGE_TYPE +=============================================== + # Index page + # Undo log page + # Inode page + # Insert buffer free list page + # Freshly allocated page + # Insert buffer bitmap + # System page + # Transaction system page + # File Space Header + # Extent descriptor page + # BLOB page + # Compressed BLOB page + # Other type of page +=============================================== +Additional information: +Undo page type: # insert, # update, # other +Undo page state: # active, # cached, # to_free, # to_purge, # prepared, # other +[4]: Page type dump for with longform for tab1.ibd +# Print the contents stored in dump.txt + + +Filename::tab#.ibd +============================================================================== + PAGE_NO | PAGE_TYPE | EXTRA INFO +============================================================================== +#:: # | File Space Header | - +#:: # | Insert Buffer Bitmap | - +#:: # | Inode page | - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. 
of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Freshly allocated page | - +[5]: Page type dump for with shortform for tab1.ibd + + +Filename::tab#.ibd +============================================================================== + PAGE_NO | PAGE_TYPE | EXTRA INFO +============================================================================== +#:: # | File Space Header | - +#:: # | Insert Buffer Bitmap | - +#:: # | Inode page | - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Index page | index id=#, page level=#, No. of records=#, garbage=#, - +#:: # | Freshly allocated page | - +[6]: check the valid lower bound values for option +# allow-mismatches,page,start-page,end-page +[7]: check the negative values for option +# allow-mismatches,page,start-page,end-page. +# They will reset to zero for negative values. +# check the invalid lower bound values +[8]: check the valid upper bound values for +# both short and long options "allow-mismatches" and "end-page" +[9]: check the both short and long options "page" and "start-page" when +# seek value is larger than file size. +[34]: check the invalid upper bound values for options, allow-mismatches, end-page, start-page and page. 
+# innochecksum will fail with error code: 1 +# Restart the server +# restart: --innodb_checksum_algorithm=strict_none --default_storage_engine=InnoDB +DROP TABLE tab1; +DROP TABLE tab2; +SET GLOBAL innodb_file_per_table=default; diff --git a/mysql-test/suite/innodb_zip/r/innodb-create-options.result b/mysql-test/suite/innodb_zip/r/innodb-create-options.result index fe3d799229d..1b92eb71fba 100644 --- a/mysql-test/suite/innodb_zip/r/innodb-create-options.result +++ b/mysql-test/suite/innodb_zip/r/innodb-create-options.result @@ -1,5 +1,7 @@ SET default_storage_engine=InnoDB; SET GLOBAL innodb_file_format=`Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=ON; SET SESSION innodb_strict_mode = ON; # Test 1) StrictMode=ON, CREATE and ALTER with each ROW_FORMAT & KEY_BLOCK_SIZE=0 @@ -45,7 +47,7 @@ SHOW WARNINGS; Level Code Message SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' SHOW WARNINGS; @@ -54,7 +56,7 @@ Warning 1478 InnoDB: invalid ROW_FORMAT specifier. 
Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_TYPE' SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic # Test 2) StrictMode=ON, CREATE with each ROW_FORMAT & a valid non-zero KEY_BLOCK_SIZE # KEY_BLOCK_SIZE is incompatible with COMPACT, REDUNDANT, & DYNAMIC DROP TABLE IF EXISTS t1; @@ -244,7 +246,7 @@ SHOW WARNINGS; Level Code Message SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic ALTER TABLE t1 ROW_FORMAT=COMPACT; SHOW WARNINGS; Level Code Message @@ -264,6 +266,8 @@ Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB # and a valid non-zero KEY_BLOCK_SIZE are rejected with Antelope # and that they can be set to default values during strict mode. SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html DROP TABLE IF EXISTS t1; Warnings: Note 1051 Unknown table 'test.t1' @@ -324,15 +328,19 @@ Level Code Message Warning 1478 InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_format > Antelope. Error 1478 Table storage engine 'InnoDB' does not support the create option 'ROW_FORMAT' SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html ALTER TABLE t1 ADD COLUMN f1 INT; Warnings: Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4. Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SHOW CREATE TABLE t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -348,6 +356,8 @@ ALTER TABLE t1 ADD COLUMN f2 INT; SHOW WARNINGS; Level Code Message SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html # Test 8) StrictMode=ON, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and # and a valid non-zero KEY_BLOCK_SIZE are rejected with # innodb_file_per_table=OFF and that they can be set to default @@ -427,7 +437,7 @@ SHOW WARNINGS; Level Code Message SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic SET GLOBAL innodb_file_per_table=ON; DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; @@ -451,13 +461,13 @@ SET SESSION innodb_strict_mode = OFF; DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( i INT ) ROW_FORMAT=FIXED; Warnings: -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SHOW WARNINGS; Level Code Message -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact row_format=FIXED +t1 Dynamic row_format=FIXED DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=0; SHOW WARNINGS; @@ -488,16 +498,16 @@ SHOW WARNINGS; Level Code Message SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; Warnings: -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SHOW WARNINGS; Level Code Message -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact row_format=FIXED +t1 Dynamic row_format=FIXED # Test 10) StrictMode=OFF, CREATE with each ROW_FORMAT & a valid KEY_BLOCK_SIZE # KEY_BLOCK_SIZE is ignored with COMPACT, REDUNDANT, & DYNAMIC DROP TABLE IF EXISTS t1; @@ -562,14 +572,14 @@ CREATE TABLE t1 ( i INT ); ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1; Warnings: Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SHOW WARNINGS; Level Code Message Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact row_format=FIXED key_block_size=1 +t1 Dynamic row_format=FIXED key_block_size=1 DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( i INT ); ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=2; @@ -728,7 +738,7 @@ SHOW WARNINGS; Level Code Message SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact +t1 Dynamic ALTER TABLE t1 ROW_FORMAT=COMPACT; SHOW WARNINGS; Level Code Message @@ -746,7 +756,7 @@ Level Code Message Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=15. SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact key_block_size=15 +t1 Dynamic key_block_size=15 # Test 15) StrictMode=OFF, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and a valid KEY_BLOCK_SIZE are remembered but not used when ROW_FORMAT is reverted to Antelope and then used again when ROW_FORMAT=Barracuda. @@ -758,22 +768,26 @@ SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME ROW_FORMAT CREATE_OPTIONS t1 Compressed row_format=COMPRESSED key_block_size=1 SET GLOBAL innodb_file_format=Antelope; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html ALTER TABLE t1 ADD COLUMN f1 INT; Warnings: Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1. Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SHOW WARNINGS; Level Code Message Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. 
Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1. Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact row_format=COMPRESSED key_block_size=1 +t1 Dynamic row_format=COMPRESSED key_block_size=1 SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html ALTER TABLE t1 ADD COLUMN f2 INT; SHOW WARNINGS; Level Code Message @@ -788,18 +802,17 @@ SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME ROW_FORMAT CREATE_OPTIONS t1 Dynamic row_format=DYNAMIC SET GLOBAL innodb_file_format=Antelope; -ALTER TABLE t1 ADD COLUMN f1 INT; Warnings: -Warning 1478 InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +ALTER TABLE t1 ADD COLUMN f1 INT; SHOW WARNINGS; Level Code Message -Warning 1478 InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_format > Antelope. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; TABLE_NAME ROW_FORMAT CREATE_OPTIONS -t1 Compact row_format=DYNAMIC +t1 Dynamic row_format=DYNAMIC SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html ALTER TABLE t1 ADD COLUMN f2 INT; SHOW WARNINGS; Level Code Message @@ -853,3 +866,5 @@ TABLE_NAME ROW_FORMAT CREATE_OPTIONS t1 Dynamic row_format=DYNAMIC # Cleanup DROP TABLE IF EXISTS t1; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb_zip/r/innodb-zip.result b/mysql-test/suite/innodb_zip/r/innodb-zip.result index 318f65d49c3..47215a1e077 100644 --- a/mysql-test/suite/innodb_zip/r/innodb-zip.result +++ b/mysql-test/suite/innodb_zip/r/innodb-zip.result @@ -9,11 +9,13 @@ SET @save_innodb_stats_on_metadata=@@global.innodb_stats_on_metadata; set session innodb_strict_mode=0; set global innodb_file_per_table=off; set global innodb_file_format=`0`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET @@global.innodb_stats_on_metadata=ON; create table t0(a int primary key) engine=innodb row_format=compressed; Warnings: Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. create table t00(a int primary key) engine=innodb key_block_size=4 row_format=compressed; Warnings: @@ -21,11 +23,8 @@ Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=4. Warning 1478 InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. 
create table t1(a int primary key) engine=innodb row_format=dynamic; -Warnings: -Warning 1478 InnoDB: ROW_FORMAT=DYNAMIC requires innodb_file_per_table. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. create table t2(a int primary key) engine=innodb row_format=redundant; create table t3(a int primary key) engine=innodb row_format=compact; create table t4(a int primary key) engine=innodb key_block_size=9; @@ -46,6 +45,8 @@ Warnings: Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1. set global innodb_file_format=`1`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t7(a int primary key) engine=innodb key_block_size=1 row_format=redundant; Warnings: @@ -54,7 +55,7 @@ create table t8(a int primary key) engine=innodb key_block_size=1 row_format=fixed; Warnings: Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=1 unless ROW_FORMAT=COMPRESSED. -Warning 1478 InnoDB: assuming ROW_FORMAT=COMPACT. +Warning 1478 InnoDB: assuming ROW_FORMAT=DYNAMIC. create table t9(a int primary key) engine=innodb key_block_size=1 row_format=compact; Warnings: @@ -74,21 +75,21 @@ Warnings: Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=9. 
SELECT table_schema, table_name, row_format, data_length, index_length FROM information_schema.tables WHERE engine='innodb' AND table_schema != 'mysql'; table_schema table_name row_format data_length index_length -mysqltest_innodb_zip t0 Compact {valid} 0 -mysqltest_innodb_zip t00 Compact {valid} 0 -mysqltest_innodb_zip t1 Compact {valid} 0 +mysqltest_innodb_zip t0 Dynamic {valid} 0 +mysqltest_innodb_zip t00 Dynamic {valid} 0 +mysqltest_innodb_zip t1 Dynamic {valid} 0 mysqltest_innodb_zip t10 Dynamic {valid} 0 mysqltest_innodb_zip t11 Compressed 1024 0 mysqltest_innodb_zip t12 Compressed 1024 0 mysqltest_innodb_zip t13 Compressed {valid} 0 -mysqltest_innodb_zip t14 Compact {valid} 0 +mysqltest_innodb_zip t14 Dynamic {valid} 0 mysqltest_innodb_zip t2 Redundant {valid} 0 mysqltest_innodb_zip t3 Compact {valid} 0 -mysqltest_innodb_zip t4 Compact {valid} 0 +mysqltest_innodb_zip t4 Dynamic {valid} 0 mysqltest_innodb_zip t5 Redundant {valid} 0 mysqltest_innodb_zip t6 Redundant {valid} 0 mysqltest_innodb_zip t7 Redundant {valid} 0 -mysqltest_innodb_zip t8 Compact {valid} 0 +mysqltest_innodb_zip t8 Dynamic {valid} 0 mysqltest_innodb_zip t9 Compact {valid} 0 drop table t0,t00,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14; alter table t1 key_block_size=0; @@ -138,7 +139,7 @@ mysqltest_innodb_zip.t2 analyze status OK SELECT table_schema, table_name, row_format, data_length, index_length FROM information_schema.tables WHERE engine='innodb' AND table_schema != 'mysql'; table_schema table_name row_format data_length index_length mysqltest_innodb_zip t1 Compressed 2048 1024 -mysqltest_innodb_zip t2 Compact {valid} 0 +mysqltest_innodb_zip t2 Dynamic {valid} 0 drop table t1,t2; create table t1( c1 int not null, c2 blob, c3 blob, c4 blob, primary key(c1, c2(22), c3(22))) @@ -160,10 +161,14 @@ count(*) update t1 set c3 = repeat('E', 20000) where c1 = 1; drop table t1; set global innodb_file_format=`0`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may 
be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format; @@innodb_file_format Antelope set global innodb_file_format=`1`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format; @@innodb_file_format Barracuda @@ -172,7 +177,11 @@ ERROR 42000: Variable 'innodb_file_format' can't be set to the value of '2' set global innodb_file_format=`-1`; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of '-1' set global innodb_file_format=`Antelope`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format=`Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format=`Cheetah`; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of 'Cheetah' set global innodb_file_format=`abc`; @@ -183,6 +192,8 @@ set global innodb_file_format=``; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of '' set global innodb_file_per_table = on; set global innodb_file_format = `1`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set innodb_strict_mode = off; create table t1 (id int primary key) engine = innodb key_block_size = 0; drop table t1; @@ -204,7 +215,7 @@ create table t10(id int primary key) engine = innodb row_format = compact; create table t11(id int primary key) engine = innodb row_format = redundant; SELECT table_schema, table_name, row_format, data_length, index_length FROM information_schema.tables WHERE engine='innodb' AND table_schema != 'mysql'; table_schema table_name row_format data_length index_length -mysqltest_innodb_zip t1 Compact {valid} 0 +mysqltest_innodb_zip t1 Dynamic {valid} 0 mysqltest_innodb_zip t10 Compact {valid} 0 mysqltest_innodb_zip t11 Redundant {valid} 0 mysqltest_innodb_zip t3 Compressed 1024 0 @@ -320,6 +331,8 @@ mysqltest_innodb_zip t9 Redundant {valid} 0 drop table t8, t9; set global innodb_file_per_table = on; set global innodb_file_format = `0`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t1 (id int primary key) engine = innodb key_block_size = 1; ERROR HY000: Can't create table `mysqltest_innodb_zip`.`t1` (errno: 140 "Wrong create options") show warnings; @@ -363,16 +376,22 @@ mysqltest_innodb_zip t8 Compact {valid} 0 mysqltest_innodb_zip t9 Redundant {valid} 0 drop table t8, t9; set global innodb_file_per_table=1; -set global innodb_file_format=Antelope; +set global innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table=on; set global innodb_file_format=`Barracuda`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_format_max=`Antelope`; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table normal_table ( c1 int ) engine = innodb; select @@innodb_file_format_max; @@innodb_file_format_max -Antelope +Barracuda create table zip_table ( c1 int ) engine = innodb key_block_size = 4; @@ -380,6 +399,8 @@ select @@innodb_file_format_max; @@innodb_file_format_max Barracuda set global innodb_file_format_max=`Antelope`; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@innodb_file_format_max; @@innodb_file_format_max Antelope diff --git a/mysql-test/suite/innodb_zip/r/innodb_bug36169.result b/mysql-test/suite/innodb_zip/r/innodb_bug36169.result index aa80e4d7aa4..161cef10ad5 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_bug36169.result +++ b/mysql-test/suite/innodb_zip/r/innodb_bug36169.result @@ -1,2 +1,5 @@ +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size .* for a record on index leaf page."); SET GLOBAL innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=ON; diff --git a/mysql-test/suite/innodb_zip/r/innodb_bug52745.result b/mysql-test/suite/innodb_zip/r/innodb_bug52745.result index f4393e8fae0..5ba26753ea6 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_bug52745.result +++ b/mysql-test/suite/innodb_zip/r/innodb_bug52745.result @@ -1,4 +1,6 @@ SET GLOBAL innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=on; CREATE TABLE bug52745 ( a2 int(10) unsigned DEFAULT NULL, @@ -126,5 +128,7 @@ Warning 1264 Out of range value for column 'col78' at row 1 Warning 1265 Data truncated for column 'col79' at row 1 Warning 1264 Out of range value for column 'col84' at row 1 DROP TABLE bug52745; -SET GLOBAL innodb_file_format=Antelope; +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=1; diff --git a/mysql-test/suite/innodb_zip/r/innodb_bug53591.result b/mysql-test/suite/innodb_zip/r/innodb_bug53591.result index dbebb9d2d33..3b10942c2de 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_bug53591.result +++ b/mysql-test/suite/innodb_zip/r/innodb_bug53591.result @@ -1,5 +1,8 @@ SET GLOBAL innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_strict_mode=on; set old_alter_table=0; CREATE TABLE bug53591(a text charset utf8 not null) ENGINE=InnoDB KEY_BLOCK_SIZE=1; @@ -9,5 +12,8 @@ SHOW WARNINGS; Level Code Message Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is {checked_valid}. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs DROP TABLE bug53591; -SET GLOBAL innodb_file_format=Antelope; +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=1; +SET GLOBAL innodb_strict_mode=DEFAULT; diff --git a/mysql-test/suite/innodb_zip/r/innodb_bug56680.result b/mysql-test/suite/innodb_zip/r/innodb_bug56680.result index 40c39d21243..92b589c6b7e 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_bug56680.result +++ b/mysql-test/suite/innodb_zip/r/innodb_bug56680.result @@ -1,5 +1,7 @@ SET GLOBAL tx_isolation='REPEATABLE-READ'; SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=on; CREATE TABLE bug56680( a INT AUTO_INCREMENT PRIMARY KEY, @@ -119,3 +121,5 @@ Table Op Msg_type Msg_text test.bug56680_2 check status OK DROP TABLE bug56680_2; DROP TABLE bug56680; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb_zip/r/innodb_cmp_drop_table.result b/mysql-test/suite/innodb_zip/r/innodb_cmp_drop_table.result index 1f6d6948756..11e90b9e7d6 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_cmp_drop_table.result +++ b/mysql-test/suite/innodb_zip/r/innodb_cmp_drop_table.result @@ -1,5 +1,7 @@ set global innodb_file_per_table=on; set global innodb_file_format=`1`; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create table t1(a text) engine=innodb key_block_size=8; SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0; page_size @@ -12,3 +14,5 @@ create table t2(a text) engine=innodb; SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0; page_size drop table t2; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html diff --git a/mysql-test/suite/innodb_zip/r/innodb_index_large_prefix.result b/mysql-test/suite/innodb_zip/r/innodb_index_large_prefix.result index f11988034ad..f69fa5c7659 100644 --- a/mysql-test/suite/innodb_zip/r/innodb_index_large_prefix.result +++ b/mysql-test/suite/innodb_zip/r/innodb_index_large_prefix.result @@ -1,7 +1,13 @@ SET default_storage_engine=InnoDB; +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); set global innodb_file_format="Barracuda"; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html set global innodb_file_per_table=1; set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_strict_mode=1; ### Test 1 ### create table worklog5743(a TEXT not null, primary key (a(1000))) ROW_FORMAT=DYNAMIC; show warnings; @@ -97,6 +103,8 @@ create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; create table worklog5743_8(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=8; create table worklog5743_16(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=16; set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx1 on worklog5743_1(a2(4000)); ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs show warnings; @@ -104,6 +112,8 @@ Level Code Message Warning 1071 Specified key was too long; max key length is 767 bytes Error 1118 Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx2 on worklog5743_1(a2(4000)); ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs show warnings; @@ -127,6 +137,8 @@ create index idx6 on worklog5743_1(a1, a2(428)); show warnings; Level Code Message set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx1 on worklog5743_2(a2(4000)); Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes @@ -134,6 +146,8 @@ show warnings; Level Code Message Warning 1071 Specified key was too long; max key length is 767 bytes set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx2 on worklog5743_2(a2(4000)); ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs show warnings; @@ -157,6 +171,8 @@ create index idx6 on worklog5743_2(a1, a2(940)); show warnings; Level Code Message set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx1 on worklog5743_4(a2(4000)); Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes @@ -164,6 +180,8 @@ show warnings; Level Code Message Warning 1071 Specified key was too long; max key length is 767 bytes set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx2 on worklog5743_4(a2(4000)); ERROR 42000: Row size too large. 
The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs show warnings; @@ -187,6 +205,8 @@ create index idx6 on worklog5743_4(a1, a2(1964)); show warnings; Level Code Message set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx1 on worklog5743_8(a2(1000)); Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes @@ -194,6 +214,8 @@ show warnings; Level Code Message Warning 1071 Specified key was too long; max key length is 767 bytes set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx2 on worklog5743_8(a2(3073)); Warnings: Warning 1071 Specified key was too long; max key length is 3072 bytes @@ -223,6 +245,8 @@ create index idx7 on worklog5743_8(a1, a2(2000), a3(1068)); show warnings; Level Code Message set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx1 on worklog5743_16(a2(1000)); Warnings: Warning 1071 Specified key was too long; max key length is 767 bytes @@ -230,6 +254,8 @@ show warnings; Level Code Message Warning 1071 Specified key was too long; max key length is 767 bytes set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html create index idx2 on worklog5743_16(a2(3073)); Warnings: Warning 1071 Specified key was too long; max key length is 3072 bytes @@ -264,12 +290,16 @@ insert into worklog5743_4 values(9, repeat("a", 10000)); insert into worklog5743_8 values(9, repeat("a", 10000), repeat("a", 10000)); insert into worklog5743_16 values(9, repeat("a", 10000), repeat("a", 10000)); set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html insert into worklog5743_1 values(2, repeat("b", 10000)); insert into worklog5743_2 values(2, repeat("b", 10000)); insert into worklog5743_4 values(2, repeat("b", 10000)); insert into worklog5743_8 values(2, repeat("b", 10000), repeat("b", 10000)); insert into worklog5743_16 values(2, repeat("b", 10000), repeat("b", 10000)); set global innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select a1, left(a2, 20) from worklog5743_1; a1 left(a2, 20) 9 aaaaaaaaaaaaaaaaaaaa @@ -444,9 +474,9 @@ connection default; rollback; drop table worklog5743; ### Test 6 ### -create table worklog5743(a TEXT not null, primary key (a(1000))); +create table worklog5743(a TEXT not null, primary key (a(1000))) row_format=COMPACT; ERROR HY000: Index column size too large. The maximum column size is 767 bytes. -create table worklog5743(a TEXT); +create table worklog5743(a TEXT) row_format=COMPACT; create index idx on worklog5743(a(768)); ERROR HY000: Index column size too large. The maximum column size is 767 bytes. create index idx on worklog5743(a(767)); @@ -499,9 +529,14 @@ create index idx on worklog5743(a(768)); ERROR HY000: Index column size too large. The maximum column size is 767 bytes. 
create index idx2 on worklog5743(a(767)); drop table worklog5743; -SET GLOBAL innodb_file_format=Antelope; +SET GLOBAL innodb_file_format=Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SET GLOBAL innodb_file_per_table=1; -SET GLOBAL innodb_large_prefix=0; +SET GLOBAL innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET GLOBAL innodb_strict_mode = DEFAULT; connection con1; disconnect con1; connection con2; diff --git a/mysql-test/suite/innodb_zip/r/innodb_prefix_index_liftedlimit.result b/mysql-test/suite/innodb_zip/r/innodb_prefix_index_liftedlimit.result deleted file mode 100644 index 7d52ab135b3..00000000000 --- a/mysql-test/suite/innodb_zip/r/innodb_prefix_index_liftedlimit.result +++ /dev/null @@ -1,1396 +0,0 @@ -set global innodb_file_format="Barracuda"; -set global innodb_file_per_table=1; -set global innodb_large_prefix=1; -DROP TABLE IF EXISTS worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -COLUMN_NAME INDEX_NAME SUB_PART INDEX_TYPE -col_1_varchar PRIMARY 3072 BTREE -col_1_varchar prefix_idx 3072 BTREE -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET 
col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -col_1_varchar = REPEAT("c", 4000) -0 -1 -ALTER TABLE worklog5743 ROW_FORMAT=REDUNDANT; -ERROR HY000: Index column size too large. The maximum column size is 767 bytes. -ALTER TABLE worklog5743 ROW_FORMAT=COMPACT; -ERROR HY000: Index column size too large. The maximum column size is 767 bytes. -ALTER TABLE worklog5743 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (3072)); -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -COLUMN_NAME INDEX_NAME SUB_PART INDEX_TYPE -col_1_text PRIMARY 3072 BTREE -col_1_text prefix_idx 3072 BTREE -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM 
worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743; -col_1_text = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_mediumtext MEDIUMTEXT , col_2_mediumtext MEDIUMTEXT , -PRIMARY KEY (col_1_mediumtext(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_mediumtext (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_mediumtext = REPEAT("a", 4000),col_2_mediumtext = REPEAT("o", 4000) -FROM worklog5743; -col_1_mediumtext = REPEAT("a", 4000) col_2_mediumtext = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_mediumtext = REPEAT("c", 4000) -WHERE col_1_mediumtext = REPEAT("a", 4000) -AND col_2_mediumtext = REPEAT("o", 4000); -SELECT col_1_mediumtext = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_mediumtext = REPEAT("c", 4000) -AND col_2_mediumtext = REPEAT("o", 4000); -col_1_mediumtext = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_mediumtext = REPEAT("b", 4000); -SELECT col_1_mediumtext = REPEAT("c", 4000) FROM worklog5743; -col_1_mediumtext = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_longtext LONGTEXT , col_2_longtext LONGTEXT , -PRIMARY KEY (col_1_longtext(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_longtext (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_longtext = REPEAT("a", 4000) , col_2_longtext = REPEAT("o", 4000) -FROM worklog5743; -col_1_longtext = REPEAT("a", 4000) col_2_longtext = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_longtext = REPEAT("c", 4000) -WHERE col_1_longtext = REPEAT("a", 4000) -AND 
col_2_longtext = REPEAT("o", 4000); -SELECT col_1_longtext = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_longtext = REPEAT("c", 4000) -AND col_2_longtext = REPEAT("o", 4000); -col_1_longtext = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_longtext = REPEAT("b", 4000); -SELECT col_1_longtext = REPEAT("c", 4000) FROM worklog5743; -col_1_longtext = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_blob (3072)); -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -COLUMN_NAME INDEX_NAME SUB_PART INDEX_TYPE -col_1_blob PRIMARY 3072 BTREE -col_1_blob prefix_idx 3072 BTREE -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743; -col_1_blob = REPEAT("a", 4000) col_2_blob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -col_1_blob = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743; -col_1_blob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_mediumblob MEDIUMBLOB , col_2_mediumblob MEDIUMBLOB , -PRIMARY KEY (col_1_mediumblob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , 
REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_mediumblob (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_mediumblob = REPEAT("a", 4000),col_2_mediumblob = REPEAT("o", 4000) -FROM worklog5743; -col_1_mediumblob = REPEAT("a", 4000) col_2_mediumblob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_mediumblob = REPEAT("c", 4000) -WHERE col_1_mediumblob = REPEAT("a", 4000) -AND col_2_mediumblob = REPEAT("o", 4000); -SELECT col_1_mediumblob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_mediumblob = REPEAT("c", 4000) -AND col_2_mediumblob = REPEAT("o", 4000); -col_1_mediumblob = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_mediumblob = REPEAT("b", 4000); -SELECT col_1_mediumblob = REPEAT("c", 4000) FROM worklog5743; -col_1_mediumblob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_longblob LONGBLOB , col_2_longblob LONGBLOB , -PRIMARY KEY (col_1_longblob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_longblob (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_longblob = REPEAT("a", 4000) , col_2_longblob = REPEAT("o", 4000) -FROM worklog5743; -col_1_longblob = REPEAT("a", 4000) col_2_longblob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_longblob = REPEAT("c", 4000) -WHERE col_1_longblob = REPEAT("a", 4000) -AND col_2_longblob = REPEAT("o", 4000); -SELECT col_1_longblob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_longblob = REPEAT("c", 4000) -AND col_2_longblob = REPEAT("o", 4000); -col_1_longblob = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_longblob = REPEAT("b", 4000); -SELECT col_1_longblob = REPEAT("c", 
4000) FROM worklog5743; -col_1_longblob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varbinary VARBINARY (4000) , -PRIMARY KEY (col_1_varbinary(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varbinary = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varbinary = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 (col_1_char CHAR (255) , col_2_char CHAR (255), -col_3_char CHAR (255), col_4_char CHAR (255),col_5_char CHAR (255), -col_6_char CHAR (255), col_7_char CHAR (255),col_8_char CHAR (255), -col_9_char CHAR (255), col_10_char CHAR (255),col_11_char CHAR (255), -col_12_char CHAR (255), col_13_char CHAR (255),col_14_char CHAR (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255) -); -CREATE INDEX prefix_idx ON 
worklog5743(col_1_char(250),col_2_char(250), -col_3_char(250),col_4_char(250),col_5_char(250),col_6_char(250), -col_7_char(250),col_8_char(250),col_9_char(250),col_10_char(250), -col_11_char(250),col_12_char(250),col_13_char(72) -); -INSERT INTO worklog5743 VALUES(REPEAT("b", 255) , REPEAT("p", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255) -); -SELECT col_1_char = REPEAT("a", 255) , col_2_char = REPEAT("o", 255) FROM worklog5743; -col_1_char = REPEAT("a", 255) col_2_char = REPEAT("o", 255) -1 1 -0 0 -UPDATE worklog5743 SET col_1_char = REPEAT("c", 255) -WHERE col_1_char = REPEAT("a", 255) AND col_2_char = REPEAT("o", 255); -SELECT col_1_char = REPEAT("c", 255) FROM worklog5743 -WHERE col_1_char = REPEAT("c", 255) AND col_2_char = REPEAT("o", 255); -col_1_char = REPEAT("c", 255) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_char = REPEAT("b", 255); -SELECT col_1_char = REPEAT("c", 255) FROM worklog5743; -col_1_char = REPEAT("c", 255) -1 -0 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 (col_1_binary BINARY (255) , col_2_binary BINARY (255), -col_3_binary BINARY(255),col_4_binary BINARY (255),col_5_binary BINARY (255), -col_6_binary BINARY(255),col_7_binary BINARY (255),col_8_binary BINARY (255), -col_9_binary BINARY(255),col_10_binary BINARY (255),col_11_binary BINARY (255), -col_12_binary BINARY(255),col_13_binary BINARY (255),col_14_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255), 
REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -CREATE INDEX prefix_idx ON worklog5743(col_1_binary (250),col_2_binary (250), -col_3_binary (250),col_4_binary (250),col_5_binary (250), -col_6_binary (250),col_7_binary (250),col_8_binary (250), -col_9_binary (250),col_10_binary (250),col_11_binary (250), -col_12_binary (250),col_13_binary (72) -); -INSERT INTO worklog5743 VALUES(REPEAT("b", 255) , REPEAT("p", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -SELECT col_1_binary = REPEAT("a", 255) , col_2_binary = REPEAT("o", 255) FROM worklog5743; -col_1_binary = REPEAT("a", 255) col_2_binary = REPEAT("o", 255) -1 1 -0 0 -UPDATE worklog5743 SET col_1_binary = REPEAT("c", 255) -WHERE col_1_binary = REPEAT("a", 255) -AND col_2_binary = REPEAT("o", 255); -SELECT col_1_binary = REPEAT("c", 255) FROM worklog5743 -WHERE col_1_binary = REPEAT("c", 255) -AND col_2_binary = REPEAT("o", 255); -col_1_binary = REPEAT("c", 255) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_binary = REPEAT("b", 255); -SELECT col_1_binary = REPEAT("c", 255) FROM worklog5743; -col_1_binary = REPEAT("c", 255) -1 -0 -DROP TABLE worklog5743; -CREATE TABLE worklog5743_key2 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 
4000)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key2; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key2 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key2 -WHERE col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key2 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key2; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key2; -CREATE TABLE worklog5743_key4 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key4; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key4 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key4 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key4; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key4; -CREATE TABLE worklog5743_key8 
( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key8; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key8 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key8; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key8; -CREATE TABLE worklog5743_key2 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key2; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key2 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key2 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM 
worklog5743_key2 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key2; -col_1_text = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key2; -CREATE TABLE worklog5743_key4 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key4; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key4 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key4 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key4; -col_1_text = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key4; -CREATE TABLE worklog5743_key8 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key8; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key8 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_text = REPEAT("c", 4000) AND 
col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key8; -col_1_text = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key8; -CREATE TABLE worklog5743_key2 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key2; -col_1_blob = REPEAT("a", 4000) col_2_blob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key2 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key2 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -col_1_blob = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key2 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key2; -col_1_blob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key2; -CREATE TABLE worklog5743_key4 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key4; -col_1_blob = REPEAT("a", 4000) col_2_blob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key4 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = 
REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -col_1_blob = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key4 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key4; -col_1_blob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key4; -CREATE TABLE worklog5743_key8 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key8; -col_1_blob = REPEAT("a", 4000) col_2_blob = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743_key8 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -col_1_blob = REPEAT("b", 3500) -0 -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key8; -col_1_blob = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743_key8; -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varchar VARCHAR (4000) , -col_3_text TEXT (4000), col_4_blob BLOB (4000), col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , 
REPEAT("a", 255) -); -CREATE INDEX prefix_idx1 ON worklog5743(col_1_varbinary (3072)); -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("c", 4000) -1 -0 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (3072)); -CREATE INDEX prefix_idx4 ON worklog5743(col_4_blob (3072)); -CREATE INDEX prefix_idx5 ON worklog5743(col_5_text (3072)); -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -ERROR HY000: Undo log record is too big. -SHOW WARNINGS; -Level Code Message -Error 1713 Undo log record is too big. 
-DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varchar VARCHAR (4000) , -col_3_text TEXT (4000), col_4_blob BLOB (4000),col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -CREATE INDEX prefix_idx1 ON worklog5743(col_1_varbinary (3072)); -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (3072)); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (3072)); -CREATE INDEX prefix_idx4 ON worklog5743(col_4_blob (3072)); -CREATE INDEX prefix_idx5 ON worklog5743(col_5_text (3072)); -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -ROLLBACK; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -COMMIT; -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -ROLLBACK; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -ERROR HY000: Undo log record is too big. -SHOW WARNINGS; -Level Code Message -Error 1713 Undo log record is too big. 
-SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("c", 4000) -0 -0 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) CHARACTER SET 'utf8', -col_2_text TEXT (4000) CHARACTER SET 'utf8', -PRIMARY KEY (col_1_text(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1024)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) FROM worklog5743; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743; -col_1_text = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 (col_1_varchar VARCHAR (4000) CHARACTER SET 'utf8', -col_2_varchar VARCHAR (4000) CHARACTER SET 'utf8' , -PRIMARY KEY (col_1_varchar(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ERROR 42000: Row size too large (> max_row_size). Changing some columns to TEXT or BLOB may help. 
In current row format, BLOB prefix of 0 bytes is stored inline. -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , -col_2_varchar VARCHAR (4000) CHARACTER SET 'utf8', -col_3_text TEXT (4000) CHARACTER SET 'utf8', -col_4_blob BLOB (4000),col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (500)); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (500)); -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -ROLLBACK; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -COMMIT; -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -ROLLBACK; -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("c", 4000) -0 -0 -DROP TABLE 
worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) CHARACTER SET 'utf8', -col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("स", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1024)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("स", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -col_1_text = REPEAT("स", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_text = REPEAT("क", 4000) -WHERE col_1_text = REPEAT("स", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("क", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("क", 4000) -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("क", 4000) FROM worklog5743; -col_1_text = REPEAT("क", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -connect con1,localhost,root,,; -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -connect con2,localhost,root,,; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -0 1 -connection 
con1; -select @@session.tx_isolation; -@@session.tx_isolation -REPEATABLE-READ -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -0 1 -SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; -select @@session.tx_isolation; -@@session.tx_isolation -READ-UNCOMMITTED -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -0 1 -1 1 -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -START TRANSACTION; -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -connection con2; -COMMIT; -connection con1; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -0 1 -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -COMMIT; -connection default; -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -connection con1; -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -START TRANSACTION; -connection con2; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -DELETE 
FROM worklog5743 WHERE col_1_text = REPEAT("a", 200); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -0 1 -COMMIT; -connection con1; -SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; -select @@session.tx_isolation; -@@session.tx_isolation -READ-UNCOMMITTED -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -1 1 -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -COMMIT; -connection default; -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -connection con1; -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -1 1 -SELECT COUNT(*) FROM worklog5743; -COUNT(*) -1 -START TRANSACTION; -connection con2; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("a", 200); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("a", 200) col_2_text = REPEAT("o", 200) -0 1 -ROLLBACK; -connection con1; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -col_1_text = REPEAT("b", 200) col_2_text = REPEAT("o", 200) -0 1 -SELECT COUNT(*) FROM worklog5743; 
-COUNT(*) -1 -COMMIT; -disconnect con1; -connection con2; -disconnect con2; -connection default; -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -col_1_varchar = REPEAT("c", 4000) -0 -0 -1 -SELECT tbl1.col_1_varchar = tbl2.col_1_varchar -FROM worklog5743 tbl1 , worklog5743 tbl2 -WHERE tbl1.col_1_varchar = tbl2.col_1_varchar ; -tbl1.col_1_varchar = tbl2.col_1_varchar -1 -1 -1 -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) ; -tbl1.col_1_varchar = REPEAT("c", 4000) -0 -0 -1 -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar NOT IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) ; -tbl1.col_1_varchar = REPEAT("c", 4000) -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 WHERE -col_1_varchar IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) -AND col_1_varchar = REPEAT("c", 4000); -tbl1.col_1_varchar = REPEAT("c", 4000) -1 -SELECT tbl1.col_1_varchar = 
REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar in ( -SELECT tbl2.col_1_varchar FROM worklog5743 tbl2 -WHERE tbl1.col_1_varchar != tbl2.col_1_varchar -) ; -tbl1.col_1_varchar = REPEAT("c", 4000) -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar in ( -SELECT tbl2.col_1_varchar FROM worklog5743 tbl2 -WHERE tbl1.col_1_varchar = tbl2.col_1_varchar -) ; -tbl1.col_1_varchar = REPEAT("c", 4000) -0 -0 -1 -SELECT -REVERSE(col_1_varchar) = REPEAT("c", 4000) , -REVERSE(REVERSE(col_1_varchar)) = REPEAT("c", 4000) -FROM worklog5743; -REVERSE(col_1_varchar) = REPEAT("c", 4000) REVERSE(REVERSE(col_1_varchar)) = REPEAT("c", 4000) -0 0 -0 0 -1 1 -SELECT -UPPER(col_1_varchar) = REPEAT("c", 4000) , -UPPER(col_1_varchar) = REPEAT("C", 4000) , -LOWER(UPPER(col_1_varchar)) = REPEAT("c", 4000) -FROM worklog5743; -UPPER(col_1_varchar) = REPEAT("c", 4000) UPPER(col_1_varchar) = REPEAT("C", 4000) LOWER(UPPER(col_1_varchar)) = REPEAT("c", 4000) -0 0 0 -0 0 0 -1 1 1 -SELECT -col_1_varchar = REPEAT("c", 4000) -FROM worklog5743 WHERE col_1_varchar like '%c__%'; -col_1_varchar = REPEAT("c", 4000) -1 -SELECT SUBSTRING(INSERT(col_1_varchar, 1, 4, 'kkkk'),1,10) FROM worklog5743 ; -SUBSTRING(INSERT(col_1_varchar, 1, 4, 'kkkk'),1,10) -kkkkaaaaaa -kkkkbbbbbb -kkkkcccccc -SELECT CONCAT(SUBSTRING(col_1_varchar,-5,3),'append') FROM worklog5743 ; -CONCAT(SUBSTRING(col_1_varchar,-5,3),'append') -aaaappend -bbbappend -cccappend -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , -col_2_varchar VARCHAR (4000) , -UNIQUE INDEX (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 1000),REPEAT("c", 1000)), REPEAT("o", 4000)); -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 2000)), REPEAT("o", 4000)); -INSERT INTO worklog5743 VALUES(NULL,NULL); -INSERT INTO worklog5743 VALUES(NULL,NULL); -SELECT 
COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE -FROM INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -COLUMN_NAME INDEX_NAME SUB_PART INDEX_TYPE -col_1_varchar col_1_varchar 3072 BTREE -SELECT col_1_varchar FROM worklog5743 WHERE col_1_varchar IS NULL; -col_1_varchar -NULL -NULL -SELECT col_1_varchar = concat(REPEAT("a", 2000),REPEAT("b", 2000)) -FROM worklog5743 WHERE col_1_varchar IS NOT NULL ORDER BY 1; -col_1_varchar = concat(REPEAT("a", 2000),REPEAT("b", 2000)) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072))) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY `prefix_primary` (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); 
-CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar(3072)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -ERROR 23000: Duplicate entry 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' for key 'PRIMARY' -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY `prefix_primary` (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -col_1_varchar = REPEAT("a", 4000) col_2_varchar = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = 
REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -col_1_varchar = REPEAT("c", 4000) -1 -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar(3072)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -ERROR 23000: Duplicate entry 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' for key 'PRIMARY' -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -col_1_varchar = REPEAT("c", 4000) -0 -1 -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR(4000) , col_2_varchar VARCHAR(4000) , -PRIMARY KEY (col_1_varchar (3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("c", 3500) , REPEAT("o", 3500)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -connect con1,localhost,root,,; -connection con1; -SELECT col_1_varchar = REPEAT("c", 3500) , col_2_varchar = REPEAT("o", 3500) -FROM worklog5743; -col_1_varchar = REPEAT("c", 3500) col_2_varchar = REPEAT("o", 3500) -1 1 -connection default; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 3500) , REPEAT("o", 3500)); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("o", 3500); -col_1_varchar = REPEAT("b", 3500) -0 -0 -COMMIT; -connection con1; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("k", 3500),REPEAT("p", 3500)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -UPDATE worklog5743 SET col_1_varchar = REPEAT("b", 3500) -WHERE col_1_varchar = REPEAT("a", 3500) -AND col_2_varchar = REPEAT("o", 3500); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("o", 3500); -col_1_varchar = REPEAT("b", 3500) -1 -0 -connection default; -DELETE FROM worklog5743 WHERE 
col_1_varchar = REPEAT("b", 3500); -SELECT col_1_varchar = REPEAT("a", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("p", 3500); -col_1_varchar = REPEAT("a", 3500) -0 -connection con1; -COMMIT; -connection default; -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varbinary VARBINARY (4000) , -PRIMARY KEY (col_1_varbinary(3072))) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varbinary = REPEAT("o", 4000) -FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) col_2_varbinary = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -1 -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_varbinary = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("b", 4000) -AND col_2_varbinary = REPEAT("p", 4000); -col_1_varbinary = REPEAT("b", 4000) -1 -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (2000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) -1 -0 -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 
4000); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -DROP INDEX prefix_idx ON worklog5743; -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (4000)); -Warnings: -Warning 1071 Specified key was too long; max key length is 3072 bytes -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) FROM worklog5743; -col_1_varbinary = REPEAT("a", 4000) -1 -0 -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -col_1_varbinary = REPEAT("c", 4000) -DROP TABLE worklog5743; -CREATE TABLE worklog5743 (col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(500)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -1 -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = 
REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_text = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("b", 4000) AND col_2_text = REPEAT("p", 4000); -col_1_text = REPEAT("b", 4000) -1 -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -col_1_text = REPEAT("a", 4000) -1 -0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -DROP INDEX prefix_idx ON worklog5743; -CREATE INDEX prefix_idx ON worklog5743(col_1_text (4000)); -Warnings: -Warning 1071 Specified key was too long; max key length is 3072 bytes -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -col_1_text = REPEAT("a", 4000) -1 -0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) FROM worklog5743; 
-col_1_text = REPEAT("a", 4000) col_2_text = REPEAT("o", 4000) -1 1 -0 0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -1 -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -ALTER TABLE worklog5743 DROP PRIMARY KEY; -SELECT col_1_text = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("b", 4000) -AND col_2_text = REPEAT("p", 4000); -col_1_text = REPEAT("b", 4000) -1 -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_text (700)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -col_1_text = REPEAT("a", 4000) -1 -0 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_text (950)); -ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. This includes storage overhead, check the manual. 
You have to change some columns to TEXT or BLOBs -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -col_1_text = REPEAT("a", 4000) -0 -1 -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -col_1_text = REPEAT("c", 4000) -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar (900)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar (3073)); -ERROR 42000: Specified key was too long; max key length is 3072 bytes -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_BLOB BLOB (4000) , PRIMARY KEY (col_1_BLOB(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_BLOB (500)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_BLOB (3073)); -ERROR 42000: Specified key was too long; max key length is 3072 bytes -DROP TABLE worklog5743; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 1000),REPEAT("c", 1000)), -REPEAT("o", 4000)); -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 2000)), REPEAT("o", 4000)); -ALTER TABLE worklog5743 ADD PRIMARY KEY `pk_idx` (col_1_varchar(3000)); -ERROR 23000: Duplicate entry 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' for key 
'PRIMARY' -DROP TABLE worklog5743; -set global innodb_large_prefix=0; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ERROR 42000: Specified key was too long; max key length is 767 bytes -set global innodb_large_prefix=0; -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(767)) -) engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT('a',4000),REPEAT('b',4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (1000)); -affected rows: 0 -info: Records: 0 Duplicates: 0 Warnings: 1 -Warnings: -Warning 1071 Specified key was too long; max key length is 767 bytes -ALTER TABLE worklog5743 ROW_FORMAT=REDUNDANT; -affected rows: 0 -info: Records: 0 Duplicates: 0 Warnings: 0 -SHOW CREATE TABLE worklog5743; -Table Create Table -worklog5743 CREATE TABLE `worklog5743` ( - `col_1_varchar` varchar(4000) NOT NULL, - `col_2_varchar` varchar(4000) DEFAULT NULL, - PRIMARY KEY (`col_1_varchar`(767)), - KEY `prefix_idx` (`col_1_varchar`(767)) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT -DROP TABLE worklog5743; -connection default; diff --git a/mysql-test/suite/innodb_zip/r/large_blob.result b/mysql-test/suite/innodb_zip/r/large_blob.result new file mode 100644 index 00000000000..7070d610f58 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/large_blob.result @@ -0,0 +1,83 @@ +# +# This tests the use of large blobs in InnoDB. 
+# +call mtr.add_suppression("InnoDB: Warning: a long semaphore wait"); +SET GLOBAL innodb_file_per_table = OFF; +# +# System tablespace, Row Format = Redundant +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +# +# System tablespace, Row Format = Compact +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +SET GLOBAL innodb_file_per_table = ON; +# +# Separate tablespace, Row Format = Redundant +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +# +# Separate tablespace, Row Format = Compact +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +# +# Separate tablespace, Row Format = Compressed, Key Block Size = 2k +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +# +# Separate tablespace, Row Format = Compressed, Key Block Size = 1k +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; +# +# Separate tablespace, Row Format = Dynamic +# +CREATE TABLE t1 ( +c1 INT DEFAULT NULL, +c2 LONGBLOB NOT NULL, +KEY k2 (c2(250), c1) +) ENGINE=InnoDB 
DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/r/restart.result b/mysql-test/suite/innodb_zip/r/restart.result new file mode 100644 index 00000000000..5645b1ee310 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/restart.result @@ -0,0 +1,1236 @@ +SET default_storage_engine=InnoDB; +# +# A series of tests to make sure tables are opened after restart. +# Bug#13357607 Compressed file-per-table tablespaces fail to open +# +set global innodb_file_per_table=on; +# +# Create and insert records into a REDUNDANT row formatted table. +# +CREATE TABLE t1_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=REDUNDANT ENGINE=InnoDB; +INSERT INTO t1_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +SHOW CREATE TABLE t1_restart; +Table Create Table +t1_restart CREATE TABLE `t1_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000027 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +SELECT count(*) FROM t1_restart; +count(*) +16 +# +# Create and insert records into a COMPACT row formatted table. 
+# +CREATE TABLE t2_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=COMPACT ENGINE=InnoDB; +INSERT INTO t2_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t2_restart; +Table Create Table +t2_restart CREATE TABLE `t2_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000027 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +SELECT count(*) FROM t2_restart; +count(*) +16 +# +# Create and insert records into a COMPRESSED row formatted table. +# +CREATE TABLE t3_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 ENGINE=InnoDB; +INSERT INTO t3_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +SHOW CREATE TABLE t3_restart; +Table Create Table +t3_restart CREATE TABLE `t3_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000027 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +SELECT count(*) FROM t3_restart; +count(*) +16 +# +# Create and insert records into a DYNAMIC row formatted table. 
+# +CREATE TABLE t4_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=DYNAMIC ENGINE=InnoDB; +INSERT INTO t4_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SHOW CREATE TABLE t4_restart; +Table Create Table +t4_restart CREATE TABLE `t4_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000027 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +SELECT count(*) FROM t4_restart; +count(*) +16 +# +# Create and insert records into a table that uses a remote DATA DIRECTORY. +# +CREATE TABLE t5_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=DYNAMIC ENGINE=InnoDB DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir'; +INSERT INTO t5_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +SHOW CREATE TABLE t5_restart; +Table Create Table +t5_restart CREATE TABLE `t5_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000027 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +SELECT count(*) FROM t5_restart; +count(*) +16 +# +# Create and insert records 
into a partitioned table that uses +# a remote DATA DIRECTORY for each partition. +# +CREATE TABLE t6_restart( +c1 INT AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 ENGINE=InnoDB +PARTITION BY HASH(c1) ( +PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir', +PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir', +PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir'); +INSERT INTO t6_restart VALUES (0, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +SHOW CREATE TABLE t6_restart; +Table Create Table +t6_restart CREATE TABLE `t6_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB) */ +SELECT count(*) FROM t6_restart; +count(*) +16 +# +# Create and insert records into a subpartitioned table that uses +# a remote DATA DIRECTORY for each subpartition. 
+# +CREATE TABLE t7_restart( +c1 INT AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=DYNAMIC ENGINE=InnoDB +PARTITION BY RANGE(c1) SUBPARTITION BY HASH(c1) ( +PARTITION p0 VALUES LESS THAN (10) ( +SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir', +SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir'), +PARTITION p1 VALUES LESS THAN MAXVALUE ( +SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir', +SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir')); +INSERT INTO t7_restart VALUES (0, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +SHOW CREATE TABLE t7_restart; +Table Create Table +t7_restart CREATE TABLE `t7_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir/' ENGINE = InnoDB)) */ +SELECT count(*) FROM t7_restart; +count(*) +16 +# +# Create and insert records into a table that uses a general tablespace. 
+# +CREATE TABLESPACE s1_restart ADD DATAFILE 's1_restart.ibd'; +CREATE TABLE t8_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=COMPACT ENGINE=InnoDB TABLESPACE=s1_restart; +INSERT INTO t8_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t8_restart; +Table Create Table +t8_restart CREATE TABLE `t8_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) /*!50100 TABLESPACE `s1_restart` */ ENGINE=InnoDB AUTO_INCREMENT=1000000125 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +SELECT count(*) FROM t8_restart; +count(*) +65 +CREATE TABLE t9_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) +ROW_FORMAT=DYNAMIC ENGINE=InnoDB TABLESPACE=s1_restart; +INSERT INTO t9_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t9_restart; +Table Create Table +t9_restart CREATE TABLE `t9_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) /*!50100 TABLESPACE `s1_restart` */ ENGINE=InnoDB AUTO_INCREMENT=1000000125 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +SELECT count(*) FROM t9_restart; +count(*) +65 +# +# Show these 
tables in information_schema. +# +=== information_schema.innodb_sys_tables and innodb_sys_tablespaces === +Table Name Tablespace Table Flags Columns Row Format Zip Size Space Type +test/t1_restart test/t1_restart 0 8 Redundant 0 Single +test/t2_restart test/t2_restart 1 8 Compact 0 Single +test/t3_restart test/t3_restart 37 8 Compressed 2048 Single +test/t4_restart test/t4_restart 33 8 Dynamic 0 Single +test/t5_restart test/t5_restart 97 8 Dynamic 0 Single +test/t6_restart#p#p0 test/t6_restart#p#p0 101 8 Compressed 2048 Single +test/t6_restart#p#p1 test/t6_restart#p#p1 101 8 Compressed 2048 Single +test/t6_restart#p#p2 test/t6_restart#p#p2 101 8 Compressed 2048 Single +test/t7_restart#p#p0#sp#s0 test/t7_restart#p#p0#sp#s0 97 8 Dynamic 0 Single +test/t7_restart#p#p0#sp#s1 test/t7_restart#p#p0#sp#s1 97 8 Dynamic 0 Single +test/t7_restart#p#p1#sp#s2 test/t7_restart#p#p1#sp#s2 97 8 Dynamic 0 Single +test/t7_restart#p#p1#sp#s3 test/t7_restart#p#p1#sp#s3 97 8 Dynamic 0 Single +test/t8_restart s1_restart 129 8 Compact 0 General +test/t9_restart s1_restart 161 8 Dynamic 0 General +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t1_restart Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t1_restart.ibd +test/t2_restart Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t2_restart.ibd +test/t3_restart Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t3_restart.ibd +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 Single DEFAULT 0 
Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +s1_restart General DEFAULT 0 Any MYSQLD_DATADIR/s1_restart.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t1_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t1_restart.ibd +test/t2_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t2_restart.ibd +test/t3_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t3_restart.ibd +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +s1_restart TABLESPACE InnoDB NORMAL s1_restart 
MYSQLD_DATADIR/s1_restart.ibd +# +# Shutdown the server and list the tablespace OS files +# +---- MYSQL_DATA_DIR/test +t1_restart.frm +t1_restart.ibd +t2_restart.frm +t2_restart.ibd +t3_restart.frm +t3_restart.ibd +t4_restart.frm +t4_restart.ibd +t5_restart.frm +t5_restart.isl +t6_restart#p#p0.isl +t6_restart#p#p1.isl +t6_restart#p#p2.isl +t6_restart.frm +t7_restart#p#p0#sp#s0.isl +t7_restart#p#p0#sp#s1.isl +t7_restart#p#p1#sp#s2.isl +t7_restart#p#p1#sp#s3.isl +t7_restart.frm +t8_restart.frm +t9_restart.frm +---- MYSQL_TMP_DIR/alt_dir +test +---- MYSQL_TMP_DIR/alt_dir/test +t5_restart.ibd +t6_restart#p#p0.ibd +t6_restart#p#p1.ibd +t6_restart#p#p2.ibd +t7_restart#p#p0#sp#s0.ibd +t7_restart#p#p0#sp#s1.ibd +t7_restart#p#p1#sp#s2.ibd +t7_restart#p#p1#sp#s3.ibd +# +# Start the server and show that tables are still visible and accessible. +# +# restart +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table ON +SHOW CREATE TABLE t1_restart; +Table Create Table +t1_restart CREATE TABLE `t1_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000020 DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +SHOW CREATE TABLE t2_restart; +Table Create Table +t2_restart CREATE TABLE `t2_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000020 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +SHOW CREATE TABLE t3_restart; +Table Create Table +t3_restart CREATE TABLE `t3_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000020 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +SHOW 
CREATE TABLE t4_restart; +Table Create Table +t4_restart CREATE TABLE `t4_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000020 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +SHOW CREATE TABLE t5_restart; +Table Create Table +t5_restart CREATE TABLE `t5_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000020 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +SHOW CREATE TABLE t6_restart; +Table Create Table +t6_restart CREATE TABLE `t6_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB) */ +SHOW CREATE TABLE t7_restart; +Table Create Table +t7_restart CREATE TABLE `t7_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 
'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB)) */ +SHOW CREATE TABLE t8_restart; +Table Create Table +t8_restart CREATE TABLE `t8_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) /*!50100 TABLESPACE `s1_restart` */ ENGINE=InnoDB AUTO_INCREMENT=1000000110 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +SHOW CREATE TABLE t9_restart; +Table Create Table +t9_restart CREATE TABLE `t9_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) /*!50100 TABLESPACE `s1_restart` */ ENGINE=InnoDB AUTO_INCREMENT=1000000110 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t8_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t9_restart); +SELECT count(*) FROM t1_restart; +count(*) +32 +SELECT count(*) FROM t2_restart; +count(*) +32 +SELECT count(*) FROM t3_restart; +count(*) +32 +SELECT count(*) FROM t4_restart; +count(*) +32 +SELECT count(*) FROM t5_restart; +count(*) +32 +SELECT count(*) FROM t6_restart; +count(*) +32 +SELECT count(*) FROM t7_restart; +count(*) +32 +SELECT count(*) FROM t8_restart; +count(*) +130 +SELECT count(*) FROM t9_restart; +count(*) +130 +# +# Show these tables in information_schema. 
+# +=== information_schema.innodb_sys_tables and innodb_sys_tablespaces === +Table Name Tablespace Table Flags Columns Row Format Zip Size Space Type +test/t1_restart test/t1_restart 0 8 Redundant 0 Single +test/t2_restart test/t2_restart 1 8 Compact 0 Single +test/t3_restart test/t3_restart 37 8 Compressed 2048 Single +test/t4_restart test/t4_restart 33 8 Dynamic 0 Single +test/t5_restart test/t5_restart 97 8 Dynamic 0 Single +test/t6_restart#p#p0 test/t6_restart#p#p0 101 8 Compressed 2048 Single +test/t6_restart#p#p1 test/t6_restart#p#p1 101 8 Compressed 2048 Single +test/t6_restart#p#p2 test/t6_restart#p#p2 101 8 Compressed 2048 Single +test/t7_restart#p#p0#sp#s0 test/t7_restart#p#p0#sp#s0 97 8 Dynamic 0 Single +test/t7_restart#p#p0#sp#s1 test/t7_restart#p#p0#sp#s1 97 8 Dynamic 0 Single +test/t7_restart#p#p1#sp#s2 test/t7_restart#p#p1#sp#s2 97 8 Dynamic 0 Single +test/t7_restart#p#p1#sp#s3 test/t7_restart#p#p1#sp#s3 97 8 Dynamic 0 Single +test/t8_restart s1_restart 129 8 Compact 0 General +test/t9_restart s1_restart 161 8 Dynamic 0 General +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t1_restart Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t1_restart.ibd +test/t2_restart Single DEFAULT 0 Compact or Redundant MYSQLD_DATADIR/test/t2_restart.ibd +test/t3_restart Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t3_restart.ibd +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic 
MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +s1_restart General DEFAULT 0 Any MYSQLD_DATADIR/s1_restart.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t1_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t1_restart.ibd +test/t2_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t2_restart.ibd +test/t3_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t3_restart.ibd +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +s1_restart TABLESPACE InnoDB NORMAL s1_restart 
MYSQLD_DATADIR/s1_restart.ibd +DROP TABLE t1_restart; +DROP TABLE t2_restart; +DROP TABLE t3_restart; +DROP TABLE t8_restart; +DROP TABLE t9_restart; +DROP TABLESPACE s1_restart; +# +# Truncate the remote tablespaces. +# +TRUNCATE TABLE t5_restart; +ALTER TABLE t6_restart TRUNCATE PARTITION p2; +ALTER TABLE t7_restart TRUNCATE PARTITION p1; +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## 
MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +INSERT INTO t5_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +SELECT count(*) FROM t5_restart; +count(*) +8 +SHOW CREATE TABLE t5_restart; +Table Create Table +t5_restart CREATE TABLE `t5_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000012 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +SELECT count(*) FROM t6_restart; +count(*) +21 +SHOW CREATE TABLE t6_restart; +Table Create Table +t6_restart CREATE TABLE `t6_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=32 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB) */ +SELECT count(*) FROM t7_restart; 
+count(*) +9 +SHOW CREATE TABLE t7_restart; +Table Create Table +t7_restart CREATE TABLE `t7_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB)) */ +# +# Shutdown the server and make a backup of a tablespace +# +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.ibd +t5_restart.frm +t5_restart.frm.bak +t5_restart.isl +t5_restart.isl.bak +t6_restart#p#p0.isl +t6_restart#p#p1.isl +t6_restart#p#p2.isl +t6_restart.frm +t7_restart#p#p0#sp#s0.isl +t7_restart#p#p0#sp#s1.isl +t7_restart#p#p1#sp#s2.isl +t7_restart#p#p1#sp#s3.isl +t7_restart.frm +---- MYSQL_TMP_DIR/alt_dir/test +t5_restart.ibd +t5_restart.ibd.bak +t6_restart#p#p0.ibd +t6_restart#p#p1.ibd +t6_restart#p#p2.ibd +t7_restart#p#p0#sp#s0.ibd +t7_restart#p#p0#sp#s1.ibd +t7_restart#p#p1#sp#s2.ibd +t7_restart#p#p1#sp#s3.ibd +# +# Start the server and show the tablespaces. 
+# +# restart +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table ON +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t5_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd +test/t6_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p0.ibd +test/t6_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p1.ibd +test/t6_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t6_restart#p#p2.ibd +test/t7_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s0.ibd +test/t7_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL 
innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p0#sp#s1.ibd +test/t7_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s2.ibd +test/t7_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t7_restart#p#p1#sp#s3.ibd +SELECT count(*) FROM t5_restart; +count(*) +8 +SHOW CREATE TABLE t5_restart; +Table Create Table +t5_restart CREATE TABLE `t5_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000009 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +SELECT count(*) FROM t6_restart; +count(*) +21 +SHOW CREATE TABLE t6_restart; +Table Create Table +t6_restart CREATE TABLE `t6_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=32 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB) */ +SELECT count(*) FROM t7_restart; +count(*) +9 +SHOW CREATE TABLE t7_restart; +Table Create Table +t7_restart CREATE TABLE `t7_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA 
DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB)) */ +# +# Try to rename a tablespace to a file that already exists +# +RENAME TABLE t5_restart TO t55_restart; +ERROR 42S01: Table 't55_restart' already exists +RENAME TABLE t5_restart TO t55_restart; +ERROR HY000: Error on rename of './test/t5_restart' to './test/t55_restart' (errno: 184 - Tablespace already exists) +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.ibd +t5_restart.frm +t5_restart.isl +t6_restart#p#p0.isl +t6_restart#p#p1.isl +t6_restart#p#p2.isl +t6_restart.frm +t7_restart#p#p0#sp#s0.isl +t7_restart#p#p0#sp#s1.isl +t7_restart#p#p1#sp#s2.isl +t7_restart#p#p1#sp#s3.isl +t7_restart.frm +---- MYSQL_TMP_DIR/alt_dir/test +t5_restart.ibd +t6_restart#p#p0.ibd +t6_restart#p#p1.ibd +t6_restart#p#p2.ibd +t7_restart#p#p0#sp#s0.ibd +t7_restart#p#p0#sp#s1.ibd +t7_restart#p#p1#sp#s2.ibd +t7_restart#p#p1#sp#s3.ibd +# +# Rename file table and tablespace +# +RENAME TABLE t5_restart TO t55_restart; +RENAME TABLE t6_restart TO t66_restart; +RENAME TABLE t7_restart TO t77_restart; +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +test/t66_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 Single 
DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +test/t66_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s3.ibd +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +count(*) +16 +SHOW CREATE TABLE t55_restart; +Table Create Table +t55_restart CREATE TABLE `t55_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000024 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +INSERT 
INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +count(*) +42 +SHOW CREATE TABLE t66_restart; +Table Create Table +t66_restart CREATE TABLE `t66_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=53 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB) */ +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +count(*) +18 +SHOW CREATE TABLE t77_restart; +Table Create Table +t77_restart CREATE TABLE `t77_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=19 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB)) */ +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.ibd +t55_restart.frm +t55_restart.isl +t66_restart#p#p0.isl +t66_restart#p#p1.isl +t66_restart#p#p2.isl +t66_restart.frm +t77_restart#p#p0#sp#s0.isl +t77_restart#p#p0#sp#s1.isl +t77_restart#p#p1#sp#s2.isl +t77_restart#p#p1#sp#s3.isl +t77_restart.frm +---- MYSQL_TMP_DIR/alt_dir/test +t55_restart.ibd 
+t66_restart#p#p0.ibd +t66_restart#p#p1.ibd +t66_restart#p#p2.ibd +t77_restart#p#p0#sp#s0.ibd +t77_restart#p#p0#sp#s1.ibd +t77_restart#p#p1#sp#s2.ibd +t77_restart#p#p1#sp#s3.ibd +# +# Restart the server +# +# restart +SHOW VARIABLES LIKE 'innodb_file_per_table'; +Variable_name Value +innodb_file_per_table ON +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +test/t66_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +test/t66_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## 
MYSQL_TMP_DIR/alt_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/alt_dir/test/t77_restart#p#p1#sp#s3.ibd +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +count(*) +32 +SHOW CREATE TABLE t55_restart; +Table Create Table +t55_restart CREATE TABLE `t55_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000048 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir/' +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +count(*) +84 +SHOW CREATE TABLE t66_restart; +Table Create Table +t66_restart CREATE TABLE `t66_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=95 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB) */ +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +count(*) +36 +SHOW CREATE TABLE t77_restart; +Table Create Table +t77_restart 
CREATE TABLE `t77_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=37 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alt_dir' ENGINE = InnoDB)) */ +# +# Shutdown the server +# +# +# Move the remote tablespaces to a new location and change the ISL files +# +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.ibd +t55_restart.frm +t55_restart.isl +t66_restart#p#p0.isl +t66_restart#p#p1.isl +t66_restart#p#p2.isl +t66_restart.frm +t77_restart#p#p0#sp#s0.isl +t77_restart#p#p0#sp#s1.isl +t77_restart#p#p1#sp#s2.isl +t77_restart#p#p1#sp#s3.isl +t77_restart.frm +---- MYSQL_TMP_DIR/alt_dir/test +t55_restart.ibd +t66_restart#p#p0.ibd +t66_restart#p#p1.ibd +t66_restart#p#p2.ibd +t77_restart#p#p0#sp#s0.ibd +t77_restart#p#p0#sp#s1.ibd +t77_restart#p#p1#sp#s2.ibd +t77_restart#p#p1#sp#s3.ibd +---- MYSQL_TMP_DIR/new_dir/test +# Moving tablespace 't4_restart' from MYSQL_DATA_DIR to MYSQL_TMP_DIR/new_dir +# Moving tablespace 't55_restart' from MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +# Moving tablespace 't66_restart' from MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +# Moving tablespace 't77_restart' from MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.isl +t55_restart.frm +t55_restart.isl +t66_restart#p#p0.isl +t66_restart#p#p1.isl +t66_restart#p#p2.isl +t66_restart.frm +t77_restart#p#p0#sp#s0.isl +t77_restart#p#p0#sp#s1.isl +t77_restart#p#p1#sp#s2.isl 
+t77_restart#p#p1#sp#s3.isl +t77_restart.frm +---- MYSQL_TMP_DIR/alt_dir/test +---- MYSQL_TMP_DIR/new_dir/test +t4_restart.ibd +t55_restart.ibd +t66_restart#p#p0.ibd +t66_restart#p#p1.ibd +t66_restart#p#p2.ibd +t77_restart#p#p0#sp#s0.ibd +t77_restart#p#p0#sp#s1.ibd +t77_restart#p#p1#sp#s2.ibd +t77_restart#p#p1#sp#s3.ibd +# +# Start the server and check tablespaces. +# +# restart +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd +test/t55_restart Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd +test/t66_restart#p#p0 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 Single DEFAULT 2048 Compressed MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd +test/t55_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd +test/t66_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p1.ibd 
+test/t66_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQL_TMP_DIR/new_dir/test/t77_restart#p#p1#sp#s3.ibd +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SELECT count(*) FROM t4_restart; +count(*) +64 +SHOW CREATE TABLE t4_restart; +Table Create Table +t4_restart CREATE TABLE `t4_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000099 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +count(*) +64 +SHOW CREATE TABLE t55_restart; +Table Create Table +t55_restart CREATE TABLE `t55_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000096 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC DATA DIRECTORY='MYSQL_TMP_DIR/new_dir/' +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +count(*) +168 +SHOW CREATE TABLE t66_restart; +Table Create Table +t66_restart CREATE TABLE `t66_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) 
ENGINE=InnoDB AUTO_INCREMENT=179 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB, + PARTITION p1 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB, + PARTITION p2 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB) */ +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +count(*) +72 +SHOW CREATE TABLE t77_restart; +Table Create Table +t77_restart CREATE TABLE `t77_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=73 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB, + SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB, + SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/new_dir' ENGINE = InnoDB)) */ +# +# Shutdown the server +# +# +# Move the remote tablespaces back to the default datadir and delete the ISL file. 
+# +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.isl +t55_restart.frm +t55_restart.isl +t66_restart#p#p0.isl +t66_restart#p#p1.isl +t66_restart#p#p2.isl +t66_restart.frm +t77_restart#p#p0#sp#s0.isl +t77_restart#p#p0#sp#s1.isl +t77_restart#p#p1#sp#s2.isl +t77_restart#p#p1#sp#s3.isl +t77_restart.frm +---- MYSQL_TMP_DIR/new_dir/test +t4_restart.ibd +t55_restart.ibd +t66_restart#p#p0.ibd +t66_restart#p#p1.ibd +t66_restart#p#p2.ibd +t77_restart#p#p0#sp#s0.ibd +t77_restart#p#p0#sp#s1.ibd +t77_restart#p#p1#sp#s2.ibd +t77_restart#p#p1#sp#s3.ibd +# Moving 't4_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +# Moving 't55_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +# Moving 't66_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +# Moving 't77_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +---- MYSQL_DATA_DIR/test +t4_restart.frm +t4_restart.ibd +t55_restart.frm +t55_restart.ibd +t66_restart#p#p0.ibd +t66_restart#p#p1.ibd +t66_restart#p#p2.ibd +t66_restart.frm +t77_restart#p#p0#sp#s0.ibd +t77_restart#p#p0#sp#s1.ibd +t77_restart#p#p1#sp#s2.ibd +t77_restart#p#p1#sp#s3.ibd +t77_restart.frm +---- MYSQL_TMP_DIR/new_dir/test +# +# Start the server and check tablespaces. 
+# +# restart +=== information_schema.innodb_sys_tablespaces and innodb_sys_datafiles === +Space_Name Space_Type Page_Size Zip_Size Formats_Permitted Path +test/t4_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t55_restart.ibd +test/t66_restart#p#p0 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 Single DEFAULT 2048 Compressed MYSQLD_DATADIR/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 Single DEFAULT 0 Dynamic MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s3.ibd +=== information_schema.files === +Space_Name File_Type Engine Status Tablespace_Name Path +test/t4_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t4_restart.ibd +test/t55_restart TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t55_restart.ibd +test/t66_restart#p#p0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t66_restart#p#p0.ibd +test/t66_restart#p#p1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t66_restart#p#p1.ibd +test/t66_restart#p#p2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t66_restart#p#p2.ibd +test/t77_restart#p#p0#sp#s0 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s0.ibd +test/t77_restart#p#p0#sp#s1 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t77_restart#p#p0#sp#s1.ibd +test/t77_restart#p#p1#sp#s2 TABLESPACE InnoDB NORMAL innodb_file_per_table.## 
MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s2.ibd +test/t77_restart#p#p1#sp#s3 TABLESPACE InnoDB NORMAL innodb_file_per_table.## MYSQLD_DATADIR/test/t77_restart#p#p1#sp#s3.ibd +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SELECT count(*) FROM t4_restart; +count(*) +128 +SHOW CREATE TABLE t4_restart; +Table Create Table +t4_restart CREATE TABLE `t4_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000195 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +count(*) +128 +SHOW CREATE TABLE t55_restart; +Table Create Table +t55_restart CREATE TABLE `t55_restart` ( + `c1` double NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=1000000192 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +count(*) +336 +SHOW CREATE TABLE t66_restart; +Table Create Table +t66_restart CREATE TABLE `t66_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=347 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 +/*!50100 PARTITION BY HASH (c1) +(PARTITION p0 ENGINE = InnoDB, + PARTITION p1 ENGINE = InnoDB, + PARTITION p2 ENGINE = InnoDB) */ +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +count(*) +144 +SHOW CREATE TABLE t77_restart; +Table Create Table +t77_restart CREATE TABLE `t77_restart` ( + `c1` int(11) NOT NULL AUTO_INCREMENT, + `c2` char(10) DEFAULT NULL, + `c3` 
varchar(100) DEFAULT NULL, + `c4` date DEFAULT NULL, + `c5` text, + PRIMARY KEY (`c1`) +) ENGINE=InnoDB AUTO_INCREMENT=145 DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +/*!50100 PARTITION BY RANGE (c1) +SUBPARTITION BY HASH (c1) +(PARTITION p0 VALUES LESS THAN (10) + (SUBPARTITION s0 ENGINE = InnoDB, + SUBPARTITION s1 ENGINE = InnoDB), + PARTITION p1 VALUES LESS THAN MAXVALUE + (SUBPARTITION s2 ENGINE = InnoDB, + SUBPARTITION s3 ENGINE = InnoDB)) */ +# +# Cleanup +# +DROP TABLE t4_restart; +DROP TABLE t55_restart; +DROP TABLE t66_restart; +DROP TABLE t77_restart; diff --git a/mysql-test/suite/innodb_zip/r/wl6344_compress_level.result b/mysql-test/suite/innodb_zip/r/wl6344_compress_level.result new file mode 100644 index 00000000000..5cdfe162b6a --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6344_compress_level.result @@ -0,0 +1,135 @@ +USE test; +DROP TABLE IF EXISTS tab5; +Warnings: +Note 1051 Unknown table 'test.tab5' +DROP TABLE IF EXISTS tab6; +Warnings: +Note 1051 Unknown table 'test.tab6' +#set the other madatory flags before test starts +SET GLOBAL Innodb_file_per_table=on; +#set the compression level=0 (No compress) +SET global innodb_compression_level=0; +#check the compression level and the compressed_pages is default +SELECT @@innodb_compression_level; +@@innodb_compression_level +0 +SELECT @@Innodb_file_per_table; +@@Innodb_file_per_table +1 +#create table with 1K block size +CREATE TABLE tab5 (col_1 CHAR (255) , +col_2 VARCHAR (255), col_3 longtext, +col_4 longtext,col_5 longtext, +col_6 longtext , col_7 longtext , +col_8 longtext ,col_9 longtext , +col_10 longtext ,col_11 int auto_increment primary key) +ENGINE = innodb ROW_FORMAT=compressed key_block_size=1; +#create indexes +CREATE INDEX idx1 ON tab5(col_4(10)); +CREATE INDEX idx2 ON tab5(col_5(10)); +CREATE INDEX idx3 ON tab5(col_6(10)); +CREATE INDEX idx4 ON tab5(col_7(10)); +CREATE INDEX idx5 ON tab5(col_8(10)); +CREATE INDEX idx6 ON tab5(col_11); +#load the with repeat function +SET @col_1 = 
repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +SET @col_10 = repeat('j', 100); +#insert 10 records +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +#set the compression level=9 (High compress) +SET global 
innodb_compression_level=9; +#create table with 1K block size +CREATE TABLE tab6 (col_1 CHAR (255) , +col_2 VARCHAR (255), col_3 longtext, +col_4 longtext,col_5 longtext, +col_6 longtext , col_7 longtext , +col_8 longtext ,col_9 longtext , +col_10 longtext ,col_11 int auto_increment primary key) +ENGINE = innodb ROW_FORMAT=compressed key_block_size=1; +#create indexes +CREATE INDEX idx1 ON tab6(col_4(10)); +CREATE INDEX idx2 ON tab6(col_5(10)); +CREATE INDEX idx3 ON tab6(col_6(10)); +CREATE INDEX idx4 ON tab6(col_7(10)); +CREATE INDEX idx5 ON tab6(col_8(10)); +CREATE INDEX idx6 ON tab6(col_11); +#load the with repeat function +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +SET @col_10 = repeat('j', 100); +#insert 10 records +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO 
tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9,@col_10); +#diff the sizes of the No compressed table and high compressed table +SET @size=(SELECT +(SELECT (SUM(DATA_LENGTH+INDEX_LENGTH)/1024/1024) +FROM INFORMATION_SCHEMA.TABLES +WHERE table_name='tab5' AND ENGINE='InnoDB' AND table_schema='test') +- +(SELECT SUM(DATA_LENGTH+INDEX_LENGTH)/1024/1024 +FROM INFORMATION_SCHEMA.TABLES +WHERE table_name='tab6' AND ENGINE='InnoDB' AND table_schema='test') +FROM DUAL); +#check the size of the table, it should not be Negative value +#The results of this query Test pass = 1 and fail=0 +SELECT @size >= 0; +@size >= 0 +1 +# +# Cleanup +# +DROP TABLE tab5; +DROP TABLE tab6; +#reset back the compression_level to default. 
diff --git a/mysql-test/suite/innodb_zip/r/wl6347_comp_indx_stat.result b/mysql-test/suite/innodb_zip/r/wl6347_comp_indx_stat.result new file mode 100644 index 00000000000..509ffe91de5 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6347_comp_indx_stat.result @@ -0,0 +1,8078 @@ +#****************************************************************** +# Test 1: Test the interaction between stats and compression level +# and logging of compressed pages configuration +# This testcase is to verify the table/idex level compression stats +# When the flags are set as follows +# innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 +# page size 1K,2K,4K,8K,16K +# check the size and compression stats of the table tab5 +#****************************************************************** +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=0; +#****************************************************************** +# Test 1-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 1K +#****************************************************************** +# create a table with page size=1K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : 
pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for deterministic reasons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of 
the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 5242880 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 5242880 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh 
+col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 5242880 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=0; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 
1-2K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 2K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +# create a table with page size=2K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 2097152 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 2097152 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 2097152 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=0; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=2; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 1-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 4K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +# create a table with page size=4K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 
TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +DROP TABLE tab5; +#****************************************************************** +# Test 1-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 8K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +# create a table with page size=8K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 122880 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 212992 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 212992 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 212992 +DROP TABLE tab5; +#****************************************************************** +# Test 1-16K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 16K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +# create a table with page size=16K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 245760 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +DROP TABLE tab5; +#****************************************************************** +# Test 2: test the interaction between wl6347 & wl6344 (2.2) +# This testcase is to verify the table/idex level compression stats +# When the flags are set as follows +# innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 +# page size 1K,2K,4K,8K,16K +# check the size and compression stats of the table tab5 +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; +#****************************************************************** +# Test 2-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 1K +#****************************************************************** +# create a table with page size=1K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT 
count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 2097152 +SET @inl_val=2; +# Check the stats of the table +# Check the size of the 
ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 2-2K: 
innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 2K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=2K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 2097152 +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the 
compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 
1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 2-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 4K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=4K +# create indexes on each column.(total 9 indexes) +# Create table & Index 
+CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 159744 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the 
compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 
1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +DROP TABLE tab5; +#****************************************************************** +# Test 2-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 8K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=8K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 122880 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 122880 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +# The size of the file with 0 compress = 212992 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN 
@inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +DROP TABLE tab5; +#****************************************************************** +# Test 2-16K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 16K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=16K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 245760 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 245760 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +# The size of the file with 0 compress = 344064 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN 
@inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +DROP TABLE tab5; +#****************************************************************** +# Test 3: test the interaction between wl6347 & wl6344 (2.3) +# This testcase is to verify the table/idex level compression stats +# When the flags are set as follows +# innodb_cmp_per_index_enabled=ON and +# innodb_compression_level=6 (default) +# page size 1K,2K,4K,8K,16K +# check the size and compression stats of the table tab5 +#****************************************************************** +#****************************************************************** +# Test 3-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 1K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=default; +# create a table with page size=1K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail 
= 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=2; +# Check the stats of the table +# Check the size of 
the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +DROP TABLE tab5; +#****************************************************************** +# Test 3-2K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 2K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=default; +# create a table with page size=2K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM 
information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 86016 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd 
file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +DROP TABLE tab5; +#****************************************************************** +# Test 3-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 4K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=default; +# create a table with page size=4K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 
TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 65536 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 86016 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the 
compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +DROP TABLE tab5; +#****************************************************************** +# Test 3-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 8K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=default; +# create a table with page size=8K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON 
tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 122880 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 122880 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 172032 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +DROP TABLE tab5; +#****************************************************************** +# Test 3-16K: 
innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 16K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=default; +# create a table with page size=16K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +# The size of the file with 0 compress = 245760 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 245760 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +# The size of the file with 0 compress = 344064 +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the 
compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +DROP TABLE tab5; +#****************************************************************** +# Test 4: test the interaction between wl6347 & wl6344 (2.5 & 2.6) +# This testcase is to verify the table/idex level compression stats +# When the flags are set as follows +# innodb_cmp_per_index_enabled=ON and +# Innodb_compression_failure_threshold_pct=0 +# page size 1K,2K,4K,8K,16K +# check the size and compression stats of the table tab5 +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=0; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# check the flags +SELECT @@innodb_cmp_per_index_enabled; +@@innodb_cmp_per_index_enabled 1 +SELECT @@innodb_compression_failure_threshold_pct; 
+@@innodb_compression_failure_threshold_pct 0 +SELECT @@innodb_file_per_table; +@@innodb_file_per_table 1 +SELECT @@innodb_compression_level; +@@innodb_compression_level 6 +#****************************************************************** +# Test 4-1K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 1K +#****************************************************************** +# create a table with page size=1K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=0; +SET GLOBAL innodb_file_per_table=on; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 4-2K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 2K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=2K +# create indexes on each column.(total 9 indexes) +# Create table & Index 
+CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +DROP TABLE tab5; +#****************************************************************** +# Test 4-4K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 4K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=4K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +DROP TABLE tab5; +#****************************************************************** +# Test 4-8K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 8K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=8K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 122880 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +DROP TABLE tab5; +#****************************************************************** +# Test 4-16K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 16K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=16K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 245760 +# for determintic resons simple data should be inserted. 
+# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed page and check the stats 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN 
@inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +DROP TABLE tab5; +#****************************************************************** +# Test 5: test the interaction between wl6347 & wl6344 (2.7) +# This testcase is to verify the table/idex level compression stats +# When the flags are set as follows +# innodb_cmp_per_index_enabled=ON and +# Innodb_compression_failure_threshold_pct=10 +# page size 1K,2K,4K,8K,16K +# check the size and compression stats of the table tab5 +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; +SET GLOBAL innodb_compression_level=Default; +# check the flags +SELECT @@innodb_cmp_per_index_enabled; +@@innodb_cmp_per_index_enabled 1 +SELECT @@innodb_compression_failure_threshold_pct; +@@innodb_compression_failure_threshold_pct 10 +SELECT @@innodb_file_per_table; +@@innodb_file_per_table 1 +SELECT @@innodb_compression_level; +@@innodb_compression_level 6 +#****************************************************************** +# Test 5-1K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 1K +#****************************************************************** +# create a table with page size=1K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON 
tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT 
col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=10; +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_failure_threshold_pct=10; +# fetch the compressed page and check the stats +# The stats figure may be different/same for each restart. 
+=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table +# testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +=============== +After Restart Chekc the stats of the table +=============== +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @comp_val AND 1000 +AND uncompress_ops BETWEEN @uncomp_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +DROP TABLE tab5; +#****************************************************************** +# Test 5-2K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 2K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; +# create a table with page size=2K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 
ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE 
col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=2; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +DROP TABLE tab5; +#****************************************************************** +# Test 5-4K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 4K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=4K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON 
tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 65536 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 81920 +DROP TABLE tab5; +#****************************************************************** +# Test 5-8K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 8K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; +# create a table with page size=8K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table 
+SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 122880 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); 
+commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 163840 +DROP TABLE tab5; +#****************************************************************** +# Test 5-16K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 16K +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +# create a table with page size=16K +# create indexes on each column.(total 9 indexes) +# Create table & Index +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +CREATE INDEX idx9 ON tab5(col_9(10)); +CREATE INDEX idx8 ON tab5(col_8(10)); +CREATE INDEX idx7 ON tab5(col_7(10)); +CREATE INDEX idx6 ON tab5(col_6(10)); +CREATE INDEX idx5 ON tab5(col_5(10)); +CREATE INDEX idx4 ON tab5(col_4(10)); +CREATE INDEX idx3 ON tab5(col_3(10)); +CREATE INDEX idx2 ON tab5(col_2(10)); +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM 
information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 245760 +# for determintic resons simple data should be inserted. +# insert some 100 records +# Load the data +SET @col_1 = repeat('a', 100); +SET @col_2 = repeat('b', 100); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); +SET @col_7 = repeat('g', 100); +SET @col_8 = repeat('h', 100); +SET @col_9 = repeat('i', 100); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 
+VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO 
tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT 
INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +INSERT INTO tab5 +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,@col_7,@col_8,@col_9); +commit; +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT 
count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed page and check the stats +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +# fetch the compressed same page once again and check the stats +# the stat figures should be same as above query +=============== +Fetch Records +=============== +SELECT col_7,col_8,col_9 FROM tab5 +WHERE col_2='bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' +LIMIT 1; +col_7 gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg +col_8 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +col_9 
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +# check the stats of the table & size of the table +SET @inl_val=1; +# Check the stats of the table +# Check the size of the ibd file +# testcase : pass = 1 fail = 0 +SELECT count(*) > 0 as "compress_stat" +FROM information_schema.innodb_cmp_per_index +WHERE +compress_ops_ok BETWEEN @inl_val AND 1000 +AND compress_ops BETWEEN @inl_val AND 1000 +AND table_name='tab5' AND database_name='test' +AND index_name like 'idx%' ; +compress_stat 1 +The size of the tab5.ibd file: 327680 +DROP TABLE tab5; +#****************************************************************** +# Test 6: Create multiple tables & indexes having same name in 2 diff DB's +# Check the stats of the table. (1.1) +#****************************************************************** +# reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; +SET GLOBAL innodb_file_per_table=ON; +SET GLOBAL innodb_compression_level=default; +SET GLOBAL innodb_compression_failure_threshold_pct=default; +# create a table page size=1K +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx1 ON tab5(col_1(10)); +# check the stats of the table +SELECT database_name,table_name,index_name,compress_ops,compress_ops_ok +FROM information_schema.innodb_cmp_per_index +WHERE database_name='test' and table_name='tab5' +ORDER BY index_name,table_name,database_name ; +database_name test +table_name tab5 +index_name GEN_CLUST_INDEX +compress_ops 1 +compress_ops_ok 1 +database_name test +table_name tab5 +index_name idx1 +compress_ops 1 +compress_ops_ok 1 +CREATE DATABASE sb; +USE sb; +# create a table page size=1K (testcase-1) +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, 
+col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +CREATE INDEX idx1 ON tab5(col_1(10)); +SELECT database_name,table_name,index_name,compress_ops,compress_ops_ok +FROM information_schema.innodb_cmp_per_index +WHERE database_name='sb' and table_name='tab5' +ORDER BY index_name,table_name,database_name ; +database_name sb +table_name tab5 +index_name GEN_CLUST_INDEX +compress_ops 1 +compress_ops_ok 1 +database_name sb +table_name tab5 +index_name idx1 +compress_ops 1 +compress_ops_ok 1 +DROP TABLE tab5, test.tab5; +DROP DATABASE sb; +# reset the flags +SET GLOBAL innodb_file_per_table=default; +SET GLOBAL innodb_cmp_per_index_enabled=default; +SET GLOBAL innodb_compression_failure_threshold_pct=default; diff --git a/mysql-test/suite/innodb_zip/r/wl6470_1.result b/mysql-test/suite/innodb_zip/r/wl6470_1.result new file mode 100644 index 00000000000..ea1866d69eb --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6470_1.result @@ -0,0 +1,598 @@ +create temporary table t1 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb; +create temporary table t2 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb; +create procedure populate_t1() +begin +declare i int default 1; +while (i <= 200) do +insert into t1 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small() +begin +declare i int default 1; +while (i <= 20) do +insert into t1 values (i, 'c', 'd'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small2() +begin +declare i int default 30; +while (i <= 50) do +insert into t1 values (i, 'e', 'f'); +set i = i + 1; +end while; +end| +begin; +select count(*) from t1; +count(*) +0 +call populate_t1(); +select count(*) from t1; +count(*) +200 +select * from t1 limit 10; +keyc c1 c2 +1 a b +2 a b +3 a b +4 a b +5 a b +6 a b +7 a b +8 a 
b +9 a b +10 a b +rollback; +select count(*) from t1; +count(*) +0 +begin; +call populate_t1(); +select count(*) from t1; +count(*) +200 +commit; +select count(*) from t1; +count(*) +200 +truncate table t1; +select count(*) from t1; +count(*) +0 +call populate_t1_small(); +select count(*) from t1; +count(*) +20 +rollback; +select count(*) from t1; +count(*) +20 +truncate table t1; +call populate_t1(); +select count(*) from t1; +count(*) +200 +delete from t1 where keyc <= 60; +select count(*) from t1; +count(*) +140 +call populate_t1_small(); +select count(*) from t1; +count(*) +160 +select * from t1 limit 10; +keyc c1 c2 +1 c d +2 c d +3 c d +4 c d +5 c d +6 c d +7 c d +8 c d +9 c d +10 c d +begin; +call populate_t1_small2(); +select count(*) from t1; +count(*) +181 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +31 e f +32 e f +33 e f +34 e f +35 e f +36 e f +37 e f +38 e f +39 e f +40 e f +rollback; +select count(*) from t1; +count(*) +160 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +61 a b +62 a b +63 a b +64 a b +65 a b +66 a b +67 a b +68 a b +69 a b +70 a b +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +rollback; +begin; +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +4001 c d +4002 c d +4003 c d +4004 c d +4005 c d +4006 c d +4007 c d +4008 c d +4009 c d +4010 c d +rollback; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +commit; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +insert into t2 select * from t1 where keyc < 2101; +select count(*) from t2; +count(*) +60 +drop procedure populate_t1; +drop procedure populate_t1_small; +drop procedure populate_t1_small2; +drop table t1; +drop table t2; 
+create temporary table t1 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb key_block_size = 4; +set innodb_strict_mode=off; +create temporary table t2 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb key_block_size = 8; +set innodb_strict_mode=default; +create procedure populate_t1() +begin +declare i int default 1; +while (i <= 200) do +insert into t1 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small() +begin +declare i int default 1; +while (i <= 20) do +insert into t1 values (i, 'c', 'd'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small2() +begin +declare i int default 30; +while (i <= 50) do +insert into t1 values (i, 'e', 'f'); +set i = i + 1; +end while; +end| +begin; +select count(*) from t1; +count(*) +0 +call populate_t1(); +select count(*) from t1; +count(*) +200 +select * from t1 limit 10; +keyc c1 c2 +1 a b +2 a b +3 a b +4 a b +5 a b +6 a b +7 a b +8 a b +9 a b +10 a b +rollback; +select count(*) from t1; +count(*) +0 +begin; +call populate_t1(); +select count(*) from t1; +count(*) +200 +commit; +select count(*) from t1; +count(*) +200 +truncate table t1; +select count(*) from t1; +count(*) +0 +call populate_t1_small(); +select count(*) from t1; +count(*) +20 +rollback; +select count(*) from t1; +count(*) +20 +truncate table t1; +call populate_t1(); +select count(*) from t1; +count(*) +200 +delete from t1 where keyc <= 60; +select count(*) from t1; +count(*) +140 +call populate_t1_small(); +select count(*) from t1; +count(*) +160 +select * from t1 limit 10; +keyc c1 c2 +1 c d +2 c d +3 c d +4 c d +5 c d +6 c d +7 c d +8 c d +9 c d +10 c d +begin; +call populate_t1_small2(); +select count(*) from t1; +count(*) +181 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +31 e f +32 e f +33 e f +34 e f +35 e f +36 e f +37 e f +38 e f +39 e f +40 e f +rollback; +select count(*) from t1; 
+count(*) +160 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +61 a b +62 a b +63 a b +64 a b +65 a b +66 a b +67 a b +68 a b +69 a b +70 a b +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +rollback; +begin; +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +4001 c d +4002 c d +4003 c d +4004 c d +4005 c d +4006 c d +4007 c d +4008 c d +4009 c d +4010 c d +rollback; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +commit; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +insert into t2 select * from t1 where keyc < 2101; +select count(*) from t2; +count(*) +60 +drop procedure populate_t1; +drop procedure populate_t1_small; +drop procedure populate_t1_small2; +drop table t1; +drop table t2; +set global innodb_file_per_table = 0; +create temporary table t1 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb; +create temporary table t2 +(keyc int, c1 char(100), c2 char(100), +primary key(keyc), index sec_index(c1) +) engine = innodb; +create procedure populate_t1() +begin +declare i int default 1; +while (i <= 200) do +insert into t1 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small() +begin +declare i int default 1; +while (i <= 20) do +insert into t1 values (i, 'c', 'd'); +set i = i + 1; +end while; +end| +create procedure populate_t1_small2() +begin +declare i int default 30; +while (i <= 50) do +insert into t1 values (i, 'e', 'f'); +set i = i + 1; +end while; +end| +begin; +select count(*) from t1; +count(*) +0 +call populate_t1(); +select count(*) from t1; +count(*) +200 +select * from t1 limit 10; +keyc c1 c2 +1 a b +2 a b +3 a b +4 a b 
+5 a b +6 a b +7 a b +8 a b +9 a b +10 a b +rollback; +select count(*) from t1; +count(*) +0 +begin; +call populate_t1(); +select count(*) from t1; +count(*) +200 +commit; +select count(*) from t1; +count(*) +200 +truncate table t1; +select count(*) from t1; +count(*) +0 +call populate_t1_small(); +select count(*) from t1; +count(*) +20 +rollback; +select count(*) from t1; +count(*) +20 +truncate table t1; +call populate_t1(); +select count(*) from t1; +count(*) +200 +delete from t1 where keyc <= 60; +select count(*) from t1; +count(*) +140 +call populate_t1_small(); +select count(*) from t1; +count(*) +160 +select * from t1 limit 10; +keyc c1 c2 +1 c d +2 c d +3 c d +4 c d +5 c d +6 c d +7 c d +8 c d +9 c d +10 c d +begin; +call populate_t1_small2(); +select count(*) from t1; +count(*) +181 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +31 e f +32 e f +33 e f +34 e f +35 e f +36 e f +37 e f +38 e f +39 e f +40 e f +rollback; +select count(*) from t1; +count(*) +160 +select * from t1 where keyc > 30 limit 10; +keyc c1 c2 +61 a b +62 a b +63 a b +64 a b +65 a b +66 a b +67 a b +68 a b +69 a b +70 a b +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +rollback; +begin; +update t1 set keyc = keyc + 2000; +select * from t1 limit 10; +keyc c1 c2 +4001 c d +4002 c d +4003 c d +4004 c d +4005 c d +4006 c d +4007 c d +4008 c d +4009 c d +4010 c d +rollback; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +commit; +select * from t1 limit 10; +keyc c1 c2 +2001 c d +2002 c d +2003 c d +2004 c d +2005 c d +2006 c d +2007 c d +2008 c d +2009 c d +2010 c d +insert into t2 select * from t1 where keyc < 2101; +select count(*) from t2; +count(*) +60 +drop procedure populate_t1; +drop procedure populate_t1_small; +drop procedure populate_t1_small2; +drop table 
t1; +drop table t2; +set global innodb_file_per_table = 1; diff --git a/mysql-test/suite/innodb_zip/r/wl6470_2.result b/mysql-test/suite/innodb_zip/r/wl6470_2.result new file mode 100644 index 00000000000..b001cd73882 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6470_2.result @@ -0,0 +1,667 @@ +create procedure populate_tables() +begin +declare n int default 20; +declare inner_loop int default 100; +set global innodb_file_per_table=on; +drop table if exists t1,t2,t3,t4; +create temporary table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create temporary table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create temporary table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create temporary table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), 
+primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create temporary table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150),(190); +while (n > 0) do +start transaction; +insert into t1 values(n,n,repeat(concat(' tc3_',n),30), +repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), +repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), +now(),(100.55+n)); +insert into t2 values(n,n,repeat(concat(' tc3_',n),30), +repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), +repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), +now(),(100.55+n)); +insert into t3 values(n,n,repeat(concat(' tc3_',n),30), +repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), +repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), +now(),(100.55+n)); +insert into t4 values(n,n,repeat(concat(' tc3_',n),30), +repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), +repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), +now(),(100.55+n)); +insert into t5 values(n,n,repeat(concat(' tc3_',n),30), +repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), +repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), +now(),(100.55+n)); +if (n > 10) then +commit; +else +delete from t1 where c1 > 10 ; +delete from t2 where c1 > 10 ; +delete from t3 where c1 > 10 ; +delete from t4 where c1 > 10 ; +delete from t5 where c1 > 10 ; +rollback; +start transaction; +update t1 set c1 = c1 + 1000 where c1 > 10; +update t2 set c1 = c1 + 1000 where c1 > 10; +update t3 set c1 = c1 + 1000 where c1 > 10; +update t4 set c1 = c1 + 1000 where c1 > 10; +update t5 set c1 = c1 + 1000 where c1 > 10; +rollback; +end if; +start transaction; +insert into t1 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert 
into t2 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t3 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t4 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t5 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +delete from t1 where c1 between 100 and 110; +delete from t2 where c1 between 100 and 110; +delete from t3 where c1 between 100 and 110; +delete from t4 where c1 between 100 and 110; +delete from t5 where c1 between 100 and 110; +update t1 set c1 = c1+1 where c1>110; +update t2 set c1 = c1+1 where c1>110; +update t3 set c1 = c1+1 where c1>110; +update t4 set c1 = c1+1 where c1>110; +update t5 set c1 = c1+1 where c1>110; +savepoint a; +insert into t1 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t2 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' 
tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t3 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t4 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t5 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +savepoint b; +insert into t1 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t2 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t3 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t4 
values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +insert into t5 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), +repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), +repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), +now(),(100.55+n+inner_loop)); +savepoint c; +rollback to b; +rollback to a; +commit; +commit; +rollback; +set n = n - 1; +end while; +end| +connect con1,localhost,root,,; +connect con2,localhost,root,,; +#---client 1 : dml operation ---" +connection con1; +#---client 2 : dml operation ---" +connection con2; +# In connection 1 +connection con1; +select count(*) from t1; +count(*) +20 +select count(*) from t2; +count(*) +20 +select count(*) from t3; +count(*) +20 +select count(*) from t4; +count(*) +20 +select count(*) from t5; +count(*) +20 +select c1 from t1; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t2; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t3; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t4; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t5; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +# In connection 2 +connection con2; +select count(*) from t1; +count(*) +20 +select count(*) from t2; +count(*) +20 +select count(*) from t3; +count(*) +20 +select count(*) from t4; +count(*) +20 +select count(*) from t5; +count(*) +20 +select c1 from t1; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 
+140 +select c1 from t2; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t3; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t4; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +select c1 from t5; +c1 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +# In connection 1 +connection con1; +set autocommit = 0; +insert into t1 values (20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t1 values (20,1,'a','a','a','a','a',now(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +insert into t2 values (20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t2 values (20,1,'a','a','a','a','a',now(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +insert into t3 values (20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t3 values (20,1,'a','a','a','a','a',now(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +insert into t4 values (20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t4 values (20,1,'a','a','a','a','a',now(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +insert into t5 values (20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t5 values (20,1,'a','a','a','a','a',now(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +insert into t1 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert into t2 values (1,1,'a','a','a','a','a',now(),100.55), 
+(20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert into t3 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert into t4 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert into t5 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +set autocommit = 1; +select c1,c2 from t1 where c1 in (20,1); +c1 c2 +20 20 +select c1,c2 from t2 where c1 in (20,1); +c1 c2 +20 20 +select c1,c2 from t3 where c1 in (20,1); +c1 c2 +20 20 +select c1,c2 from t4 where c1 in (20,1); +c1 c2 +20 20 +select c1,c2 from t5 where c1 in (20,1); +c1 c2 +20 20 +replace into t1 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t2 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t3 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t4 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t5 values (20,1,'a','a','a','a','a',now(),100.55); +select c1,c2,c3,c4,c5,c6,c7,c9 from t1 where c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +select c1,c2,c3,c4,c5,c6,c7,c9 from t2 where c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +select c1,c2,c3,c4,c5,c6,c7,c9 from t3 where c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +select c1,c2,c3,c4,c5,c6,c7,c9 from t4 where c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +select c1,c2,c3,c4,c5,c6,c7,c9 from t5 where c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +update ignore t1 set c1 = 20 where c1 = 140 ; +update ignore t2 set c1 = 20 where c1 = 140 ; +update ignore t3 set c1 = 20 where c1 = 140 ; +update ignore t4 set c1 = 20 where c1 = 140 ; +update ignore t5 set c1 = 20 where c1 = 140 ; +select count(*) from t1 where c1 = 140; 
+count(*) +1 +select count(*) from t2 where c1 = 140; +count(*) +1 +select count(*) from t3 where c1 = 140; +count(*) +1 +select count(*) from t4 where c1 = 140; +count(*) +1 +select count(*) from t5 where c1 = 140; +count(*) +1 +"running select * into outfile from t1 ; +create temporary table temp_1 engine = innodb as select * from t1 where 1=2; +select count(*) from temp_1; +count(*) +0 +"running load data infile into temp_1 ; +select count(*) from temp_1; +count(*) +20 +alter table temp_1 add column c10 int default 99 , +add column c11 varchar(100) default 'test'; +alter table temp_1 add primary key (c1); +insert into temp_1 (c1,c2,c3,c4,c5,c6,c7,c8,c9) values (-1,-1,'a','a','a','a','a',now(),100.55); +select c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 from temp_1 where c1 < 0; +c1 c2 c3 c4 c5 c6 c7 c9 c10 c11 +-1 -1 a a a a a 100.550 99 test +select count(*) from temp_1 where c10 = 99 and c11 like 'test'; +count(*) +21 +insert into temp_1 (c1,c2,c3,c4,c5,c6,c7,c8,c9) values (-1,-1,'a','a','a','a','a',now(),100.55) +on duplicate key update c1=-2,c2=-2; +select c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 from temp_1 where c1 < 0; +c1 c2 c3 c4 c5 c6 c7 c9 c10 c11 +-2 -2 a a a a a 100.550 99 test +drop table t1 ,t2 ,t3,t4,t5,t6,temp_1; +disconnect con1; +connection con2; +drop table t1 ,t2 ,t3,t4,t5,t6; +disconnect con2; +connection default; +drop procedure populate_tables; +create temporary table prep_1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(6000) not null, +c5 blob(6000) not null, +c6 varchar(2000) not null, +c7 varchar(2000) not null, +c8 datetime, +c9 decimal(6,3), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +PREPARE stm FROM "insert into prep_1 values(?,?,repeat(concat(' tc3_',?),30),repeat(concat(' tc4_',?),800),repeat(concat(' tc_',?),800),repeat(concat(' tc6_',?),245),repeat(concat(' tc7_',?),245),now(),(100.55+?))"; +set @var = 5; +set @var_static = 5; +EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; +EXECUTE stm USING 
@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; +set @var = @var - 1; +EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; +EXECUTE stm USING @var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; +set @var = @var - 1; +EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; +EXECUTE stm USING @var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; +set @var = @var - 1; +EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; +EXECUTE stm USING @var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; +set @var = @var - 1; +EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; +EXECUTE stm USING @var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; +set @var = @var - 1; +select c1,left(c3,15) from prep_1 order by c1 ; +c1 left(c3,15) +1 tc3_1 tc3_1 tc +2 tc3_2 tc3_2 tc +3 tc3_3 tc3_3 tc +4 tc3_4 tc3_4 tc +5 tc3_5 tc3_5 tc +5 tc3_5 tc3_5 tc +5 tc3_5 tc3_5 tc +5 tc3_5 tc3_5 tc +5 tc3_5 tc3_5 tc +5 tc3_5 tc3_5 tc +select count(*) from prep_1; +count(*) +10 +PREPARE stm_1 FROM "UPDATE prep_1 SET c1 = c1 + 1"; +EXECUTE stm_1; +EXECUTE stm_1; +select c1,left(c3,15) from prep_1 order by c1 ; +c1 left(c3,15) +3 tc3_1 tc3_1 tc +4 tc3_2 tc3_2 tc +5 tc3_3 tc3_3 tc +6 tc3_4 tc3_4 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +select count(*) from prep_1; +count(*) +10 +PREPARE stm_2 FROM "DELETE FROM prep_1 ORDER BY c1 LIMIT 1"; +EXECUTE stm_2; +EXECUTE stm_2; +select c1,left(c3,15) from prep_1 order by c1 ; +c1 left(c3,15) +5 tc3_3 tc3_3 tc +6 tc3_4 tc3_4 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +7 tc3_5 tc3_5 tc +select count(*) from prep_1; +count(*) +8 +drop prepare stm; +drop prepare stm_1; +drop prepare stm_2; +drop table 
prep_1; diff --git a/mysql-test/suite/innodb_zip/r/wl6501_1.result b/mysql-test/suite/innodb_zip/r/wl6501_1.result new file mode 100644 index 00000000000..bf901804be5 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6501_1.result @@ -0,0 +1,1202 @@ +set global innodb_file_per_table=on; +# Verify that 'TRUNCATE TABLE' statement works fine and the size +# of .ibd file is equal to the initial size after truncation. +drop table if exists t1,t2,t3,t4,t6; +Warnings: +Note 1051 Unknown table 'test.t1' +Note 1051 Unknown table 'test.t2' +Note 1051 Unknown table 'test.t3' +Note 1051 Unknown table 'test.t4' +Note 1051 Unknown table 'test.t6' +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 
blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_during_drop_index_temp_table"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_during_drop_index_temp_table point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t5; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_drop_of_sec_index point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_drop_of_sec_index point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_drop_of_sec_index point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_drop_of_sec_index point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not 
null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_drop_reinit_done_create_to_start---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t5; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_create_of_sec_index---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_create_of_sec_index---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_create_of_sec_index---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_on_create_of_sec_index---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) 
not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_before_log_removal"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_before_log_removal point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_before_log_removal"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_before_log_removal point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_before_log_removal"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_before_log_removal point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_before_log_removal"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_before_log_removal point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 
blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 
varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_truncate_done point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; +create table t1(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=redundant; +create table t2(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compact; +create table t3(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=compressed key_block_size=4; +create table t4(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb row_format=dynamic; +create temporary table t5(c1 int not null, +c2 int not null, +c3 char(255) not null, +c4 text(500) not null, +c5 blob(500) not null, +c6 varchar(500) not null, +c7 
varchar(500) not null, +c8 datetime, +c9 decimal(5,3), +primary key (c1), +index (c3,c4(50),c5(50)), +index (c2)) +engine=innodb; +create table t6 ( a int ) engine = innodb; +insert into t6 values (50),(100),(150); +select count(*) from t1; +count(*) +3 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +count(*) +3 +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_redo_log_write_complete point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +3 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_redo_log_write_complete point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t2; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +3 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_redo_log_write_complete point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t3; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +3 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +"---debug ib_trunc_crash_after_redo_log_write_complete point---" +# Write file to make mysql-test-run.pl expect crash and restart +# Run the crashing query +truncate table t4; +ERROR HY000: Lost connection to MySQL server during query +# Restart the MySQL server +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +select count(*) from t4; +count(*) +0 +select count(*) from t5; +ERROR 42S02: Table 'test.t5' doesn't exist +select count(*) from t6; +count(*) +3 +drop table t1, t2, t3, t4, t6; diff --git a/mysql-test/suite/innodb_zip/r/wl6501_crash_3.result b/mysql-test/suite/innodb_zip/r/wl6501_crash_3.result new file mode 100644 index 00000000000..e874ab1a37a --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6501_crash_3.result @@ -0,0 +1,501 @@ +call mtr.add_suppression("The file '.*' already exists though the corresponding table did not exist in the InnoDB data dictionary"); +call mtr.add_suppression("Cannot create file '.*'"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'"); +set global innodb_file_per_table = on; +"1. Hit crash point while writing redo log." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine=innodb row_format=compressed +key_block_size=16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_while_writing_redo_log"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"2. Hit crash point on completion of redo log write." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_redo_log_write_complete"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"3. Hit crash point while dropping indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_clust_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_uniq_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"4. Hit crash point on completing drop of all indexes before creation" +" of index is commenced." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"5. Hit crash point while creating indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_clust_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_uniq_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_sec_index"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"6. Hit crash point after data is updated to system-table and" +" in-memory dict." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"7. Hit crash point before/after log checkpoint is done." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_before_log_removal"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. 
Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 16; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_truncate_done"; +Warnings: +Warning 1287 '@@debug' is deprecated and will be removed in a future release. Please use '@@debug_dbug' instead +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; diff --git a/mysql-test/suite/innodb_zip/r/wl6501_crash_4.result b/mysql-test/suite/innodb_zip/r/wl6501_crash_4.result new file mode 100644 index 00000000000..cb8a4d5a157 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6501_crash_4.result @@ -0,0 +1,553 @@ +call mtr.add_suppression("The file '.*' already exists though the corresponding table did not exist in the InnoDB data dictionary"); +call mtr.add_suppression("Cannot create file '.*'"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'"); +set global innodb_file_per_table = on; +"1. Hit crash point while writing redo log." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine=innodb row_format=compressed +key_block_size=4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_while_writing_redo_log"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"2. Hit crash point on completion of redo log write." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_redo_log_write_complete"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"3. Hit crash point while dropping indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_clust_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_uniq_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_sec_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"4. 
Hit crash point on completing drop of all indexes before creation" +" of index is commenced." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"5. Hit crash point while creating indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_clust_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_uniq_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_sec_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"6. 
Hit crash point after data is updated to system-table and" +" in-memory dict." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"7. Hit crash point before/after log checkpoint is done." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_before_log_removal"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_truncate_done"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; +call mtr.add_suppression("does not exist in the InnoDB internal"); +set global innodb_file_per_table = on; +"1. Hit crash point on completing drop of all indexes before creation" +" of index is commenced." +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set innodb_strict_mode=off; +create temporary table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check Error Table 'test.t' doesn't exist +test.t check status Operation failed +"2. Hit crash point after data is updated to system-table and" +" in-memory dict." +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set innodb_strict_mode=off; +create temporary table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 4; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check Error Table 'test.t' doesn't exist +test.t check status Operation failed +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; diff --git a/mysql-test/suite/innodb_zip/r/wl6501_crash_5.result b/mysql-test/suite/innodb_zip/r/wl6501_crash_5.result new file mode 100644 index 00000000000..74f1e9dd1ad --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6501_crash_5.result @@ -0,0 +1,489 @@ +call mtr.add_suppression("The file '.*' already exists though the corresponding table did not exist in the InnoDB data dictionary"); +call mtr.add_suppression("Cannot create file '.*'"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'"); +set global innodb_file_per_table = on; +"1. Hit crash point while writing redo log." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine=innodb row_format=compressed +key_block_size=8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_while_writing_redo_log"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"2. Hit crash point on completion of redo log write." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_redo_log_write_complete"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"3. Hit crash point while dropping indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_clust_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_uniq_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_drop_of_sec_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"4. 
Hit crash point on completing drop of all indexes before creation" +" of index is commenced." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_drop_reinit_done_create_to_start"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"5. Hit crash point while creating indexes." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_clust_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_uniq_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_create_of_sec_index"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"6. 
Hit crash point after data is updated to system-table and" +" in-memory dict." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_on_updating_dict_sys_info"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +"7. Hit crash point before/after log checkpoint is done." +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_before_log_removal"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +use test; +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +SET innodb_strict_mode=OFF; +create table t ( +i int, f float, c char, +primary key pk(i), unique findex(f), index ck(c)) +engine = innodb row_format = compressed +key_block_size = 8; +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +check table t; +Table Op Msg_type Msg_text +test.t check status OK +set session debug = "+d,ib_trunc_crash_after_truncate_done"; +truncate table t; +ERROR HY000: Lost connection to MySQL server during query +# restart +check table t; +Table Op Msg_type Msg_text +test.t check status OK +select * from t; +i f c +insert into t values (1, 1.1, 'a'), (2, 2.2, 'b'), (3, 3.3, 'c'); +select * from t; +i f c +1 1.1 a +2 2.2 b +3 3.3 c +select * from t where f < 2.5; +i f c +1 1.1 a +2 2.2 b +drop table t; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; diff --git a/mysql-test/suite/innodb_zip/r/wl6501_scale_1.result b/mysql-test/suite/innodb_zip/r/wl6501_scale_1.result new file mode 100644 index 00000000000..9c197737137 --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6501_scale_1.result @@ -0,0 +1,354 @@ +set innodb_strict_mode=OFF; +create procedure populate() +begin +declare i int default 1; +while (i <= 5000) do +insert into t1 values (i, 'a', 'b'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_small() +begin +declare i int default 10001; +while (i <= 12000) do +insert into t1 values (i, 'c', 'd'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Antelope'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create table tNUMBER +(i int, cNUMBER char(NUMBER), cNUMBER char(NUMBER), +index cNUMBER_idx(cNUMBER)) +engine=innodb row_format=compact +key_block_size=NUMBER; +Warnings: +Warning NUMBER InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning NUMBER InnoDB: ignoring KEY_BLOCK_SIZE=NUMBER. +create table t2 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compact +key_block_size=16; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. +create temporary table t3 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compact +key_block_size=16; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. 
+select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +begin; +call populate(); +commit; +select count(*) from t1; +count(*) +5000 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +truncate table t1; +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +call populate_small(); +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +7000 +select count(*) from t3; +count(*) +7000 +truncate table t2; +truncate table t3; +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +call populate_small(); +select count(*) from t1; +count(*) +4000 +select count(*) from t2; +count(*) +2000 +select count(*) from t3; +count(*) +2000 +drop table t1; +drop table t2; +drop table t3; +drop procedure populate; +drop procedure populate_small; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; +set innodb_strict_mode=OFF; +create procedure populate() +begin +declare i int default 1; +while (i <= 5000) do +insert into t1 values (i, 'a', 'b'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_small() +begin +declare i int default 10001; +while (i <= 12000) do +insert into t1 values (i, 'c', 'd'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +set global innodb_file_per_table = 1; +set global innodb_file_format = 'Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create table tNUMBER +(i int, cNUMBER char(NUMBER), cNUMBER char(NUMBER), +index cNUMBER_idx(cNUMBER)) +engine=innodb row_format=compressed +key_block_size=NUMBER; +create table t2 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compressed +key_block_size=16; +create temporary table t3 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compressed +key_block_size=16; +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +begin; +call populate(); +commit; +select count(*) from t1; +count(*) +5000 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +truncate table t1; +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +call populate_small(); +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +7000 +select count(*) from t3; +count(*) +7000 +truncate table t2; +truncate table t3; +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +call populate_small(); +select count(*) from t1; +count(*) +4000 +select count(*) from t2; +count(*) +2000 +select count(*) from t3; +count(*) +2000 +drop table t1; +drop table t2; +drop table t3; +drop procedure populate; +drop procedure populate_small; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; +set innodb_strict_mode=OFF; +create procedure populate() +begin +declare i int default 1; +while (i <= 5000) do +insert into t1 values (i, 'a', 'b'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +create procedure populate_small() +begin +declare i int default 10001; +while (i <= 12000) do +insert into t1 values (i, 'c', 'd'); +insert into t2 values (i, 'a', 'b'); +insert into t3 values (i, 'a', 'b'); +set i = i + 1; +end while; +end| +set global innodb_file_per_table = 0; +set global innodb_file_format = 'Antelope'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +create table tNUMBER +(i int, cNUMBER char(NUMBER), cNUMBER char(NUMBER), +index cNUMBER_idx(cNUMBER)) +engine=innodb row_format=compact +key_block_size=NUMBER; +Warnings: +Warning NUMBER InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Warning NUMBER InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning NUMBER InnoDB: ignoring KEY_BLOCK_SIZE=NUMBER. +create table t2 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compact +key_block_size=16; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. +create temporary table t3 +(i int, c1 char(100), c2 char(100), +index c1_idx(c1)) +engine=innodb row_format=compact +key_block_size=16; +Warnings: +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_per_table. +Warning 1478 InnoDB: KEY_BLOCK_SIZE requires innodb_file_format > Antelope. +Warning 1478 InnoDB: ignoring KEY_BLOCK_SIZE=16. 
+select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +begin; +call populate(); +commit; +select count(*) from t1; +count(*) +5000 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +truncate table t1; +select count(*) from t1; +count(*) +0 +select count(*) from t2; +count(*) +5000 +select count(*) from t3; +count(*) +5000 +call populate_small(); +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +7000 +select count(*) from t3; +count(*) +7000 +truncate table t2; +truncate table t3; +select count(*) from t1; +count(*) +2000 +select count(*) from t2; +count(*) +0 +select count(*) from t3; +count(*) +0 +call populate_small(); +select count(*) from t1; +count(*) +4000 +select count(*) from t2; +count(*) +2000 +select count(*) from t3; +count(*) +2000 +drop table t1; +drop table t2; +drop table t3; +drop procedure populate; +drop procedure populate_small; +set global innodb_file_format = Barracuda; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html +set global innodb_file_per_table = 1; diff --git a/mysql-test/suite/innodb_zip/r/wl6915_1.result b/mysql-test/suite/innodb_zip/r/wl6915_1.result new file mode 100644 index 00000000000..bba81098e9d --- /dev/null +++ b/mysql-test/suite/innodb_zip/r/wl6915_1.result @@ -0,0 +1,2079 @@ +call mtr.ADD_suppression(".*Resizing redo log.*"); +call mtr.ADD_suppression(".*Starting to delete and rewrite log files.*"); +call mtr.ADD_suppression(".*New log files created.*"); +SELECT @@global.innodb_undo_tablespaces; +@@global.innodb_undo_tablespaces +0 +CREATE PROCEDURE populate_tables(IN id VARCHAR(10)) +begin +declare n int default 20; +set global innodb_file_per_table=on; +DROP TABLE IF EXISTS t1,t2,t3,t4; +CREATE TEMPORARY TABLE t1_temp(c1 int NOT NULL, +c2 int NOT NULL, +c3 char(255) NOT NULL, +c4 text(600) NOT NULL, +c5 blob(600) NOT NULL, +c6 varchar(600) NOT NULL, +c7 varchar(600) NOT NULL, +c8 datetime, +c9 decimal(6,3), +PRIMARY KEY (c1), +INDEX (c3,c4(50),c5(50)), +INDEX (c2)) +ENGINE=InnoDB ROW_FORMAT=redundant; +set @s = concat("CREATE TABLE t1",id," ( c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=redundant;"); +PREPARE createTable FROM @s; +EXECUTE createTable; +DEALLOCATE PREPARE createTable; +CREATE TEMPORARY TABLE t2_temp(c1 int NOT NULL, +c2 int NOT NULL, +c3 char(255) NOT NULL, +c4 text(600) NOT NULL, +c5 blob(600) NOT NULL, +c6 varchar(600) NOT NULL, +c7 varchar(600) NOT NULL, +c8 datetime, +c9 decimal(6,3), +PRIMARY KEY (c1), +INDEX (c3,c4(50),c5(50)), +INDEX (c2)) +ENGINE=InnoDB ROW_FORMAT=compact; +set @s = concat("CREATE TABLE t2",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 
varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=compact;"); +PREPARE createTable FROM @s; +EXECUTE createTable; +DEALLOCATE PREPARE createTable; +CREATE TEMPORARY TABLE t3_temp(c1 int NOT NULL, +c2 int NOT NULL, +c3 char(255) NOT NULL, +c4 text(600) NOT NULL, +c5 blob(600) NOT NULL, +c6 varchar(600) NOT NULL, +c7 varchar(600) NOT NULL, +c8 datetime, +c9 decimal(6,3), +PRIMARY KEY (c1), +INDEX (c3,c4(50),c5(50)), +INDEX (c2)) +ENGINE=InnoDB ROW_FORMAT=compressed key_block_size=4; +set @s = concat("CREATE TABLE t3",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=compressed key_block_size=4;"); +PREPARE createTable FROM @s; +EXECUTE createTable; +DEALLOCATE PREPARE createTable; +CREATE TEMPORARY TABLE t4_temp(c1 int NOT NULL, +c2 int NOT NULL, +c3 char(255) NOT NULL, +c4 text(600) NOT NULL, +c5 blob(600) NOT NULL, +c6 varchar(600) NOT NULL, +c7 varchar(600) NOT NULL, +c8 datetime, +c9 decimal(6,3), +PRIMARY KEY (c1), +INDEX (c3,c4(50),c5(50)), +INDEX (c2)) +ENGINE=InnoDB ROW_FORMAT=dynamic; +set @s = concat("CREATE TABLE t4",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=dynamic;"); +PREPARE createTable FROM @s; +EXECUTE createTable; +DEALLOCATE PREPARE createTable; +while (n > 0) do +START TRANSACTION; +set @s = concat("INSERT INTO t1",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), 
NOW(),(100.55+",n,"));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t1_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), +REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), +REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), +NOW(),(100.55+n)); +set @s = concat("INSERT INTO t2",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t2_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), +REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), +REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), +NOW(),(100.55+n)); +savepoint a; +set @s = concat("INSERT INTO t3",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t3_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), +REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), +REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), +NOW(),(100.55+n)); +savepoint b; +set @s = concat("INSERT INTO t4",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t4_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), +REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), +REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), +NOW(),(100.55+n)); 
+if (n > 10) then +if (n > 10 and n <=12) then +ROLLBACK TO SAVEPOINT a; +COMMIT; +end if; +if (n > 12 and n < 15) then +ROLLBACK TO SAVEPOINT b; +COMMIT; +end if; +if (n > 15) then +COMMIT; +end if; +else +if (n > 5) then +START TRANSACTION; +DELETE FROM t1_temp WHERE c1 > 10 ; +DELETE FROM t2_temp WHERE c1 > 10 ; +DELETE FROM t3_temp WHERE c1 > 10 ; +DELETE FROM t4_temp WHERE c1 > 10 ; +rollback; +START TRANSACTION; +update t1_temp set c1 = c1 + 1000 WHERE c1 > 10; +update t2_temp set c1 = c1 + 1000 WHERE c1 > 10; +update t3_temp set c1 = c1 + 1000 WHERE c1 > 10; +update t4_temp set c1 = c1 + 1000 WHERE c1 > 10; +rollback; +end if; +end if; +if (n < 5) then +rollback; +end if; +FLUSH logs; +ALTER TABLE t1_temp DROP PRIMARY KEY; +ALTER TABLE t1_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); +ALTER TABLE t2_temp DROP PRIMARY KEY; +ALTER TABLE t2_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); +ALTER TABLE t3_temp DROP PRIMARY KEY; +ALTER TABLE t3_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); +ALTER TABLE t4_temp DROP PRIMARY KEY; +ALTER TABLE t4_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); +FLUSH tables; +START TRANSACTION; +set @s = concat("INSERT INTO t1",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t1_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t2",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), 
NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t2_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t3",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t3_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t4",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t4_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +DELETE FROM t1_temp WHERE c1 between 100 and 110; +DELETE FROM t2_temp WHERE c1 between 100 and 110; +DELETE FROM t3_temp WHERE c1 between 100 and 110; +DELETE FROM t4_temp WHERE c1 between 100 and 110; +update t1_temp set c1 = c1+1 WHERE c1>110; +update t2_temp set c1 = c1+1 WHERE c1>110; +update t3_temp set c1 = c1+1 WHERE c1>110; +update t4_temp set c1 = c1+1 WHERE c1>110; +savepoint a; +set @s = 
concat("INSERT INTO t1",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t1_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t2",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t2_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t3",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t3_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t4",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' 
tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t4_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +savepoint b; +set @s = concat("INSERT INTO t1",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t1_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t2",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t2_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t3",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' 
tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t3_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +set @s = concat("INSERT INTO t4",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); +PREPARE insertIntoTable FROM @s; +EXECUTE insertIntoTable; +DEALLOCATE PREPARE insertIntoTable; +INSERT INTO t4_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), +REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), +REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), +NOW(),(100.55+n+100)); +savepoint c; +rollback to b; +rollback to a; +COMMIT; +COMMIT; +rollback; +set n = n - 1; +end while; +end| +connect con1,localhost,root,,; +connect con2,localhost,root,,; +#---client 1 : dml operation ---" +connection con1; +#---client 2 : dml operation ---" +connection con2; +# In connection 1 +connection con1; +SELECT count(*) FROM t1_1; +count(*) +36 +SELECT count(*) FROM t2_1; +count(*) +36 +SELECT count(*) FROM t3_1; +count(*) +34 +SELECT count(*) FROM t4_1; +count(*) +32 +SELECT c1 FROM t1_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_1; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 
+113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_1; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +# In connection 2 +connection con2; +SELECT count(*) FROM t1_2; +count(*) +36 +SELECT count(*) FROM t2_2; +count(*) +36 +SELECT count(*) FROM t3_2; +count(*) +34 +SELECT count(*) FROM t4_2; +count(*) +32 +SELECT c1 FROM t1_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_2; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_2; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM 
t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +# In connection 1 +connection con1; +set AUTOCOMMIT = 0; +ALTER TABLE t1_temp DROP PRIMARY KEY; +ALTER TABLE t1_temp ADD PRIMARY KEY (c1); +ALTER TABLE t2_temp DROP PRIMARY KEY; +ALTER TABLE t2_temp ADD PRIMARY KEY (c1); +ALTER TABLE t3_temp DROP PRIMARY KEY; +ALTER TABLE t3_temp ADD PRIMARY KEY (c1); +ALTER TABLE t4_temp DROP PRIMARY KEY; +ALTER TABLE t4_temp ADD PRIMARY KEY (c1); +INSERT INTO t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +insert ignore into t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +Warnings: +Warning 1062 Duplicate entry '20' for key 'PRIMARY' +INSERT INTO 
t1_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t2_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t3_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +INSERT INTO t4_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +ERROR 23000: Duplicate entry '20' for key 'PRIMARY' +set AUTOCOMMIT = 1; +SELECT c1,c2 FROM t1_temp WHERE c1 in (20,1); +c1 c2 +20 20 +SELECT c1,c2 FROM t2_temp WHERE c1 in (20,1); +c1 c2 +20 20 +SELECT c1,c2 FROM t3_temp WHERE c1 in (20,1); +c1 c2 +20 20 +SELECT c1,c2 FROM t4_temp WHERE c1 in (20,1); +c1 c2 +20 20 +REPLACE INTO t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t1_temp WHERE c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t2_temp WHERE c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t3_temp WHERE c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t4_temp WHERE c1 = 20; +c1 c2 c3 c4 c5 c6 c7 c9 +20 1 a a a a a 100.550 +update ignore t1_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t2_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t3_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t4_temp set c1 = 20 WHERE c1 = 140 ; +SELECT count(*) FROM t1_temp WHERE c1 = 140; +count(*) +1 +SELECT count(*) FROM t2_temp WHERE c1 = 140; +count(*) +1 +SELECT count(*) FROM t3_temp WHERE c1 = 140; 
+count(*) +1 +SELECT count(*) FROM t4_temp WHERE c1 = 140; +count(*) +1 +ALTER TABLE t1_temp ADD COLUMN c10 int default 99 , +ADD COLUMN c11 varchar(100) default 'test'; +ALTER TABLE t1_temp DROP PRIMARY KEY; +ALTER TABLE t1_temp ADD PRIMARY KEY (c1); +INSERT INTO t1_temp (c1,c2,c3,c4,c5,c6,c7,c8,c9) VALUES (-1,-1,'a','a','a','a','a',NOW(),100.55); +SELECT c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 FROM t1_temp WHERE c1 < 0; +c1 c2 c3 c4 c5 c6 c7 c9 c10 c11 +-1 -1 a a a a a 100.550 99 test +SELECT count(*) FROM t1_temp WHERE c10 = 99 and c11 like 'test'; +count(*) +27 +INSERT INTO t1_temp (c1,c2,c3,c4,c5,c6,c7,c8,c9) VALUES (-1,-1,'a','a','a','a','a',NOW(),100.55) +ON DUPLICATE KEY UPDATE c1=-2,c2=-2; +SELECT c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 FROM t1_temp WHERE c1 < 0; +c1 c2 c3 c4 c5 c6 c7 c9 c10 c11 +-2 -2 a a a a a 100.550 99 test +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; +connection con2; +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; +connection default; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connection con1; +call populate_tables('_1');; +connection con2; +call populate_tables('_2');; +"#connection 1 - verify tables" +connection con1; +SELECT count(*) FROM t1_1; +count(*) +36 +SELECT count(*) FROM t2_1; +count(*) +36 +SELECT count(*) FROM t3_1; +count(*) +34 +SELECT count(*) FROM t4_1; +count(*) +32 +SELECT c1 FROM t1_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_1; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_1; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 
+106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; +"#connection 2 - verify tables" +connection con2; +SELECT count(*) FROM t1_2; +count(*) +36 +SELECT count(*) FROM t2_2; +count(*) +36 +SELECT count(*) FROM t3_2; +count(*) +34 +SELECT count(*) FROM t4_2; +count(*) +32 +SELECT c1 FROM t1_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_2; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_2; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 
+11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; +connection default; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connection con1; +call populate_tables('_1');; +connection con2; +call populate_tables('_2');; +"#connection 1 - verify tables" +connection con1; +SELECT count(*) FROM t1_1; +count(*) +36 +SELECT count(*) FROM t2_1; +count(*) +36 +SELECT count(*) FROM t3_1; +count(*) +34 +SELECT count(*) FROM t4_1; +count(*) +32 +SELECT c1 FROM t1_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_1; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_1; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_1; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 
+6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; +"#connection 2 - verify tables" +connection con2; +SELECT count(*) FROM t1_2; +count(*) +36 +SELECT count(*) FROM t2_2; +count(*) +36 +SELECT count(*) FROM t3_2; +count(*) +34 +SELECT count(*) FROM t4_2; +count(*) +32 +SELECT c1 FROM t1_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t2_2; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t3_2; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT c1 FROM t4_2; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 +20 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +SELECT count(*) FROM t1_temp; +count(*) +26 +SELECT count(*) FROM t2_temp; +count(*) +26 +SELECT count(*) FROM t3_temp; +count(*) +24 +SELECT count(*) FROM t4_temp; +count(*) +22 +SELECT c1 FROM t1_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t2_temp; +c1 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t3_temp; +c1 +5 +6 +7 +8 +9 +10 +13 +14 +15 +16 +17 +18 +19 +20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +SELECT c1 FROM t4_temp; +c1 +5 +6 +7 +8 +9 +10 +15 +16 +17 +18 +19 
+20 +122 +124 +126 +128 +130 +132 +134 +136 +138 +140 +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; +connection default; +DROP PROCEDURE populate_tables; diff --git a/mysql-test/suite/innodb_zip/t/16k-master.opt b/mysql-test/suite/innodb_zip/t/16k-master.opt new file mode 100644 index 00000000000..82f574a8039 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/16k-master.opt @@ -0,0 +1,3 @@ +--loose-innodb-sys-indexes +--loose-innodb-sys-tablespaces +--loose-innodb-sys-datafiles diff --git a/mysql-test/suite/innodb_zip/t/16k.test b/mysql-test/suite/innodb_zip/t/16k.test new file mode 100644 index 00000000000..884a729410b --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/16k.test @@ -0,0 +1,718 @@ +# Tests for setting innodb-page-size=16k; default value +--source include/big_test.inc +--source include/have_innodb.inc +--source include/have_innodb_16k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +let $MYSQLD_DATADIR = `select @@datadir`; +let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; + +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +# These values can change during the test +--enable_query_log + +--echo # Test 1) Show the page size from Information Schema +--disable_warnings +SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_page_size'; +--enable_warnings + +--echo # Test 2) The number of buffer pool pages is dependent upon the page size. +# +# buffer pool pages is dependent upon buffer pool size and what other +# tests are run concurrently +#--disable_warnings +#--replace_result 1535 {checked_valid} 1536 {checked_valid} +#SELECT variable_value FROM information_schema.global_status +# WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; +#--enable_warnings + +--echo # Test 3) Query some information_shema tables that are dependent upon +--echo # the page size. +# Show the metadata for tables in schema 'mysql'. 
+# Pulled from innodb-system-table-view.test +# The IDs of mysql.innodb_table_stats and mysql.innodb_index_stats are +# unpredictable. They depend on whether mtr has created the database for +# this test from scratch or is using a previously created database where +# those tables have been dropped and recreated. Since we cannot force mtr +# to use a freshly created database for this test we do not return the +# table or index IDs. We can return the space IS of mysql schema tables +# since they are created consistently during bootstrap. +SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; + +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; + +# Show the metadata for tables in schema 'test'. +# Do not return the space ID since this tablespace may have existed before +# this test runs. The root page number of each index should be consistent +# within a file-per-table tablespace. +SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +--source suite/innodb/include/show_i_s_tablespaces.inc +DROP TABLE t1, t2, t3, t4; + +--echo # Test 4) The maximum row size is dependent upon the page size. +--echo # Redundant: 8123, Compact: 8126. +--echo # Compressed: 8126, Dynamic: 8126. 
+--echo # Each row format has its own amount of overhead that +--echo # varies depending on number of fields and other overhead. + +SET SESSION innodb_strict_mode = ON; + +# Redundant table; 8011 bytes with 40 char fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(211) +) ROW_FORMAT=redundant; +DROP TABLE t1; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(212) +) ROW_FORMAT=redundant; + +# Compact table; 8096 bytes with 40 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 
char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) +) ROW_FORMAT=compact; +DROP TABLE t1; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) +) ROW_FORMAT=compact; + +# Compressed table; 7959 bytes with 40 CHAR fields +# Bug#13391353 Limit is 7957 on 32-Linux only +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(157) +) ROW_FORMAT=compressed; +DROP TABLE t1; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), 
c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(200), c40 char(160) +) ROW_FORMAT=compressed; + +# Dynamic table; 8096 bytes with 40 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(246) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(200), +c21 char(200), c22 char(200), c23 char(200), c24 char(200), c25 char(200), +c26 char(200), c27 char(200), c28 char(200), c29 char(200), c30 char(200), +c31 char(200), c32 char(200), c33 char(200), c34 char(200), c35 char(200), +c36 char(200), c37 char(200), c38 char(200), c39 char(250), c40 char(247) +) ROW_FORMAT=dynamic; + +# +# Test the maximum key length +# Moved from innodb-index.test since each page size has its own max key length. +# Max Key Length is 3072 for 16k pages. 
+# Max key Part length is 767 +# InnoDB assumes 3 bytes for each UTF8 character. +# +CREATE TABLE t1 (a varchar(255) character set utf8, + b varchar(255) character set utf8, + c varchar(255) character set utf8, + d varchar(255) character set utf8, + e varchar(4) character set utf8, + PRIMARY KEY (a,b,c,d,e)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(255) character set utf8, + b varchar(255) character set utf8, + c varchar(255) character set utf8, + d varchar(255) character set utf8, + e varchar(5) character set utf8, + PRIMARY KEY (a,b,c,d,e)) + ENGINE=innodb; +CREATE TABLE t1 (a varchar(255) character set utf8, + b varchar(255) character set utf8, + c varchar(255) character set utf8, + d varchar(255) character set utf8, + e varchar(255) character set utf8, + f varchar(4) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e,f)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(255) character set utf8, + b varchar(255) character set utf8, + c varchar(255) character set utf8, + d varchar(255) character set utf8, + e varchar(255) character set utf8, + f varchar(5) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e,f)) + ENGINE=innodb; + +--echo # Test 5) Make sure that KEY_BLOCK_SIZE=16, 8, 4, 2 & 1 +--echo # are all accepted. 
+ +SET SESSION innodb_strict_mode = ON; + +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +SET SESSION innodb_strict_mode = OFF; + +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM 
information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + + +--echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +--echo # are rejected when innodb_file_per_table=OFF +# Moved from innodb-zip.test +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_format = `Barracuda`; + + +--echo # Test 7) This series of tests were moved from innodb-index to here +--echo # because the second alter table t1 assumes a 16k page size. +--echo # Moving the test allows the rest of innodb-index to be run on all +--echo # page sizes. The previously disabled portions of this test were +--echo # moved as well. 
+ +CREATE TABLE t2(d varchar(17) PRIMARY KEY) ENGINE=innodb DEFAULT CHARSET=utf8; +CREATE TABLE t3(a int PRIMARY KEY) ENGINE=innodb; + +INSERT INTO t3 VALUES (22),(44),(33),(55),(66); + +INSERT INTO t2 VALUES ('jejdkrun87'),('adfd72nh9k'), +('adfdpplkeock'),('adfdijnmnb78k'),('adfdijn0loKNHJik'); + +CREATE TABLE t1(a int, b blob, c text, d text NOT NULL) +ENGINE=innodb DEFAULT CHARSET=utf8 STATS_PERSISTENT=0; + +INSERT INTO t1 +SELECT a,LEFT(REPEAT(d,100*a),65535),REPEAT(d,20*a),d FROM t2,t3 order by a, d; +DROP TABLE t2, t3; +SELECT COUNT(*) FROM t1 WHERE a=44; +SELECT a, +LENGTH(b),b=LEFT(REPEAT(d,100*a),65535),LENGTH(c),c=REPEAT(d,20*a),d FROM t1 +ORDER BY 1, 2, 3, 4, 5, 6; +# in-place alter table should trigger ER_PRIMARY_CANT_HAVE_NULL +--error ER_DUP_ENTRY +ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +DELETE FROM t1 WHERE d='null'; +--error ER_DUP_ENTRY +ALTER TABLE t1 ADD PRIMARY KEY (a), ADD KEY (b(20)); +DELETE FROM t1 WHERE a%2; +CHECK TABLE t1; +# NULL -> NOT NULL only allowed INPLACE if strict sql_mode is on. +# And adding a PRIMARY KEY will also add NOT NULL implicitly! 
+ALTER TABLE t1 ADD PRIMARY KEY (a,b(255),c(255)), ADD KEY (b(767)); +SELECT COUNT(*) FROM t1 WHERE a=44; +SELECT a, +LENGTH(b), b=LEFT(REPEAT(d,100*a), 65535),LENGTH(c), c=REPEAT(d,20*a), d FROM t1; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +EXPLAIN SELECT * FROM t1 WHERE b LIKE 'adfd%'; + +# The following tests are disabled because of the introduced timeouts for +# metadata locks at the MySQL level as part of the fix for +# Bug#45225 Locking: hang if drop table with no timeout +# The following commands now play with MySQL metadata locks instead of +# InnoDB locks +# start disabled45225_1 +## +## Test locking +## +# +#CREATE TABLE t2(a int, b varchar(255), PRIMARY KEY(a,b)) ENGINE=innodb; +#INSERT INTO t2 SELECT a,LEFT(b,255) FROM t1; +#DROP TABLE t1; +#RENAME TABLE t2 to t1; +# +#connect (a,localhost,root,,); +#connect (b,localhost,root,,); +#connection a; +#SET innodb_lock_wait_timeout=1; +#begin; +## Obtain an IX lock on the table +#SELECT a FROM t1 limit 1 FOR UPDATE; +#connection b; +#SET innodb_lock_wait_timeout=1; +## This would require an S lock on the table, conflicting with the IX lock. +#--error ER_LOCK_WAIT_TIMEOUT +#CREATE INDEX t1ba ON t1 (b,a); +#connection a; +#commit; +#begin; +## Obtain an IS lock on the table +#SELECT a FROM t1 limit 1 lock in share mode; +#connection b; +## This will require an S lock on the table. No conflict with the IS lock. +#CREATE INDEX t1ba ON t1 (b,a); +## This would require an X lock on the table, conflicting with the IS lock. +#--error ER_LOCK_WAIT_TIMEOUT +#DROP INDEX t1ba ON t1; +#connection a; +#commit; +#EXPLAIN SELECT a FROM t1 ORDER BY b; +#--send +#SELECT a,sleep(2+a/100) FROM t1 ORDER BY b limit 3; +# +## The following DROP INDEX will succeed, altough the SELECT above has +## opened a read view. However, during the execution of the SELECT, +## MySQL should hold a table lock that should block the execution +## of the DROP INDEX below. 
+# +#connection b; +#SELECT sleep(1); +#DROP INDEX t1ba ON t1; +# +## After the index was dropped, subsequent SELECTs will use the same +## read view, but they should not be accessing the dropped index any more. +# +#connection a; +#reap; +#EXPLAIN SELECT a FROM t1 ORDER BY b; +#SELECT a FROM t1 ORDER BY b limit 3; +#commit; +# +#connection default; +#disconnect a; +#disconnect b; +# +# end disabled45225_1 +DROP TABLE t1; + +--echo # Test 8) Test creating a table that could lead to undo log overflow. +CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, + h blob,i blob,j blob,k blob,l blob,m blob,n blob, + o blob,p blob,q blob,r blob,s blob,t blob,u blob) + ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); + +# With no indexes defined, we can update all columns to max key part length. +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, + k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; + +# With this many indexes defined, we can still update all fields. +CREATE INDEX t1a ON t1 (a(767)); +CREATE INDEX t1b ON t1 (b(767)); +CREATE INDEX t1c ON t1 (c(767)); +CREATE INDEX t1d ON t1 (d(767)); +CREATE INDEX t1e ON t1 (e(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, + k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; + +# Add one more index and the UNDO record becomes too big to update all columns. +# But a single transaction can update the columns in separate statements. +# because the UNDO records will be smaller. 
+CREATE INDEX t1f ON t1 (f(767)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, + k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, + n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; + +# More indexes can still be added and a single field can still be updated +CREATE INDEX t1g ON t1 (g(767)); +UPDATE t1 SET g=@e; +CREATE INDEX t1h ON t1 (h(767)); +UPDATE t1 SET h=@e; +CREATE INDEX t1i ON t1 (i(767)); +UPDATE t1 SET i=@e; +CREATE INDEX t1j ON t1 (j(767)); +UPDATE t1 SET j=@e; +CREATE INDEX t1k ON t1 (k(767)); +UPDATE t1 SET k=@e; +CREATE INDEX t1l ON t1 (l(767)); +UPDATE t1 SET l=@e; +CREATE INDEX t1m ON t1 (m(767)); +UPDATE t1 SET m=@e; +CREATE INDEX t1n ON t1 (n(767)); +UPDATE t1 SET n=@e; +CREATE INDEX t1o ON t1 (o(767)); +UPDATE t1 SET o=@e; +CREATE INDEX t1p ON t1 (p(767)); +UPDATE t1 SET p=@e; +CREATE INDEX t1q ON t1 (q(767)); +UPDATE t1 SET q=@e; +CREATE INDEX t1r ON t1 (r(767)); +UPDATE t1 SET r=@e; +CREATE INDEX t1s ON t1 (s(767)); +UPDATE t1 SET s=@e; + +# Add one more index and we cannot update a column to its defined index length. +# This is a problem. It means that the DDL is allowed to create a table +# that CANNOT be updated. See bug#12953735. +CREATE INDEX t1t ON t1 (t(767)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET t=@e; + +CREATE INDEX t1u ON t1 (u(767)); +CREATE INDEX t1ut ON t1 (u(767), t(767)); +CREATE INDEX t1st ON t1 (s(767), t(767)); + +SHOW CREATE TABLE t1; +DROP TABLE t1; + +--echo # Bug #12429576 - Test an assertion failure on purge. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the assertion. 
+CREATE TABLE t1_purge ( +A int, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t1_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766)); + +CREATE TABLE t2_purge ( +A int PRIMARY KEY, +B blob, C blob, D blob, E blob, +F blob, G blob, H blob, I blob, +J blob, K blob, L blob, +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t2_purge VALUES (1, +REPEAT('b', 766), REPEAT('c', 766), REPEAT('d', 766), REPEAT('e', 766), +REPEAT('f', 766), REPEAT('g', 766), REPEAT('h', 766), REPEAT('i', 766), +REPEAT('j', 766), REPEAT('k', 766), REPEAT('l', 766)); + +CREATE TABLE t3_purge ( +A int, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), +PRIMARY KEY (B(767), C(767), D(767), E(767), A), +INDEX (A) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t3_purge SELECT * FROM t1_purge; + +CREATE TABLE t4_purge ( +A int PRIMARY KEY, +B varchar(800), C varchar(800), D varchar(800), E varchar(800), +F varchar(800), G varchar(800), H varchar(800), I varchar(800), +J varchar(800), K varchar(800), L varchar(800), +INDEX (B(767))) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +INSERT INTO t4_purge SELECT * FROM t2_purge; + +# This would trigger the failure (Bug #12429576) +# if purge gets a chance to run before DROP TABLE t1_purge, .... +DELETE FROM t1_purge; +DELETE FROM t2_purge; +DELETE FROM t3_purge; +DELETE FROM t4_purge; +# We need to activate the purge thread. +# Instead of doing a --sleep 10 now, do it once at the end. + +# Bug#12637786 - Assertion hit; ut_ad(dict_index_is_clust(index)); +# A secondary index tuple is found to be too long to fit into a page. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the assertion. 
+SET @r=REPEAT('a',500); +CREATE TABLE t12637786(a int, + v1 varchar(500), v2 varchar(500), v3 varchar(500), + v4 varchar(500), v5 varchar(500), v6 varchar(500), + v7 varchar(500), v8 varchar(500), v9 varchar(500), + v10 varchar(500), v11 varchar(500), v12 varchar(500), + v13 varchar(500), v14 varchar(500), v15 varchar(500), + v16 varchar(500), v17 varchar(500), v18 varchar(500) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +CREATE INDEX idx1 ON t12637786(a,v1); +INSERT INTO t12637786 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +UPDATE t12637786 SET a=1000; +DELETE FROM t12637786; +# We need to activate the purge thread to make sure it does not assert and +# is able to clean up the old versions of secondary index entries. +# Instead of doing a --sleep 10 now for each test, do it once at the end. + +--echo # Bug#12963823 - Test that the purge thread does not crash when +# the number of indexes has changed since the UNDO record was logged. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the crash. 
+CREATE TABLE t12963823(a blob,b blob,c blob,d blob,e blob,f blob,g blob,h blob, + i blob,j blob,k blob,l blob,m blob,n blob,o blob,p blob) + ENGINE=innodb ROW_FORMAT=dynamic; +SET @r = REPEAT('a', 767); +INSERT INTO t12963823 VALUES (@r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r, @r,@r,@r,@r); +CREATE INDEX ndx_a ON t12963823 (a(500)); +CREATE INDEX ndx_b ON t12963823 (b(500)); +CREATE INDEX ndx_c ON t12963823 (c(500)); +CREATE INDEX ndx_d ON t12963823 (d(500)); +CREATE INDEX ndx_e ON t12963823 (e(500)); +CREATE INDEX ndx_f ON t12963823 (f(500)); +CREATE INDEX ndx_k ON t12963823 (k(500)); +CREATE INDEX ndx_l ON t12963823 (l(500)); + +SET @r = REPEAT('b', 500); +UPDATE t12963823 set a=@r,b=@r,c=@r,d=@r; +UPDATE t12963823 set e=@r,f=@r,g=@r,h=@r; +UPDATE t12963823 set i=@r,j=@r,k=@r,l=@r; +UPDATE t12963823 set m=@r,n=@r,o=@r,p=@r; +ALTER TABLE t12963823 DROP INDEX ndx_a; +ALTER TABLE t12963823 DROP INDEX ndx_b; +CREATE INDEX ndx_g ON t12963823 (g(500)); +CREATE INDEX ndx_h ON t12963823 (h(500)); +CREATE INDEX ndx_i ON t12963823 (i(500)); +CREATE INDEX ndx_j ON t12963823 (j(500)); +CREATE INDEX ndx_m ON t12963823 (m(500)); +CREATE INDEX ndx_n ON t12963823 (n(500)); +CREATE INDEX ndx_o ON t12963823 (o(500)); +CREATE INDEX ndx_p ON t12963823 (p(500)); +SHOW CREATE TABLE t12963823; +# We need to activate the purge thread at this point to see if it crashes. +# Instead of doing a --sleep 10 now for each test, do it once at the end. + +--echo # Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE +# InnoDB cannot know that this undo record would be too big for the undo +# page. Too much of text field is stored in the clustered record in this +# DYNAMIC row formatted record. +# This test is not in innodb_8k or innodb_4k since the bug is not about +# page size. It just tests the condition that caused the hang. 
+ +SET SESSION innodb_strict_mode = ON; +CREATE TABLE bug12547647( +a int NOT NULL, b blob NOT NULL, c text, +PRIMARY KEY (b(10), a), INDEX (c(767)), INDEX(b(767)) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +INSERT INTO bug12547647 VALUES (5,REPEAT('khdfo5AlOq',1900),REPEAT('g',7751)); +COMMIT; +# The following used to cause a hang while doing infinite undo log allocation. +--error ER_UNDO_RECORD_TOO_BIG +UPDATE bug12547647 SET c = REPEAT('b',16928); +SHOW WARNINGS; +DROP TABLE bug12547647; + +# The following should fail in non-strict mode too. +# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) +SET SESSION innodb_strict_mode = off; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; + + +--echo # +--echo # Bug#56862 Execution of a query that uses index merge returns a wrong result +--echo # + +# Moved to here from innodb_mysql.test. Some PB3 systems sporadically +# had timeouts doing this with smaller page sizes. 
+ +CREATE TABLE t1 ( + pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, + a int, + b int, + INDEX idx(a)) +ENGINE=INNODB; + +INSERT INTO t1(a,b) VALUES + (11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), + (3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), + (6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), + (13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); + +set @optimizer_switch_saved=@@optimizer_switch; +SET SESSION optimizer_switch='derived_merge=off'; +SET SESSION sort_buffer_size = 1024*36; + +EXPLAIN +SELECT COUNT(*) FROM + (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +SELECT COUNT(*) FROM + (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +set @@optimizer_switch=@optimizer_switch_saved; +SET SESSION sort_buffer_size = DEFAULT; + +DROP TABLE t1; + + +# The tests that uses these tables required the purge thread to run. +# Just in case it has not by now, provide a 10 second wait. 
+--sleep 10 +DROP TABLE t1_purge, t2_purge, t3_purge, t4_purge; +DROP TABLE t12637786; +DROP TABLE t12963823; diff --git a/mysql-test/suite/innodb_zip/t/4k.test b/mysql-test/suite/innodb_zip/t/4k.test new file mode 100644 index 00000000000..6226c4abcee --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/4k.test @@ -0,0 +1,440 @@ +# Tests for setting innodb-page-size=4k + +--source include/have_innodb.inc +--source include/have_innodb_4k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +let $MYSQLD_DATADIR = `select @@datadir`; +let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; + +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +--enable_query_log + +--echo # Test 1) Show the page size from Information Schema +--disable_warnings +SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_page_size'; +--enable_warnings + +--echo # Test 2) The number of buffer pool pages is dependent upon the page size. +--disable_warnings +--replace_result 6144 {checked_valid} +SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; +--enable_warnings + +--echo # Test 3) Query some information_shema tables that are dependent upon +--echo # the page size. +# Show the metadata for tables in schema 'mysql'. +# Pulled from innodb-system-table-view.test +# The IDs of mysql.innodb_table_stats and mysql.innodb_index_stats are +# unpredictable. They depend on whether mtr has created the database for +# this test from scratch or is using a previously created database where +# those tables have been dropped and recreated. Since we cannot force mtr +# to use a freshly created database for this test we do not return the +# table or index IDs. We can return the space IS of mysql schema tables +# since they are created consistently during bootstrap. 
+SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; + +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; + +# Show the metadata for tables in schema 'test'. +# Do not return the space ID since this tablespace may have existed before +# this test runs. The root page number of each index should be consistent +# within a file-per-table tablespace. +SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +--source suite/innodb/include/show_i_s_tablespaces.inc +DROP TABLE t1, t2, t3, t4; + +--echo # Test 4) The maximum row size is dependent upon the page size. +--echo # Redundant: 1979, Compact: 1982. +--echo # Compressed: 1982, Dynamic: 1982. +--echo # Each row format has its own amount of overhead that +--echo # varies depending on number of fields and other overhead. 
+ +SET SESSION innodb_strict_mode = ON; + +# Redundant table; 1927 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(127) +) ROW_FORMAT=redundant; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(128) +) ROW_FORMAT=redundant; + +# Compact table; 1955 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=compact; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=compact; + +# Compressed table; 1878 bytes with 10 CHAR fields +# Bug#13391353 Limit is 1876 on 32-Linux only +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(76) +) ROW_FORMAT=compressed; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(79) +) ROW_FORMAT=compressed; + +# Dynamic table; 1955 bytes with 10 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(155) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error 
ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(156) +) ROW_FORMAT=dynamic; + +# +# Test the maximum key length +# Moved from innodb-index.test since each page size has its own max key length. +# Max Key Length is 768 for 4k pages. +# +# InnoDB assumes 3 bytes for each UTF8 character. +# +CREATE TABLE t1 (a varchar(64) character set utf8, + b varchar(64) character set utf8, + c varchar(64) character set utf8, + d varchar(64) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(64) character set utf8, + b varchar(64) character set utf8, + c varchar(64) character set utf8, + d varchar(65) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +CREATE TABLE t1 (a varchar(64) character set utf8, + b varchar(64) character set utf8, + c varchar(64) character set utf8, + d varchar(64) character set utf8, + e varchar(64) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(64) character set utf8, + b varchar(64) character set utf8, + c varchar(64) character set utf8, + d varchar(64) character set utf8, + e varchar(65) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; + +--echo # Test 5) Make sure that KEY_BLOCK_SIZE=4, 2 & 1 are all +--echo # accepted and that KEY_BLOCK_SIZE=16 & 8 are rejected +--echo # in strict mode and converted to 4 in non-strict mode. 
+ +SET SESSION innodb_strict_mode = ON; + +--error ER_ILLEGAL_HA +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; + +--error ER_ILLEGAL_HA +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +SET SESSION innodb_strict_mode = OFF; + +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM 
information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + + +--echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +--echo # are both rejected when innodb_file_per_table=OFF +# Moved from innodb-zip.test +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +--error ER_ILLEGAL_HA +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +--error ER_ILLEGAL_HA +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_format = `Barracuda`; + + +--echo # Test 7) Not included here; 16k only + + +--echo # Test 8) Test creating a table that could lead to undo log overflow. +CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, + h blob,i blob,j blob,k blob,l blob,m blob,n blob, + o blob,p blob,q blob,r blob,s blob,t blob,u blob) + ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); + +# With no indexes defined, we can update all columns to max key part length. +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, + k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; + +# With one index defined, we can still update all fields. +CREATE INDEX t1a ON t1 (a(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, + k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; + +# Add one more index and the UNDO record becomes too big to update all columns. 
+# But a single transaction can update the columns in separate statements. +# because the UNDO records will be smaller. +CREATE INDEX t1b ON t1 (b(767)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, + k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, + n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; + +# Another index can still be added and a single field can still be updated +CREATE INDEX t1c ON t1 (c(767)); +UPDATE t1 SET c=@e; + +# Add one more index and we cannot update a column to its defined index length. +# This is a problem. It means that the DDL is allowed to create a table +# that CANNOT be updated. See bug#12953735. +CREATE INDEX t1d ON t1 (d(767)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET d=@e; + +--replace_regex /> [0-9]*/> max_row_size/ +CREATE INDEX t1e ON t1 (e(767)); + +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# +# Bug #13336585 - INNODB: CHANGE BUFFERING WITH 4K PAGES CAN ASSERT +# IF SECONDARY KEY IS NEAR MAX +# If the secondary index tuple is close to half the page size, +# ibuf_insert_low() could return DB_TOO_BIG_RECORD, which is not expected +# in ibuf_insert(). In order to ensure this does not happen, WL5756 +# imposes a maximum key length of 768 for 4k pages and 1536 for 8k pages. +# The existing max key size for 16k pages is 3072. +# + +#-- disable_query_log +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. 
+# This flag is not used currently since it exposes valgrind error in ibuf +# code with the following SQL +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 1; +#-- enable_query_log + +# make sure the largest possible key entry can be added to the insert buffer. +# Make enough records so that the root page is not a leaf page. +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( + pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), + pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), + pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), + pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), + sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), + sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), + sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), + sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); 
+INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +# Compressed tables do not compress parent pages. So the whole uncompressed +# secondary tuple including the primary key must be able to fit in half the +# compressed page size. This record length is enforced at index creation. +# So the only way to get an ibuf tuple too big is to make the KEY_BLOCK_SIZE +# the same as the page size. +CREATE TABLE t1( + pk01 varchar(48), pk02 varchar(48), pk03 varchar(48), pk04 varchar(48), + pk05 varchar(48), pk06 varchar(48), pk07 varchar(48), pk08 varchar(48), + pk09 varchar(48), pk10 varchar(48), pk11 varchar(48), pk12 varchar(48), + pk13 varchar(48), pk14 varchar(48), pk15 varchar(48), pk16 varchar(48), + sk01 varchar(48), sk02 varchar(48), sk03 varchar(48), sk04 varchar(48), + sk05 varchar(48), sk06 varchar(48), sk07 varchar(48), sk08 varchar(48), + sk09 varchar(48), sk10 varchar(48), sk11 varchar(48), sk12 varchar(48), + sk13 varchar(48), sk14 varchar(48), sk15 varchar(48), sk16 varchar(48), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Compressed KEY_BLOCK_SIZE=4 ENGINE=InnoDB; +SET @r = repeat('a', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 48); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 48); +INSERT 
INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +#-- disable_query_log +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 0; +#-- enable_query_log + +# The following should fail in non-strict mode too. +# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) +SET SESSION innodb_strict_mode = off; +--replace_regex /> [0-9]*/> max_row_size/ +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +drop table t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +--replace_regex /> [0-9]*/> max_row_size/ +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/t/8k.test b/mysql-test/suite/innodb_zip/t/8k.test new file mode 100644 index 00000000000..3a2e8755f57 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/8k.test @@ -0,0 +1,468 @@ +# Tests for setting innodb-page-size=8k + +--source include/have_innodb.inc +--source include/have_innodb_8k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +let $MYSQLD_DATADIR = `select @@datadir`; +let $INNODB_PAGE_SIZE = `select @@innodb_page_size`; + +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +--enable_query_log + +--echo # Test 1) Show the page size 
from Information Schema +--disable_warnings +SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_page_size'; +--enable_warnings + +--echo # Test 2) The number of buffer pool pages is dependent upon the page size. +--disable_warnings +--replace_result 3071 {checked_valid} 3072 {checked_valid} +SELECT variable_value FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; +--enable_warnings + +--echo # Test 3) Query some information_shema tables that are dependent upon +--echo # the page size. +# Show the metadata for tables in schema 'mysql'. +# Pulled from innodb-system-table-view.test +# The IDs of mysql.innodb_table_stats and mysql.innodb_index_stats are +# unpredictable. They depend on whether mtr has created the database for +# this test from scratch or is using a previously created database where +# those tables have been dropped and recreated. Since we cannot force mtr +# to use a freshly created database for this test we do not return the +# table or index IDs. We can return the space ID of mysql schema tables +# since they are created consistently during bootstrap. +SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'mysql%' + ORDER BY t.name, i.index_id; + +CREATE TABLE t1 (a INT KEY, b TEXT) ROW_FORMAT=REDUNDANT ENGINE=innodb; +CREATE TABLE t2 (a INT KEY, b TEXT) ROW_FORMAT=COMPACT ENGINE=innodb; +CREATE TABLE t3 (a INT KEY, b TEXT) ROW_FORMAT=COMPRESSED ENGINE=innodb; +CREATE TABLE t4 (a INT KEY, b TEXT) ROW_FORMAT=DYNAMIC ENGINE=innodb; + +# Show the metadata for tables in schema 'test'. +# Do not return the space ID since this tablespace may have existed before +# this test runs. 
The root page number of each index should be consistent +# within a file-per-table tablespace. +SELECT t.name table_name, t.n_cols, t.flag table_flags, + i.name index_name, i.page_no root_page, i.type, + i.n_fields, i.merge_threshold + FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES t, + INFORMATION_SCHEMA.INNODB_SYS_INDEXES i + WHERE t.table_id = i.table_id + AND t.name LIKE 'test%' + ORDER BY t.name, i.name; +--source suite/innodb/include/show_i_s_tablespaces.inc +DROP TABLE t1, t2, t3, t4; + +--echo # Test 4) The maximum row size is dependent upon the page size. +--echo # Redundant: 4027, Compact: 4030. +--echo # Compressed: 4030, Dynamic: 4030. +--echo # Each row format has its own amount of overhead that +--echo # varies depending on number of fields and other overhead. + +SET SESSION innodb_strict_mode = ON; + +# Redundant table; 3955 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(155) +) ROW_FORMAT=redundant; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(156) +) ROW_FORMAT=redundant; + +# Compact table; 4002 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), 
c19 char(200), c20 char(202) +) ROW_FORMAT=compact; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=compact; + +# Compressed table; 3905 bytes with 20 CHAR fields +# Bug#13391353 Limit is 3903 on 32-Linux only +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(103) +) ROW_FORMAT=compressed; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(106) +) ROW_FORMAT=compressed; + +# Dynamic table; 4002 bytes with 20 CHAR fields +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(202) +) ROW_FORMAT=dynamic; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE t1 ( +c01 char(200), c02 char(200), c03 char(200), c04 char(200), c05 char(200), +c06 char(200), c07 char(200), c08 char(200), c09 
char(200), c10 char(200), +c11 char(200), c12 char(200), c13 char(200), c14 char(200), c15 char(200), +c16 char(200), c17 char(200), c18 char(200), c19 char(200), c20 char(203) +) ROW_FORMAT=dynamic; + +# +# Test the maximum key length +# Moved from innodb-index.test since each page size has its own max key length. +# Max Key Length is 1536 for 8k pages. +# +# InnoDB assumes 3 bytes for each UTF8 character. +# +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(129) character set utf8, + PRIMARY KEY (a,b,c,d)) + ENGINE=innodb; +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + e varchar(128) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; +DROP TABLE t1; +--error ER_TOO_LONG_KEY +CREATE TABLE t1 (a varchar(128) character set utf8, + b varchar(128) character set utf8, + c varchar(128) character set utf8, + d varchar(128) character set utf8, + e varchar(129) character set utf8, + PRIMARY KEY (a), KEY (b,c,d,e)) + ENGINE=innodb; + +--echo # Test 5) Make sure that KEY_BLOCK_SIZE=8, 4, 2 & 1 are all +--echo # accepted and that KEY_BLOCK_SIZE=16 is rejected in +--echo # strict mode and converted to 8 in non-strict mode. 
+ +SET SESSION innodb_strict_mode = ON; + +--error ER_ILLEGAL_HA +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +SET SESSION innodb_strict_mode = OFF; + +CREATE TABLE t1 (i int) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; + +ALTER TABLE t1 KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT 
table_name, row_format, create_options + FROM information_schema.tables WHERE table_name = 't1'; +DROP TABLE t1; + + +--echo # Test 6) Make sure that KEY_BLOCK_SIZE = 8 and 16 +--echo # are rejected when innodb_file_per_table=OFF +# Moved from innodb-zip.test +SET SESSION innodb_strict_mode = ON; +SET GLOBAL innodb_file_per_table = OFF; +SHOW VARIABLES LIKE 'innodb_file_per_table'; +--error ER_ILLEGAL_HA +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_file_format = `Antelope`; +--error ER_ILLEGAL_HA +CREATE TABLE t4 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=8; +SHOW WARNINGS; +--error ER_ILLEGAL_HA +CREATE TABLE t5 (id int PRIMARY KEY) ENGINE=innodb KEY_BLOCK_SIZE=16; +SHOW WARNINGS; +SET GLOBAL innodb_file_format = `Barracuda`; + + +--echo # Test 7) Not included here; 16k only + + +--echo # Test 8) Test creating a table that could lead to undo log overflow. +CREATE TABLE t1(a blob,b blob,c blob,d blob,e blob,f blob,g blob, + h blob,i blob,j blob,k blob,l blob,m blob,n blob, + o blob,p blob,q blob,r blob,s blob,t blob,u blob) + ENGINE=InnoDB ROW_FORMAT=dynamic; +SET @a = repeat('a', 767); +SET @b = repeat('b', 767); +SET @c = repeat('c', 767); +SET @d = repeat('d', 767); +SET @e = repeat('e', 767); + +# With no indexes defined, we can update all columns to max key part length. +INSERT INTO t1 VALUES (@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a,@a); +UPDATE t1 SET a=@b,b=@b,c=@b,d=@b,e=@b,f=@b,g=@b,h=@b,i=@b,j=@b, + k=@b,l=@b,m=@b,n=@b,o=@b,p=@b,q=@b,r=@b,s=@b,t=@b,u=@b; + +# With this many indexes defined, we can still update all fields. 
+CREATE INDEX t1a ON t1 (a(767)); +CREATE INDEX t1b ON t1 (b(767)); +UPDATE t1 SET a=@c,b=@c,c=@c,d=@c,e=@c,f=@c,g=@c,h=@c,i=@c,j=@c, + k=@c,l=@c,m=@c,n=@c,o=@c,p=@c,q=@c,r=@c,s=@c,t=@c,u=@c; + +# Add one more index and the UNDO record becomes too big to update all columns. +# But a single transaction can update the columns in separate statements. +# because the UNDO records will be smaller. +CREATE INDEX t1c ON t1 (c(767)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d,f=@d,g=@d,h=@d,i=@d,j=@d, + k=@d,l=@d,m=@d,n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +BEGIN; +UPDATE t1 SET a=@d,b=@d,c=@d,d=@d,e=@d; +UPDATE t1 SET f=@d,g=@d,h=@d,i=@d,j=@d,k=@d,l=@d,m=@d, + n=@d,o=@d,p=@d,q=@d,r=@d,s=@d,t=@d,u=@d; +COMMIT; + +# More indexes can still be added and a single field can still be updated +CREATE INDEX t1d ON t1 (d(767)); +UPDATE t1 SET d=@e; +CREATE INDEX t1e ON t1 (e(767)); +UPDATE t1 SET e=@e; +CREATE INDEX t1f ON t1 (f(767)); +UPDATE t1 SET f=@e; +CREATE INDEX t1g ON t1 (g(767)); +UPDATE t1 SET g=@e; +CREATE INDEX t1h ON t1 (h(767)); +UPDATE t1 SET h=@e; +CREATE INDEX t1i ON t1 (i(767)); +UPDATE t1 SET i=@e; + +--replace_regex /> [0-9]*/> max_row_size/ +CREATE INDEX t1k ON t1 (j(767)); + +# But it does allow a 500 byte index. And with this, we cannot +# update the record. This is a problem. It means that the DDL is +# allowed to create a table and a record that CANNOT be updated. +# See bug#12953735 +--replace_regex /> [0-9]*/> max_row_size/ +CREATE INDEX t1j ON t1 (j(500)); +--error ER_UNDO_RECORD_TOO_BIG +UPDATE t1 SET j=@e; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# +# Bug #13336585 - INNODB: CHANGE BUFFERING WITH 4K PAGES CAN ASSERT +# IF SECONDARY KEY IS NEAR MAX +# If the secondary index tuple is close to half the page size, +# ibuf_insert_low() could return DB_TOO_BIG_RECORD, which is not expected +# in ibuf_insert(). 
In order to ensure this does not happen, WL5756 +# imposes a maximum key length of 768 for 4k pages and 1536 for 8k pages. +# The existing max key size for 16k pages is 3072. +# + +#-- disable_query_log +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 1; +#-- enable_query_log + +# make sure the largest possible key entry can be added to the insert buffer. +# Make enough records so that the root page is not a leaf page. +SET SESSION innodb_strict_mode = OFF; +CREATE TABLE t1( + pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), + pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), + pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), + pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), + sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), + sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), + sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), + sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Redundant ENGINE=InnoDB; +SET @r = repeat('a', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + 
@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +# Compressed tables do not compress parent pages. So the whole uncompressed +# secondary tuple including the primary key must be able to fit in half the +# compressed page size. This record length is enforced at index creation. +# So the only way to get an ibuf tuple too big is to make the KEY_BLOCK_SIZE +# the same as the page size. +CREATE TABLE t1( + pk01 varchar(96), pk02 varchar(96), pk03 varchar(96), pk04 varchar(96), + pk05 varchar(96), pk06 varchar(96), pk07 varchar(96), pk08 varchar(96), + pk09 varchar(96), pk10 varchar(96), pk11 varchar(96), pk12 varchar(96), + pk13 varchar(96), pk14 varchar(96), pk15 varchar(96), pk16 varchar(96), + sk01 varchar(96), sk02 varchar(96), sk03 varchar(96), sk04 varchar(96), + sk05 varchar(96), sk06 varchar(96), sk07 varchar(96), sk08 varchar(96), + sk09 varchar(96), sk10 varchar(96), sk11 varchar(96), sk12 varchar(96), + sk13 varchar(96), sk14 varchar(96), sk15 varchar(96), sk16 varchar(96), + PRIMARY KEY pk(pk01,pk02,pk03,pk04,pk05,pk06,pk07,pk08, + pk09,pk10,pk11,pk12,pk13,pk14,pk15,pk16), + KEY pk(sk01,sk02,sk03,sk04,sk05,sk06,sk07,sk08, + sk09,sk10,sk11,sk12,sk13,sk14,sk15,sk16)) + ROW_FORMAT=Compressed KEY_BLOCK_SIZE=8 ENGINE=InnoDB; +SET @r = repeat('a', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('b', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + 
@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('c', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('d', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +SET @r = repeat('e', 96); +INSERT INTO t1 VALUES(@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r, + @r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r); +DELETE from t1; +DROP TABLE t1; + +#-- disable_query_log +#-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +#SET GLOBAL innodb_change_buffering_debug = 0; +#-- enable_query_log + +# The following should fail in non-strict mode too. +# (The fix of Bug #50945 only affects REDUNDANT and COMPACT tables.) +SET SESSION innodb_strict_mode = off; +--replace_regex /> [0-9]*/> max_row_size/ +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +--replace_regex /> [0-9]*/> max_row_size/ +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1( + c text NOT NULL, d text NOT NULL, + PRIMARY KEY (c(767),d(767))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4 CHARSET=ASCII; +drop table t1; +--replace_regex /> [0-9]*/> max_row_size/ +CREATE TABLE t1(c text, PRIMARY KEY (c(440))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +DROP TABLE t1; +CREATE TABLE t1(c text, PRIMARY KEY (c(438))) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1 CHARSET=ASCII; +INSERT INTO t1 VALUES(REPEAT('A',512)),(REPEAT('B',512)); +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_zip/t/bug36169.test b/mysql-test/suite/innodb_zip/t/bug36169.test new file mode 100644 index 00000000000..5452c929b92 --- /dev/null +++ 
b/mysql-test/suite/innodb_zip/t/bug36169.test @@ -0,0 +1,1162 @@ +# +# Bug#36169 create innodb compressed table with too large row size crashed +# http://bugs.mysql.com/36169 +# + +-- source include/have_innodb.inc +-- source include/have_innodb_zip.inc + +let $file_per_table=`select @@innodb_file_per_table`; +SET GLOBAL innodb_file_per_table=ON; + +# +# The following is copied from http://bugs.mysql.com/36169 +# (http://bugs.mysql.com/file.php?id=9121) +# Probably it can be simplified but that is not obvious. +# + +# we care only that the following SQL commands do produce errors +# as expected and do not crash the server +-- disable_query_log +-- disable_result_log +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +# Generating 10 tables +# Creating a table with 94 columns and 24 indexes +DROP TABLE IF EXISTS `table0`; +set innodb_strict_mode=on; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table0` +(`col0` BOOL, +`col1` BOOL, +`col2` TINYINT, +`col3` DATE, +`col4` TIME, +`col5` SET ('test1','test2','test3'), +`col6` TIME, +`col7` TEXT, +`col8` DECIMAL, +`col9` SET ('test1','test2','test3'), +`col10` FLOAT, +`col11` DOUBLE PRECISION, +`col12` ENUM ('test1','test2','test3'), +`col13` TINYBLOB, +`col14` YEAR, +`col15` SET ('test1','test2','test3'), +`col16` NUMERIC, +`col17` NUMERIC, +`col18` BLOB, +`col19` DATETIME, +`col20` DOUBLE PRECISION, +`col21` DECIMAL, +`col22` DATETIME, +`col23` NUMERIC, +`col24` NUMERIC, +`col25` LONGTEXT, +`col26` TINYBLOB, +`col27` TIME, +`col28` TINYBLOB, +`col29` ENUM ('test1','test2','test3'), +`col30` SMALLINT, +`col31` REAL, +`col32` FLOAT, +`col33` CHAR (175), +`col34` TINYTEXT, +`col35` TINYTEXT, +`col36` TINYBLOB, +`col37` TINYBLOB, +`col38` TINYTEXT, +`col39` MEDIUMBLOB, +`col40` TIMESTAMP, +`col41` DOUBLE, +`col42` SMALLINT, +`col43` LONGBLOB, +`col44` VARCHAR (80), +`col45` MEDIUMTEXT, +`col46` NUMERIC, +`col47` BIGINT, 
+`col48` DATE, +`col49` TINYBLOB, +`col50` DATE, +`col51` BOOL, +`col52` MEDIUMINT, +`col53` FLOAT, +`col54` TINYBLOB, +`col55` LONGTEXT, +`col56` SMALLINT, +`col57` ENUM ('test1','test2','test3'), +`col58` DATETIME, +`col59` MEDIUMTEXT, +`col60` VARCHAR (232), +`col61` NUMERIC, +`col62` YEAR, +`col63` SMALLINT, +`col64` TIMESTAMP, +`col65` BLOB, +`col66` LONGBLOB, +`col67` INT, +`col68` LONGTEXT, +`col69` ENUM ('test1','test2','test3'), +`col70` INT, +`col71` TIME, +`col72` TIMESTAMP, +`col73` TIMESTAMP, +`col74` VARCHAR (170), +`col75` SET ('test1','test2','test3'), +`col76` TINYBLOB, +`col77` BIGINT, +`col78` NUMERIC, +`col79` DATETIME, +`col80` YEAR, +`col81` NUMERIC, +`col82` LONGBLOB, +`col83` TEXT, +`col84` CHAR (83), +`col85` DECIMAL, +`col86` FLOAT, +`col87` INT, +`col88` VARCHAR (145), +`col89` DATE, +`col90` DECIMAL, +`col91` DECIMAL, +`col92` MEDIUMBLOB, +`col93` TIME, +KEY `idx0` (`col69`,`col90`,`col8`), +KEY `idx1` (`col60`), +KEY `idx2` (`col60`,`col70`,`col74`), +KEY `idx3` (`col22`,`col32`,`col72`,`col30`), +KEY `idx4` (`col29`), +KEY `idx5` (`col19`,`col45`(143)), +KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)), +KEY `idx7` (`col48`,`col61`), +KEY `idx8` (`col93`), +KEY `idx9` (`col31`), +KEY `idx10` (`col30`,`col21`), +KEY `idx11` (`col67`), +KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)), +KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)), +KEY `idx14` (`col78`), +KEY `idx15` (`col63`,`col67`,`col64`), +KEY `idx16` (`col17`,`col86`), +KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)), +KEY `idx18` (`col62`), +KEY `idx19` (`col31`,`col57`,`col56`,`col53`), +KEY `idx20` (`col46`), +KEY `idx21` (`col83`(54)), +KEY `idx22` (`col51`,`col7`(120)), +KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SET sql_mode = default; + +# Creating a table with 10 columns and 32 indexes +DROP TABLE IF EXISTS `table1`; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table1` +(`col0` CHAR (113), 
+`col1` FLOAT, +`col2` BIGINT, +`col3` DECIMAL, +`col4` BLOB, +`col5` LONGTEXT, +`col6` SET ('test1','test2','test3'), +`col7` BIGINT, +`col8` BIGINT, +`col9` TINYBLOB, +KEY `idx0` (`col5`(101),`col7`,`col8`), +KEY `idx1` (`col8`), +KEY `idx2` (`col4`(177),`col9`(126),`col6`,`col3`), +KEY `idx3` (`col5`(160)), +KEY `idx4` (`col9`(242)), +KEY `idx5` (`col4`(139),`col2`,`col3`), +KEY `idx6` (`col7`), +KEY `idx7` (`col6`,`col2`,`col0`,`col3`), +KEY `idx8` (`col9`(66)), +KEY `idx9` (`col5`(253)), +KEY `idx10` (`col1`,`col7`,`col2`), +KEY `idx11` (`col9`(242),`col0`,`col8`,`col5`(163)), +KEY `idx12` (`col8`), +KEY `idx13` (`col0`,`col9`(37)), +KEY `idx14` (`col0`), +KEY `idx15` (`col5`(111)), +KEY `idx16` (`col8`,`col0`,`col5`(13)), +KEY `idx17` (`col4`(139)), +KEY `idx18` (`col5`(189),`col2`,`col3`,`col9`(136)), +KEY `idx19` (`col0`,`col3`,`col1`,`col8`), +KEY `idx20` (`col8`), +KEY `idx21` (`col0`,`col7`,`col9`(227),`col3`), +KEY `idx22` (`col0`), +KEY `idx23` (`col2`), +KEY `idx24` (`col3`), +KEY `idx25` (`col2`,`col3`), +KEY `idx26` (`col0`), +KEY `idx27` (`col5`(254)), +KEY `idx28` (`col3`), +KEY `idx29` (`col3`), +KEY `idx30` (`col7`,`col3`,`col0`,`col4`(220)), +KEY `idx31` (`col4`(1),`col0`) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +# Creating a table with 141 columns and 18 indexes +DROP TABLE IF EXISTS `table2`; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table2` +(`col0` BOOL, +`col1` MEDIUMINT, +`col2` VARCHAR (209), +`col3` MEDIUMBLOB, +`col4` CHAR (13), +`col5` DOUBLE, +`col6` TINYTEXT, +`col7` REAL, +`col8` SMALLINT, +`col9` BLOB, +`col10` TINYINT, +`col11` DECIMAL, +`col12` BLOB, +`col13` DECIMAL, +`col14` LONGBLOB, +`col15` SMALLINT, +`col16` LONGBLOB, +`col17` TINYTEXT, +`col18` FLOAT, +`col19` CHAR (78), +`col20` MEDIUMTEXT, +`col21` SET ('test1','test2','test3'), +`col22` MEDIUMINT, +`col23` INT, +`col24` MEDIUMBLOB, +`col25` ENUM ('test1','test2','test3'), +`col26` TINYBLOB, 
+`col27` VARCHAR (116), +`col28` TIMESTAMP, +`col29` BLOB, +`col30` SMALLINT, +`col31` DOUBLE PRECISION, +`col32` DECIMAL, +`col33` DECIMAL, +`col34` TEXT, +`col35` MEDIUMINT, +`col36` MEDIUMINT, +`col37` BIGINT, +`col38` VARCHAR (253), +`col39` TINYBLOB, +`col40` MEDIUMBLOB, +`col41` BIGINT, +`col42` DOUBLE, +`col43` TEXT, +`col44` BLOB, +`col45` TIME, +`col46` MEDIUMINT, +`col47` DOUBLE PRECISION, +`col48` SET ('test1','test2','test3'), +`col49` DOUBLE PRECISION, +`col50` VARCHAR (97), +`col51` TEXT, +`col52` NUMERIC, +`col53` ENUM ('test1','test2','test3'), +`col54` MEDIUMTEXT, +`col55` MEDIUMINT, +`col56` DATETIME, +`col57` DATETIME, +`col58` MEDIUMTEXT, +`col59` CHAR (244), +`col60` LONGBLOB, +`col61` MEDIUMBLOB, +`col62` DOUBLE, +`col63` SMALLINT, +`col64` BOOL, +`col65` SMALLINT, +`col66` VARCHAR (212), +`col67` TIME, +`col68` REAL, +`col69` BOOL, +`col70` BIGINT, +`col71` DATE, +`col72` TINYINT, +`col73` ENUM ('test1','test2','test3'), +`col74` DATE, +`col75` TIME, +`col76` DATETIME, +`col77` BOOL, +`col78` TINYTEXT, +`col79` MEDIUMINT, +`col80` NUMERIC, +`col81` LONGTEXT, +`col82` SET ('test1','test2','test3'), +`col83` DOUBLE PRECISION, +`col84` NUMERIC, +`col85` VARCHAR (184), +`col86` DOUBLE PRECISION, +`col87` MEDIUMTEXT, +`col88` MEDIUMBLOB, +`col89` BOOL, +`col90` SMALLINT, +`col91` TINYINT, +`col92` ENUM ('test1','test2','test3'), +`col93` BOOL, +`col94` TIMESTAMP, +`col95` BOOL, +`col96` MEDIUMTEXT, +`col97` DECIMAL, +`col98` BOOL, +`col99` DECIMAL, +`col100` MEDIUMINT, +`col101` DOUBLE PRECISION, +`col102` TINYINT, +`col103` BOOL, +`col104` MEDIUMINT, +`col105` DECIMAL, +`col106` NUMERIC, +`col107` TIMESTAMP, +`col108` MEDIUMBLOB, +`col109` TINYBLOB, +`col110` SET ('test1','test2','test3'), +`col111` YEAR, +`col112` TIMESTAMP, +`col113` CHAR (201), +`col114` BOOL, +`col115` TINYINT, +`col116` DOUBLE, +`col117` TINYINT, +`col118` TIMESTAMP, +`col119` SET ('test1','test2','test3'), +`col120` SMALLINT, +`col121` TINYBLOB, +`col122` TIMESTAMP, 
+`col123` BLOB, +`col124` DATE, +`col125` SMALLINT, +`col126` ENUM ('test1','test2','test3'), +`col127` MEDIUMBLOB, +`col128` DOUBLE PRECISION, +`col129` REAL, +`col130` VARCHAR (159), +`col131` MEDIUMBLOB, +`col132` BIGINT, +`col133` INT, +`col134` SET ('test1','test2','test3'), +`col135` CHAR (198), +`col136` SET ('test1','test2','test3'), +`col137` MEDIUMTEXT, +`col138` SMALLINT, +`col139` BLOB, +`col140` LONGBLOB, +KEY `idx0` (`col14`(139),`col24`(208),`col38`,`col35`), +KEY `idx1` (`col48`,`col118`,`col29`(131),`col100`), +KEY `idx2` (`col86`,`col67`,`col43`(175)), +KEY `idx3` (`col19`), +KEY `idx4` (`col40`(220),`col67`), +KEY `idx5` (`col99`,`col56`), +KEY `idx6` (`col68`,`col28`,`col137`(157)), +KEY `idx7` (`col51`(160),`col99`,`col45`,`col39`(9)), +KEY `idx8` (`col15`,`col52`,`col90`,`col94`), +KEY `idx9` (`col24`(3),`col139`(248),`col108`(118),`col41`), +KEY `idx10` (`col36`,`col92`,`col114`), +KEY `idx11` (`col115`,`col9`(116)), +KEY `idx12` (`col130`,`col93`,`col134`), +KEY `idx13` (`col123`(65)), +KEY `idx14` (`col44`(90),`col86`,`col119`), +KEY `idx15` (`col69`), +KEY `idx16` (`col132`,`col81`(118),`col18`), +KEY `idx17` (`col24`(250),`col7`,`col92`,`col45`) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +# Creating a table with 199 columns and 1 indexes +DROP TABLE IF EXISTS `table3`; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table3` +(`col0` SMALLINT, +`col1` SET ('test1','test2','test3'), +`col2` TINYTEXT, +`col3` DOUBLE, +`col4` NUMERIC, +`col5` DATE, +`col6` BIGINT, +`col7` DOUBLE, +`col8` TEXT, +`col9` INT, +`col10` REAL, +`col11` TINYINT, +`col12` NUMERIC, +`col13` NUMERIC, +`col14` TIME, +`col15` DOUBLE, +`col16` REAL, +`col17` MEDIUMBLOB, +`col18` YEAR, +`col19` TINYTEXT, +`col20` YEAR, +`col21` CHAR (250), +`col22` TINYINT, +`col23` TINYINT, +`col24` SMALLINT, +`col25` DATETIME, +`col26` MEDIUMINT, +`col27` LONGBLOB, +`col28` VARCHAR (106), +`col29` FLOAT, +`col30` MEDIUMTEXT, +`col31` TINYBLOB, +`col32` BIGINT, 
+`col33` YEAR, +`col34` REAL, +`col35` MEDIUMBLOB, +`col36` LONGTEXT, +`col37` LONGBLOB, +`col38` BIGINT, +`col39` FLOAT, +`col40` TIME, +`col41` DATETIME, +`col42` BOOL, +`col43` BIGINT, +`col44` SMALLINT, +`col45` TIME, +`col46` DOUBLE PRECISION, +`col47` TIME, +`col48` TINYTEXT, +`col49` DOUBLE PRECISION, +`col50` BIGINT, +`col51` NUMERIC, +`col52` TINYBLOB, +`col53` DATE, +`col54` DECIMAL, +`col55` SMALLINT, +`col56` TINYTEXT, +`col57` ENUM ('test1','test2','test3'), +`col58` YEAR, +`col59` TIME, +`col60` TINYINT, +`col61` DECIMAL, +`col62` DOUBLE, +`col63` DATE, +`col64` LONGTEXT, +`col65` DOUBLE, +`col66` VARCHAR (88), +`col67` MEDIUMTEXT, +`col68` DATE, +`col69` MEDIUMINT, +`col70` DECIMAL, +`col71` MEDIUMTEXT, +`col72` LONGTEXT, +`col73` REAL, +`col74` DOUBLE, +`col75` TIME, +`col76` DATE, +`col77` DECIMAL, +`col78` MEDIUMBLOB, +`col79` NUMERIC, +`col80` BIGINT, +`col81` YEAR, +`col82` SMALLINT, +`col83` MEDIUMINT, +`col84` TINYINT, +`col85` MEDIUMBLOB, +`col86` TIME, +`col87` MEDIUMBLOB, +`col88` LONGTEXT, +`col89` BOOL, +`col90` BLOB, +`col91` LONGBLOB, +`col92` YEAR, +`col93` BLOB, +`col94` INT, +`col95` TINYTEXT, +`col96` TINYINT, +`col97` DECIMAL, +`col98` ENUM ('test1','test2','test3'), +`col99` MEDIUMINT, +`col100` TINYINT, +`col101` MEDIUMBLOB, +`col102` TINYINT, +`col103` SET ('test1','test2','test3'), +`col104` TIMESTAMP, +`col105` TEXT, +`col106` DATETIME, +`col107` MEDIUMTEXT, +`col108` CHAR (220), +`col109` TIME, +`col110` VARCHAR (131), +`col111` DECIMAL, +`col112` FLOAT, +`col113` SMALLINT, +`col114` BIGINT, +`col115` LONGBLOB, +`col116` SET ('test1','test2','test3'), +`col117` ENUM ('test1','test2','test3'), +`col118` BLOB, +`col119` MEDIUMTEXT, +`col120` SET ('test1','test2','test3'), +`col121` DATETIME, +`col122` FLOAT, +`col123` VARCHAR (242), +`col124` YEAR, +`col125` MEDIUMBLOB, +`col126` TIME, +`col127` BOOL, +`col128` TINYBLOB, +`col129` DOUBLE, +`col130` TINYINT, +`col131` BIGINT, +`col132` SMALLINT, +`col133` INT, +`col134` DOUBLE 
PRECISION, +`col135` MEDIUMBLOB, +`col136` SET ('test1','test2','test3'), +`col137` TINYTEXT, +`col138` DOUBLE PRECISION, +`col139` NUMERIC, +`col140` BLOB, +`col141` SET ('test1','test2','test3'), +`col142` INT, +`col143` VARCHAR (26), +`col144` BLOB, +`col145` REAL, +`col146` SET ('test1','test2','test3'), +`col147` LONGBLOB, +`col148` TEXT, +`col149` BLOB, +`col150` CHAR (189), +`col151` LONGTEXT, +`col152` INT, +`col153` FLOAT, +`col154` LONGTEXT, +`col155` DATE, +`col156` LONGBLOB, +`col157` TINYBLOB, +`col158` REAL, +`col159` DATE, +`col160` TIME, +`col161` YEAR, +`col162` DOUBLE, +`col163` VARCHAR (90), +`col164` FLOAT, +`col165` NUMERIC, +`col166` ENUM ('test1','test2','test3'), +`col167` DOUBLE PRECISION, +`col168` DOUBLE PRECISION, +`col169` TINYBLOB, +`col170` TIME, +`col171` SMALLINT, +`col172` TINYTEXT, +`col173` SMALLINT, +`col174` DOUBLE, +`col175` VARCHAR (14), +`col176` VARCHAR (90), +`col177` REAL, +`col178` MEDIUMINT, +`col179` TINYBLOB, +`col180` FLOAT, +`col181` TIMESTAMP, +`col182` REAL, +`col183` DOUBLE PRECISION, +`col184` BIGINT, +`col185` INT, +`col186` MEDIUMTEXT, +`col187` TIME, +`col188` FLOAT, +`col189` TIME, +`col190` INT, +`col191` FLOAT, +`col192` MEDIUMINT, +`col193` TINYINT, +`col194` MEDIUMTEXT, +`col195` DATE, +`col196` TIME, +`col197` YEAR, +`col198` CHAR (206), +KEY `idx0` (`col39`,`col23`) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +# Creating a table with 133 columns and 16 indexes +DROP TABLE IF EXISTS `table4`; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table4` +(`col0` VARCHAR (60), +`col1` NUMERIC, +`col2` LONGTEXT, +`col3` MEDIUMTEXT, +`col4` LONGTEXT, +`col5` LONGBLOB, +`col6` LONGBLOB, +`col7` DATETIME, +`col8` TINYTEXT, +`col9` BLOB, +`col10` BOOL, +`col11` BIGINT, +`col12` TEXT, +`col13` VARCHAR (213), +`col14` TINYBLOB, +`col15` BOOL, +`col16` MEDIUMTEXT, +`col17` DOUBLE, +`col18` TEXT, +`col19` BLOB, +`col20` SET ('test1','test2','test3'), +`col21` TINYINT, +`col22` DATETIME, +`col23` 
TINYINT, +`col24` ENUM ('test1','test2','test3'), +`col25` REAL, +`col26` BOOL, +`col27` FLOAT, +`col28` LONGBLOB, +`col29` DATETIME, +`col30` FLOAT, +`col31` SET ('test1','test2','test3'), +`col32` LONGBLOB, +`col33` NUMERIC, +`col34` YEAR, +`col35` VARCHAR (146), +`col36` BIGINT, +`col37` DATETIME, +`col38` DATE, +`col39` SET ('test1','test2','test3'), +`col40` CHAR (112), +`col41` FLOAT, +`col42` YEAR, +`col43` TIME, +`col44` DOUBLE, +`col45` NUMERIC, +`col46` FLOAT, +`col47` DECIMAL, +`col48` BIGINT, +`col49` DECIMAL, +`col50` YEAR, +`col51` MEDIUMTEXT, +`col52` LONGBLOB, +`col53` SET ('test1','test2','test3'), +`col54` BLOB, +`col55` FLOAT, +`col56` REAL, +`col57` REAL, +`col58` TEXT, +`col59` MEDIUMBLOB, +`col60` INT, +`col61` INT, +`col62` DATE, +`col63` TEXT, +`col64` DATE, +`col65` ENUM ('test1','test2','test3'), +`col66` DOUBLE PRECISION, +`col67` TINYTEXT, +`col68` TINYBLOB, +`col69` FLOAT, +`col70` BLOB, +`col71` DATETIME, +`col72` DOUBLE, +`col73` LONGTEXT, +`col74` TIME, +`col75` DATETIME, +`col76` VARCHAR (122), +`col77` MEDIUMTEXT, +`col78` MEDIUMTEXT, +`col79` BOOL, +`col80` LONGTEXT, +`col81` TINYTEXT, +`col82` NUMERIC, +`col83` DOUBLE PRECISION, +`col84` DATE, +`col85` YEAR, +`col86` BLOB, +`col87` TINYTEXT, +`col88` DOUBLE PRECISION, +`col89` MEDIUMINT, +`col90` MEDIUMTEXT, +`col91` NUMERIC, +`col92` DATETIME, +`col93` NUMERIC, +`col94` SET ('test1','test2','test3'), +`col95` TINYTEXT, +`col96` SET ('test1','test2','test3'), +`col97` YEAR, +`col98` MEDIUMINT, +`col99` TEXT, +`col100` TEXT, +`col101` TIME, +`col102` VARCHAR (225), +`col103` TINYTEXT, +`col104` TEXT, +`col105` MEDIUMTEXT, +`col106` TINYINT, +`col107` TEXT, +`col108` LONGBLOB, +`col109` LONGTEXT, +`col110` TINYTEXT, +`col111` CHAR (56), +`col112` YEAR, +`col113` ENUM ('test1','test2','test3'), +`col114` TINYBLOB, +`col115` DATETIME, +`col116` DATE, +`col117` TIME, +`col118` MEDIUMTEXT, +`col119` DOUBLE PRECISION, +`col120` FLOAT, +`col121` TIMESTAMP, +`col122` MEDIUMINT, +`col123` 
YEAR, +`col124` DATE, +`col125` TEXT, +`col126` FLOAT, +`col127` TINYTEXT, +`col128` BOOL, +`col129` NUMERIC, +`col130` TIMESTAMP, +`col131` INT, +`col132` MEDIUMBLOB, +KEY `idx0` (`col130`), +KEY `idx1` (`col30`,`col55`,`col19`(31)), +KEY `idx2` (`col104`(186)), +KEY `idx3` (`col131`), +KEY `idx4` (`col64`,`col93`,`col2`(11)), +KEY `idx5` (`col34`,`col121`,`col22`), +KEY `idx6` (`col33`,`col55`,`col83`), +KEY `idx7` (`col17`,`col87`(245),`col99`(17)), +KEY `idx8` (`col65`,`col120`), +KEY `idx9` (`col82`), +KEY `idx10` (`col9`(72)), +KEY `idx11` (`col88`), +KEY `idx12` (`col128`,`col9`(200),`col71`,`col66`), +KEY `idx13` (`col77`(126)), +KEY `idx14` (`col105`(26),`col13`,`col117`), +KEY `idx15` (`col4`(246),`col130`,`col115`,`col3`(141)) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +# Creating a table with 176 columns and 13 indexes +DROP TABLE IF EXISTS `table5`; +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table5` +(`col0` MEDIUMTEXT, +`col1` VARCHAR (90), +`col2` TINYTEXT, +`col3` TIME, +`col4` BOOL, +`col5` TINYTEXT, +`col6` BOOL, +`col7` TIMESTAMP, +`col8` TINYBLOB, +`col9` TINYINT, +`col10` YEAR, +`col11` SET ('test1','test2','test3'), +`col12` TEXT, +`col13` CHAR (248), +`col14` BIGINT, +`col15` TEXT, +`col16` TINYINT, +`col17` NUMERIC, +`col18` SET ('test1','test2','test3'), +`col19` LONGBLOB, +`col20` FLOAT, +`col21` INT, +`col22` TEXT, +`col23` BOOL, +`col24` DECIMAL, +`col25` DOUBLE PRECISION, +`col26` FLOAT, +`col27` TINYBLOB, +`col28` NUMERIC, +`col29` MEDIUMBLOB, +`col30` DATE, +`col31` LONGTEXT, +`col32` DATE, +`col33` FLOAT, +`col34` BIGINT, +`col35` TINYTEXT, +`col36` MEDIUMTEXT, +`col37` TIME, +`col38` INT, +`col39` TINYINT, +`col40` SET ('test1','test2','test3'), +`col41` CHAR (130), +`col42` SMALLINT, +`col43` INT, +`col44` MEDIUMTEXT, +`col45` VARCHAR (126), +`col46` INT, +`col47` DOUBLE PRECISION, +`col48` BIGINT, +`col49` MEDIUMTEXT, +`col50` TINYBLOB, +`col51` MEDIUMINT, +`col52` TEXT, +`col53` VARCHAR (208), 
+`col54` VARCHAR (207), +`col55` NUMERIC, +`col56` DATETIME, +`col57` ENUM ('test1','test2','test3'), +`col58` NUMERIC, +`col59` TINYBLOB, +`col60` VARCHAR (73), +`col61` MEDIUMTEXT, +`col62` TINYBLOB, +`col63` DATETIME, +`col64` NUMERIC, +`col65` MEDIUMINT, +`col66` DATETIME, +`col67` NUMERIC, +`col68` TINYINT, +`col69` VARCHAR (58), +`col70` DECIMAL, +`col71` MEDIUMTEXT, +`col72` DATE, +`col73` TIME, +`col74` DOUBLE PRECISION, +`col75` DECIMAL, +`col76` MEDIUMBLOB, +`col77` REAL, +`col78` YEAR, +`col79` YEAR, +`col80` LONGBLOB, +`col81` BLOB, +`col82` BIGINT, +`col83` ENUM ('test1','test2','test3'), +`col84` NUMERIC, +`col85` SET ('test1','test2','test3'), +`col86` MEDIUMTEXT, +`col87` LONGBLOB, +`col88` TIME, +`col89` ENUM ('test1','test2','test3'), +`col90` DECIMAL, +`col91` FLOAT, +`col92` DATETIME, +`col93` TINYTEXT, +`col94` TIMESTAMP, +`col95` TIMESTAMP, +`col96` TEXT, +`col97` REAL, +`col98` VARCHAR (198), +`col99` TIME, +`col100` TINYINT, +`col101` BIGINT, +`col102` LONGBLOB, +`col103` LONGBLOB, +`col104` MEDIUMINT, +`col105` MEDIUMTEXT, +`col106` TIMESTAMP, +`col107` SMALLINT, +`col108` NUMERIC, +`col109` DECIMAL, +`col110` FLOAT, +`col111` DECIMAL, +`col112` REAL, +`col113` TINYTEXT, +`col114` FLOAT, +`col115` VARCHAR (7), +`col116` LONGTEXT, +`col117` DATE, +`col118` BIGINT, +`col119` TEXT, +`col120` BIGINT, +`col121` BLOB, +`col122` CHAR (110), +`col123` NUMERIC, +`col124` MEDIUMBLOB, +`col125` NUMERIC, +`col126` NUMERIC, +`col127` BOOL, +`col128` TIME, +`col129` TINYBLOB, +`col130` TINYBLOB, +`col131` DATE, +`col132` INT, +`col133` VARCHAR (123), +`col134` CHAR (238), +`col135` VARCHAR (225), +`col136` LONGTEXT, +`col137` LONGBLOB, +`col138` REAL, +`col139` TINYBLOB, +`col140` DATETIME, +`col141` TINYTEXT, +`col142` LONGBLOB, +`col143` BIGINT, +`col144` VARCHAR (236), +`col145` TEXT, +`col146` YEAR, +`col147` DECIMAL, +`col148` TEXT, +`col149` MEDIUMBLOB, +`col150` TINYINT, +`col151` BOOL, +`col152` VARCHAR (72), +`col153` INT, +`col154` VARCHAR 
(165), +`col155` TINYINT, +`col156` MEDIUMTEXT, +`col157` DOUBLE PRECISION, +`col158` TIME, +`col159` MEDIUMBLOB, +`col160` LONGBLOB, +`col161` DATETIME, +`col162` DOUBLE PRECISION, +`col163` BLOB, +`col164` ENUM ('test1','test2','test3'), +`col165` TIMESTAMP, +`col166` DATE, +`col167` TINYBLOB, +`col168` TINYBLOB, +`col169` LONGBLOB, +`col170` DATETIME, +`col171` BIGINT, +`col172` VARCHAR (30), +`col173` LONGTEXT, +`col174` TIME, +`col175` FLOAT, +KEY `idx0` (`col16`,`col156`(139),`col97`,`col120`), +KEY `idx1` (`col24`,`col0`(108)), +KEY `idx2` (`col117`,`col173`(34),`col132`,`col82`), +KEY `idx3` (`col2`(86)), +KEY `idx4` (`col2`(43)), +KEY `idx5` (`col83`,`col35`(87),`col111`), +KEY `idx6` (`col6`,`col134`,`col92`), +KEY `idx7` (`col56`), +KEY `idx8` (`col30`,`col53`,`col129`(66)), +KEY `idx9` (`col53`,`col113`(211),`col32`,`col15`(75)), +KEY `idx10` (`col34`), +KEY `idx11` (`col126`), +KEY `idx12` (`col24`) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +# Creating a table with 179 columns and 46 indexes +DROP TABLE IF EXISTS `table6`; +-- error ER_TOO_BIG_ROWSIZE +--error ER_TOO_BIG_ROWSIZE +CREATE TABLE IF NOT EXISTS `table6` +(`col0` ENUM ('test1','test2','test3'), +`col1` MEDIUMBLOB, +`col2` MEDIUMBLOB, +`col3` DATETIME, +`col4` DATE, +`col5` YEAR, +`col6` REAL, +`col7` NUMERIC, +`col8` MEDIUMBLOB, +`col9` TEXT, +`col10` TIMESTAMP, +`col11` DOUBLE, +`col12` DOUBLE, +`col13` SMALLINT, +`col14` TIMESTAMP, +`col15` DECIMAL, +`col16` DATE, +`col17` TEXT, +`col18` LONGBLOB, +`col19` BIGINT, +`col20` FLOAT, +`col21` DATETIME, +`col22` TINYINT, +`col23` MEDIUMBLOB, +`col24` SET ('test1','test2','test3'), +`col25` TIME, +`col26` TEXT, +`col27` LONGTEXT, +`col28` BIGINT, +`col29` REAL, +`col30` YEAR, +`col31` MEDIUMBLOB, +`col32` MEDIUMINT, +`col33` FLOAT, +`col34` TEXT, +`col35` DATE, +`col36` TIMESTAMP, +`col37` REAL, +`col38` BLOB, +`col39` BLOB, +`col40` BLOB, +`col41` TINYBLOB, +`col42` INT, +`col43` TINYINT, +`col44` REAL, +`col45` BIGINT, +`col46` 
TIMESTAMP, +`col47` BLOB, +`col48` ENUM ('test1','test2','test3'), +`col49` BOOL, +`col50` CHAR (109), +`col51` DOUBLE, +`col52` DOUBLE PRECISION, +`col53` ENUM ('test1','test2','test3'), +`col54` FLOAT, +`col55` DOUBLE PRECISION, +`col56` CHAR (166), +`col57` TEXT, +`col58` TIME, +`col59` DECIMAL, +`col60` TEXT, +`col61` ENUM ('test1','test2','test3'), +`col62` LONGTEXT, +`col63` YEAR, +`col64` DOUBLE, +`col65` CHAR (87), +`col66` DATE, +`col67` BOOL, +`col68` MEDIUMBLOB, +`col69` DATETIME, +`col70` DECIMAL, +`col71` TIME, +`col72` REAL, +`col73` LONGTEXT, +`col74` BLOB, +`col75` REAL, +`col76` INT, +`col77` INT, +`col78` FLOAT, +`col79` DOUBLE, +`col80` MEDIUMINT, +`col81` ENUM ('test1','test2','test3'), +`col82` VARCHAR (221), +`col83` BIGINT, +`col84` TINYINT, +`col85` BIGINT, +`col86` FLOAT, +`col87` MEDIUMBLOB, +`col88` CHAR (126), +`col89` MEDIUMBLOB, +`col90` DATETIME, +`col91` TINYINT, +`col92` DOUBLE, +`col93` NUMERIC, +`col94` DATE, +`col95` BLOB, +`col96` DATETIME, +`col97` TIME, +`col98` LONGBLOB, +`col99` INT, +`col100` SET ('test1','test2','test3'), +`col101` TINYBLOB, +`col102` INT, +`col103` MEDIUMBLOB, +`col104` MEDIUMTEXT, +`col105` FLOAT, +`col106` TINYBLOB, +`col107` VARCHAR (26), +`col108` TINYINT, +`col109` TIME, +`col110` TINYBLOB, +`col111` LONGBLOB, +`col112` TINYTEXT, +`col113` FLOAT, +`col114` TINYINT, +`col115` NUMERIC, +`col116` TIME, +`col117` SET ('test1','test2','test3'), +`col118` DATE, +`col119` SMALLINT, +`col120` BLOB, +`col121` TINYTEXT, +`col122` REAL, +`col123` YEAR, +`col124` REAL, +`col125` BOOL, +`col126` BLOB, +`col127` REAL, +`col128` MEDIUMBLOB, +`col129` TIMESTAMP, +`col130` LONGBLOB, +`col131` MEDIUMBLOB, +`col132` YEAR, +`col133` YEAR, +`col134` INT, +`col135` MEDIUMINT, +`col136` MEDIUMINT, +`col137` TINYTEXT, +`col138` TINYBLOB, +`col139` BLOB, +`col140` SET ('test1','test2','test3'), +`col141` ENUM ('test1','test2','test3'), +`col142` ENUM ('test1','test2','test3'), +`col143` TINYTEXT, +`col144` DATETIME, 
+`col145` TEXT, +`col146` DOUBLE PRECISION, +`col147` DECIMAL, +`col148` MEDIUMTEXT, +`col149` TINYTEXT, +`col150` SET ('test1','test2','test3'), +`col151` MEDIUMTEXT, +`col152` CHAR (126), +`col153` DOUBLE, +`col154` CHAR (243), +`col155` SET ('test1','test2','test3'), +`col156` SET ('test1','test2','test3'), +`col157` DATETIME, +`col158` DOUBLE, +`col159` NUMERIC, +`col160` DECIMAL, +`col161` FLOAT, +`col162` LONGBLOB, +`col163` LONGTEXT, +`col164` INT, +`col165` TIME, +`col166` CHAR (27), +`col167` VARCHAR (63), +`col168` TEXT, +`col169` TINYBLOB, +`col170` TINYBLOB, +`col171` ENUM ('test1','test2','test3'), +`col172` INT, +`col173` TIME, +`col174` DECIMAL, +`col175` DOUBLE, +`col176` MEDIUMBLOB, +`col177` LONGBLOB, +`col178` CHAR (43), +KEY `idx0` (`col131`(219)), +KEY `idx1` (`col67`,`col122`,`col59`,`col87`(33)), +KEY `idx2` (`col83`,`col42`,`col57`(152)), +KEY `idx3` (`col106`(124)), +KEY `idx4` (`col173`,`col80`,`col165`,`col89`(78)), +KEY `idx5` (`col174`,`col145`(108),`col23`(228),`col141`), +KEY `idx6` (`col157`,`col140`), +KEY `idx7` (`col130`(188),`col15`), +KEY `idx8` (`col52`), +KEY `idx9` (`col144`), +KEY `idx10` (`col155`), +KEY `idx11` (`col62`(230),`col1`(109)), +KEY `idx12` (`col151`(24),`col95`(85)), +KEY `idx13` (`col114`), +KEY `idx14` (`col42`,`col98`(56),`col146`), +KEY `idx15` (`col147`,`col39`(254),`col35`), +KEY `idx16` (`col79`), +KEY `idx17` (`col65`), +KEY `idx18` (`col149`(165),`col168`(119),`col32`,`col117`), +KEY `idx19` (`col64`), +KEY `idx20` (`col93`), +KEY `idx21` (`col64`,`col113`,`col104`(182)), +KEY `idx22` (`col52`,`col111`(189)), +KEY `idx23` (`col45`), +KEY `idx24` (`col154`,`col107`,`col110`(159)), +KEY `idx25` (`col149`(1),`col87`(131)), +KEY `idx26` (`col58`,`col115`,`col63`), +KEY `idx27` (`col95`(9),`col0`,`col87`(113)), +KEY `idx28` (`col92`,`col130`(1)), +KEY `idx29` (`col151`(129),`col137`(254),`col13`), +KEY `idx30` (`col49`), +KEY `idx31` (`col28`), +KEY `idx32` (`col83`,`col146`), +KEY `idx33` 
(`col155`,`col90`,`col17`(245)), +KEY `idx34` (`col174`,`col169`(44),`col107`), +KEY `idx35` (`col113`), +KEY `idx36` (`col52`), +KEY `idx37` (`col16`,`col120`(190)), +KEY `idx38` (`col28`), +KEY `idx39` (`col131`(165)), +KEY `idx40` (`col135`,`col26`(86)), +KEY `idx41` (`col69`,`col94`), +KEY `idx42` (`col105`,`col151`(38),`col97`), +KEY `idx43` (`col88`), +KEY `idx44` (`col176`(100),`col42`,`col73`(189),`col94`), +KEY `idx45` (`col2`(27),`col27`(116)) +)engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +DROP TABLE IF EXISTS table0; +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; +DROP TABLE IF EXISTS table3; +DROP TABLE IF EXISTS table4; +DROP TABLE IF EXISTS table5; +DROP TABLE IF EXISTS table6; + +EVAL SET GLOBAL innodb_file_per_table=$file_per_table; +SET sql_mode = default; diff --git a/mysql-test/suite/innodb_zip/t/bug36172.test b/mysql-test/suite/innodb_zip/t/bug36172.test new file mode 100644 index 00000000000..49590f40192 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/bug36172.test @@ -0,0 +1,30 @@ +# +# Test case for bug 36172 +# + +-- source include/not_embedded.inc +-- source include/have_innodb.inc +-- source include/have_innodb_16k.inc + +SET default_storage_engine=InnoDB; + +# we do not really care about what gets printed, we are only +# interested in getting success or failure according to our +# expectations + +-- disable_query_log +-- disable_result_log + +let $file_per_table=`select @@innodb_file_per_table`; +SET GLOBAL innodb_file_per_table=on; + +DROP TABLE IF EXISTS `table0`; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +CREATE TABLE `table0` ( `col0` tinyint(1) DEFAULT NULL, `col1` tinyint(1) DEFAULT NULL, `col2` tinyint(4) DEFAULT NULL, `col3` date DEFAULT NULL, `col4` time DEFAULT NULL, `col5` set('test1','test2','test3') DEFAULT NULL, `col6` time DEFAULT NULL, `col7` text, `col8` decimal(10,0) DEFAULT NULL, `col9` set('test1','test2','test3') DEFAULT NULL, `col10` float DEFAULT NULL, `col11` double DEFAULT NULL, `col12` 
enum('test1','test2','test3') DEFAULT NULL, `col13` tinyblob, `col14` year(4) DEFAULT NULL, `col15` set('test1','test2','test3') DEFAULT NULL, `col16` decimal(10,0) DEFAULT NULL, `col17` decimal(10,0) DEFAULT NULL, `col18` blob, `col19` datetime DEFAULT NULL, `col20` double DEFAULT NULL, `col21` decimal(10,0) DEFAULT NULL, `col22` datetime DEFAULT NULL, `col23` decimal(10,0) DEFAULT NULL, `col24` decimal(10,0) DEFAULT NULL, `col25` longtext, `col26` tinyblob, `col27` time DEFAULT NULL, `col28` tinyblob, `col29` enum('test1','test2','test3') DEFAULT NULL, `col30` smallint(6) DEFAULT NULL, `col31` double DEFAULT NULL, `col32` float DEFAULT NULL, `col33` char(175) DEFAULT NULL, `col34` tinytext, `col35` tinytext, `col36` tinyblob, `col37` tinyblob, `col38` tinytext, `col39` mediumblob, `col40` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `col41` double DEFAULT NULL, `col42` smallint(6) DEFAULT NULL, `col43` longblob, `col44` varchar(80) DEFAULT NULL, `col45` mediumtext, `col46` decimal(10,0) DEFAULT NULL, `col47` bigint(20) DEFAULT NULL, `col48` date DEFAULT NULL, `col49` tinyblob, `col50` date DEFAULT NULL, `col51` tinyint(1) DEFAULT NULL, `col52` mediumint(9) DEFAULT NULL, `col53` float DEFAULT NULL, `col54` tinyblob, `col55` longtext, `col56` smallint(6) DEFAULT NULL, `col57` enum('test1','test2','test3') DEFAULT NULL, `col58` datetime DEFAULT NULL, `col59` mediumtext, `col60` varchar(232) DEFAULT NULL, `col61` decimal(10,0) DEFAULT NULL, `col62` year(4) DEFAULT NULL, `col63` smallint(6) DEFAULT NULL, `col64` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `col65` blob, `col66` longblob, `col67` int(11) DEFAULT NULL, `col68` longtext, `col69` enum('test1','test2','test3') DEFAULT NULL, `col70` int(11) DEFAULT NULL, `col71` time DEFAULT NULL, `col72` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `col73` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', `col74` varchar(170) DEFAULT NULL, `col75` set('test1','test2','test3') DEFAULT 
NULL, `col76` tinyblob, `col77` bigint(20) DEFAULT NULL, `col78` decimal(10,0) DEFAULT NULL, `col79` datetime DEFAULT NULL, `col80` year(4) DEFAULT NULL, `col81` decimal(10,0) DEFAULT NULL, `col82` longblob, `col83` text, `col84` char(83) DEFAULT NULL, `col85` decimal(10,0) DEFAULT NULL, `col86` float DEFAULT NULL, `col87` int(11) DEFAULT NULL, `col88` varchar(145) DEFAULT NULL, `col89` date DEFAULT NULL, `col90` decimal(10,0) DEFAULT NULL, `col91` decimal(10,0) DEFAULT NULL, `col92` mediumblob, `col93` time DEFAULT NULL, KEY `idx0` (`col69`,`col90`,`col8`), KEY `idx1` (`col60`), KEY `idx2` (`col60`,`col70`,`col74`), KEY `idx3` (`col22`,`col32`,`col72`,`col30`), KEY `idx4` (`col29`), KEY `idx5` (`col19`,`col45`(143)), KEY `idx6` (`col46`,`col48`,`col5`,`col39`(118)), KEY `idx7` (`col48`,`col61`), KEY `idx8` (`col93`), KEY `idx9` (`col31`), KEY `idx10` (`col30`,`col21`), KEY `idx11` (`col67`), KEY `idx12` (`col44`,`col6`,`col8`,`col38`(226)), KEY `idx13` (`col71`,`col41`,`col15`,`col49`(88)), KEY `idx14` (`col78`), KEY `idx15` (`col63`,`col67`,`col64`), KEY `idx16` (`col17`,`col86`), KEY `idx17` (`col77`,`col56`,`col10`,`col55`(24)), KEY `idx18` (`col62`), KEY `idx19` (`col31`,`col57`,`col56`,`col53`), KEY `idx20` (`col46`), KEY `idx21` (`col83`(54)), KEY `idx22` (`col51`,`col7`(120)), KEY `idx23` (`col7`(163),`col31`,`col71`,`col14`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SET sql_mode = default; +insert ignore into `table0` set `col23` = 7887371.5084383683, `col24` = 4293854615.6906948000, `col25` = 'vitalist', `col26` = 'widespread', `col27` = '3570490', `col28` = 'habitual', `col30` = -5471, `col31` = 4286985783.6771750000, `col32` = 6354540.9826654866, `col33` = 'defoliation', `col34` = 'logarithms', `col35` = 'tegument\'s', `col36` = 'scouting\'s', `col37` = 'intermittency', `col38` = 'elongates', `col39` = 'prophecies', `col40` = '20560103035939', `col41` = 4292809130.0544143000, `col42` = 22057, `col43` = 'Hess\'s', 
`col44` = 'bandstand', `col45` = 'phenylketonuria', `col46` = 6338767.4018677324, `col47` = 5310247, `col48` = '12592418', `col49` = 'churchman\'s', `col50` = '32226125', `col51` = -58, `col52` = -6207968, `col53` = 1244839.3255104220, `col54` = 'robotized', `col55` = 'monotonous', `col56` = -26909, `col58` = '20720107023550', `col59` = 'suggestiveness\'s', `col60` = 'gemology', `col61` = 4287800670.2229986000, `col62` = '1944', `col63` = -16827, `col64` = '20700107212324', `col65` = 'Nicolais', `col66` = 'apteryx', `col67` = 6935317, `col68` = 'stroganoff', `col70` = 3316430, `col71` = '3277608', `col72` = '19300511045918', `col73` = '20421201003327', `col74` = 'attenuant', `col75` = '15173', `col76` = 'upstroke\'s', `col77` = 8118987, `col78` = 6791516.2735374002, `col79` = '20780701144624', `col80` = '2134', `col81` = 4290682351.3127537000, `col82` = 'unexplainably', `col83` = 'Storm', `col84` = 'Greyso\'s', `col85` = 4289119212.4306774000, `col86` = 7617575.8796655172, `col87` = -6325335, `col88` = 'fondue\'s', `col89` = '40608940', `col90` = 1659421.8093508712, `col91` = 8346904.6584368423, `col92` = 'reloads', `col93` = '5188366'; +CHECK TABLE table0 EXTENDED; +INSERT IGNORE INTO `table0` SET `col19` = '19940127002709', `col20` = 2383927.9055146948, `col21` = 4293243420.5621204000, `col22` = '20511211123705', `col23` = 4289899778.6573381000, `col24` = 4293449279.0540481000, `col25` = 'emphysemic', `col26` = 'dentally', `col27` = '2347406', `col28` = 'eruct', `col30` = 1222, `col31` = 4294372994.9941406000, `col32` = 4291385574.1173744000, `col33` = 'borrowing\'s', `col34` = 'septics', `col35` = 'ratter\'s', `col36` = 'Kaye', `col37` = 'Florentia', `col38` = 'allium', `col39` = 'barkeep', `col40` = '19510407003441', `col41` = 4293559200.4215522000, `col42` = 22482, `col43` = 'decussate', `col44` = 'Brom\'s', `col45` = 'violated', `col46` = 4925506.4635456400, `col47` = 930549, `col48` = '51296066', `col49` = 'voluminously', `col50` = '29306676', `col51` = -88, 
`col52` = -2153690, `col53` = 4290250202.1464887000, `col54` = 'expropriation', `col55` = 'Aberdeen\'s', `col56` = 20343, `col58` = '19640415171532', `col59` = 'extern', `col60` = 'Ubana', `col61` = 4290487961.8539081000, `col62` = '2147', `col63` = -24271, `col64` = '20750801194548', `col65` = 'Cunaxa\'s', `col66` = 'pasticcio', `col67` = 2795817, `col68` = 'Indore\'s', `col70` = 6864127, `col71` = '1817832', `col72` = '20540506114211', `col73` = '20040101012300', `col74` = 'rationalized', `col75` = '45522', `col76` = 'indene', `col77` = -6964559, `col78` = 4247535.5266884370, `col79` = '20720416124357', `col80` = '2143', `col81` = 4292060102.4466386000, `col82` = 'striving', `col83` = 'boneblack\'s', `col84` = 'redolent', `col85` = 6489697.9009369183, `col86` = 4287473465.9731131000, `col87` = 7726015, `col88` = 'perplexed', `col89` = '17153791', `col90` = 5478587.1108127078, `col91` = 4287091404.7004304000, `col92` = 'Boulez\'s', `col93` = '2931278'; +CHECK TABLE table0 EXTENDED; +DROP TABLE table0; +EVAL SET GLOBAL innodb_file_per_table=$file_per_table; diff --git a/mysql-test/suite/innodb_zip/t/bug52745.test b/mysql-test/suite/innodb_zip/t/bug52745.test new file mode 100644 index 00000000000..a3de7323efe --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/bug52745.test @@ -0,0 +1,105 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_zip.inc + +let $file_per_table=`select @@innodb_file_per_table`; +SET GLOBAL innodb_file_per_table=on; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +CREATE TABLE bug52745 ( + a2 int(10) unsigned DEFAULT NULL, + col37 time DEFAULT NULL, + col38 char(229) CHARACTER SET utf8 DEFAULT NULL, + col39 text, + col40 timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + col41 int(10) unsigned DEFAULT NULL, + col42 varchar(248) CHARACTER SET utf8 DEFAULT NULL, + col43 smallint(5) unsigned zerofill DEFAULT NULL, + col44 varchar(150) CHARACTER SET utf8 DEFAULT NULL, + col45 float unsigned zerofill DEFAULT 
NULL, + col46 binary(1) DEFAULT NULL, + col47 tinyint(4) DEFAULT NULL, + col48 tinyint(1) DEFAULT NULL, + col49 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', + col50 binary(1) DEFAULT NULL, + col51 double unsigned zerofill DEFAULT NULL, + col52 int(10) unsigned DEFAULT NULL, + col53 time DEFAULT NULL, + col54 double unsigned DEFAULT NULL, + col55 time DEFAULT NULL, + col56 mediumtext CHARACTER SET latin2, + col57 blob, + col58 decimal(52,16) unsigned zerofill NOT NULL DEFAULT '000000000000000000000000000000000000.0000000000000000', + col59 binary(1) DEFAULT NULL, + col60 longblob, + col61 time DEFAULT NULL, + col62 longtext CHARACTER SET utf8 COLLATE utf8_persian_ci, + col63 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', + col64 int(10) unsigned DEFAULT NULL, + col65 date DEFAULT NULL, + col66 timestamp NOT NULL DEFAULT '0000-00-00 00:00:00', + col67 binary(1) DEFAULT NULL, + col68 tinyblob, + col69 date DEFAULT NULL, + col70 tinyint(3) unsigned zerofill DEFAULT NULL, + col71 varchar(44) CHARACTER SET utf8 DEFAULT NULL, + col72 datetime DEFAULT NULL, + col73 smallint(5) unsigned zerofill DEFAULT NULL, + col74 longblob, + col75 bit(34) DEFAULT NULL, + col76 float unsigned zerofill DEFAULT NULL, + col77 year(4) DEFAULT NULL, + col78 tinyint(3) unsigned DEFAULT NULL, + col79 set('msfheowh','tbpxbgf','by','wahnrjw','myqfasxz','rsokyumrt') CHARACTER SET latin2 DEFAULT NULL, + col80 datetime DEFAULT NULL, + col81 smallint(6) DEFAULT NULL, + col82 enum('xtaurnqfqz','rifrse','kuzwpbvb','niisabk','zxavro','rbvasv','','uulrfaove','','') DEFAULT NULL, + col83 bigint(20) unsigned zerofill DEFAULT NULL, + col84 float unsigned zerofill DEFAULT NULL, + col85 double DEFAULT NULL, + col86 enum('ylannv','','vlkhycqc','snke','cxifustp','xiaxaswzp','oxl') CHARACTER SET latin1 COLLATE latin1_german2_ci DEFAULT NULL, + col87 varbinary(221) DEFAULT NULL, + col88 double unsigned DEFAULT NULL, + col89 float unsigned zerofill DEFAULT NULL, + col90 tinyblob +) ENGINE=InnoDB DEFAULT 
CHARSET=latin1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SET sql_mode = default; +INSERT IGNORE INTO bug52745 SET +col40='0000-00-00 00:00:00', +col51=16547, +col53='7711484', +col54=-28604, +col55='7112612', +col56='wakefulness\'', +col57=repeat('absorbefacient\'',106), +col58=11027, +col59='AM09gW7', +col60=repeat('Noelani\'',16), +col61='2520576', +col62='substitutiv', +col63='19950106155112', +col64=-12038, +col65='86238806', +col66='19600719080256', +col68=repeat('Sagittarius\'',54), +col69='38943902', +col70=1232, +col71='Elora\'', +col74=repeat('zipp',11), +col75='0', +col76=23254, +col78=13247, +col79='56219', +col80='20500609035724', +col81=11632, +col82=7, +col84=-23863, +col85=6341, +col87='HZdkf.4 s7t,5Rmq 8so fmr,ruGLUG25TrtI.yQ 2SuHq0ML7rw7.4 b2yf2E5TJxOtBBZImezDnzpj,uPYfznnEUDN1e9aQoO 2DsplB7TFWy oQJ br HLF :F,eQ p4i1oWsr lL3PG,hjCz6hYqN h1QTjLCjrv:QCdSzpYBibJAtZCxLOk3l6Blsh.W', +col88=16894, +col89=6161, +col90=repeat('gale',48); + +SHOW WARNINGS; + +DROP TABLE bug52745; + +EVAL SET GLOBAL innodb_file_per_table=$file_per_table; diff --git a/mysql-test/suite/innodb_zip/t/bug53591.test b/mysql-test/suite/innodb_zip/t/bug53591.test new file mode 100644 index 00000000000..1943c59fe17 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/bug53591.test @@ -0,0 +1,22 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_zip.inc + +let $file_per_table=`select @@innodb_file_per_table`; + +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_strict_mode=on; + +set old_alter_table=0; + +CREATE TABLE bug53591(a text charset utf8 not null) +ENGINE=InnoDB KEY_BLOCK_SIZE=1; +-- replace_result 8126 {checked_valid} 4030 {checked_valid} 1982 {checked_valid} +-- error ER_TOO_BIG_ROWSIZE +ALTER TABLE bug53591 ADD PRIMARY KEY(a(220)); +-- replace_result 8126 {checked_valid} 4030 {checked_valid} 1982 {checked_valid} +SHOW WARNINGS; + +DROP TABLE bug53591; + +EVAL SET GLOBAL innodb_file_per_table=$file_per_table; +SET GLOBAL innodb_strict_mode=DEFAULT; diff 
--git a/mysql-test/suite/innodb_zip/t/bug56680.test b/mysql-test/suite/innodb_zip/t/bug56680.test new file mode 100644 index 00000000000..694c5ffac59 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/bug56680.test @@ -0,0 +1,140 @@ +#Want to skip this test from daily Valgrind execution +--source include/no_valgrind_without_big.inc +# +# Bug #56680 InnoDB may return wrong results from a case-insensitive index +# +-- source include/have_innodb.inc +-- source include/have_innodb_zip.inc + +-- disable_query_log +SET @tx_isolation_orig = @@tx_isolation; +SET @innodb_file_per_table_orig = @@innodb_file_per_table; +# The flag innodb_change_buffering_debug is only available in debug builds. +# It instructs InnoDB to try to evict pages from the buffer pool when +# change buffering is possible, so that the change buffer will be used +# whenever possible. +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug; +-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = 1; +-- enable_query_log +SET GLOBAL tx_isolation='REPEATABLE-READ'; +SET GLOBAL innodb_file_per_table=on; + +CREATE TABLE bug56680( + a INT AUTO_INCREMENT PRIMARY KEY, + b CHAR(1), + c INT, + INDEX(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; + +INSERT INTO bug56680 VALUES(0,'x',1); +BEGIN; +SELECT b FROM bug56680; + +connect (con1,localhost,root,,); +connection con1; +BEGIN; +UPDATE bug56680 SET b='X'; + +connection default; +# This should return the last committed value 'x', but would return 'X' +# due to a bug in row_search_for_mysql(). +SELECT b FROM bug56680; +# This would always return the last committed value 'x'. +SELECT * FROM bug56680; + +connection con1; +ROLLBACK; +disconnect con1; + +connection default; + +SELECT b FROM bug56680; + +# For the rest of this test, use the READ UNCOMMITTED isolation level +# to see what exists in the secondary index. 
+SET GLOBAL tx_isolation='READ-UNCOMMITTED'; + +# Create enough rows for the table, so that the insert buffer will be +# used for modifying the secondary index page. There must be multiple +# index pages, because changes to the root page are never buffered. + +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; +INSERT INTO bug56680 SELECT 0,b,c FROM bug56680; + +BEGIN; +SELECT b FROM bug56680 LIMIT 2; + +connect (con1,localhost,root,,); +connection con1; +BEGIN; +DELETE FROM bug56680 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680 VALUES(1,'X',1); + +# This should force an insert buffer merge, and return 'X' in the first row. +SELECT b FROM bug56680 LIMIT 3; + +connection default; +SELECT b FROM bug56680 LIMIT 2; +CHECK TABLE bug56680; + +connection con1; +ROLLBACK; +SELECT b FROM bug56680 LIMIT 2; +CHECK TABLE bug56680; + +connection default; +disconnect con1; + +SELECT b FROM bug56680 LIMIT 2; + +CREATE TABLE bug56680_2( + a INT AUTO_INCREMENT PRIMARY KEY, + b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci, + c INT, + INDEX(b)) +ENGINE=InnoDB STATS_PERSISTENT=0; + +INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680; + +BEGIN; +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +DELETE FROM bug56680_2 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680_2 VALUES(1,'SS',1); + +# This should force an insert buffer merge, and return 'SS' in the first row. 
+SELECT HEX(b) FROM bug56680_2 LIMIT 3; +CHECK TABLE bug56680_2; + +# Test this with compressed tables. +ALTER TABLE bug56680_2 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +SELECT HEX(b) FROM bug56680_2 LIMIT 2; +DELETE FROM bug56680_2 WHERE a=1; +# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. +INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1); + +# This should force an insert buffer merge, and return 0xdf in the first row. +SELECT HEX(b) FROM bug56680_2 LIMIT 3; +CHECK TABLE bug56680_2; + +DROP TABLE bug56680_2; +DROP TABLE bug56680; + +-- disable_query_log +SET GLOBAL tx_isolation = @tx_isolation_orig; +SET GLOBAL innodb_file_per_table = @innodb_file_per_table_orig; +-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE +SET GLOBAL innodb_change_buffering_debug = @innodb_change_buffering_debug_orig; diff --git a/mysql-test/suite/innodb_zip/t/cmp_drop_table-master.opt b/mysql-test/suite/innodb_zip/t/cmp_drop_table-master.opt new file mode 100644 index 00000000000..a9a3d8c3db8 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/cmp_drop_table-master.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size=8M diff --git a/mysql-test/suite/innodb_zip/t/cmp_drop_table.test b/mysql-test/suite/innodb_zip/t/cmp_drop_table.test new file mode 100644 index 00000000000..145f55bb160 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/cmp_drop_table.test @@ -0,0 +1,57 @@ +-- source include/have_innodb.inc +-- source include/have_innodb_16k.inc + +let $per_table=`select @@innodb_file_per_table`; + +-- let $query_i_s = SELECT page_size FROM information_schema.innodb_cmpmem WHERE pages_used > 0 + +set global innodb_file_per_table=on; + +create table t1(a text) engine=innodb key_block_size=8; + +-- disable_query_log + +# insert some rows so we are using compressed pages +-- let $i = 10 +while ($i) +{ + insert into t1 values(repeat('abcdefghijklmnopqrstuvwxyz',100)); + dec $i; +} +-- enable_query_log + +# we should be using some 8K pages +-- eval $query_i_s + +drop table t1; + +# 
because of lazy eviction at drop table there should still be some +# used 8K pages +-- eval $query_i_s + +# create a non-compressed table and insert enough into it to evict +# compressed pages +create table t2(a text) engine=innodb; + +-- disable_query_log + +-- let $i = 500 +while ($i) +{ + insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000)); + dec $i; +} + +-- enable_query_log + +# now there should be no 8K pages in the buffer pool +-- eval $query_i_s + +drop table t2; + +# +# restore environment to the state it was before this test execution +# + +-- disable_query_log +eval set global innodb_file_per_table=$per_table; diff --git a/mysql-test/suite/innodb_zip/t/cmp_per_index.test b/mysql-test/suite/innodb_zip/t/cmp_per_index.test new file mode 100644 index 00000000000..58b7855219b --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/cmp_per_index.test @@ -0,0 +1,118 @@ +# +# Test information_schema.innodb_cmp_per_index +# + +-- source include/have_innodb.inc + +# Using innodb_log_compressed=0 leads to a larger number of page +# compressions, because page_cur_insert_rec_zip() will reorganize the +# page before attempting an insert followed by page compression and +# page_zip_compress_write_log_no_data(). 
+ +if (`SELECT @@innodb_log_compressed_pages = 0`) +{ + --skip Needs innodb_log_compressed_pages +} + +# numbers read in this test depend on the page size +-- source include/have_innodb_16k.inc +# include/restart_mysqld.inc does not work in embedded mode +-- source include/not_embedded.inc + +-- vertical_results + +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +# reset any leftover stats from previous tests +-- disable_query_log +-- disable_result_log +SELECT * FROM information_schema.innodb_cmp_per_index_reset; +-- enable_result_log +-- enable_query_log + +# see that the table is empty +SELECT * FROM information_schema.innodb_cmp_per_index; + +# create a table that uses compression +CREATE TABLE t ( + a INT, + b VARCHAR(512), + c VARCHAR(16), + PRIMARY KEY (a), + INDEX (b(512)), + INDEX (c(16)) +) ENGINE=INNODB KEY_BLOCK_SIZE=2; + +SELECT +database_name, +table_name, +index_name, +compress_ops, +compress_ops_ok, +uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; + +# insert some data into it +BEGIN; +-- disable_query_log +let $i=128; +while ($i) +{ + -- eval INSERT INTO t VALUES ($i, REPEAT('x', 512), NULL); + dec $i; +} +-- enable_query_log +COMMIT; + +ALTER TABLE t DROP INDEX c; + +GRANT USAGE ON *.* TO 'tuser01'@'localhost' IDENTIFIED BY 'cDJvI9s_Uq'; +FLUSH PRIVILEGES; + +-- connect (con1,localhost,tuser01,cDJvI9s_Uq,) +-- connection con1 + +-- error ER_SPECIFIC_ACCESS_DENIED_ERROR +SELECT * FROM information_schema.innodb_cmp_per_index; + +-- connection default +-- disconnect con1 + +DROP USER 'tuser01'@'localhost'; + +SELECT +database_name, +table_name, +index_name, +CASE WHEN compress_ops=47 and @@innodb_compression_level IN (4,8,9) THEN 65 +ELSE compress_ops END as compress_ops, +CASE WHEN compress_ops_ok=47 and @@innodb_compression_level IN (4,8,9) THEN 65 +ELSE compress_ops_ok END as compress_ops_ok, +uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; + +# restart mysqld and see that uncompress 
ops also gets increased when +# selecting from the table again + +-- source include/restart_mysqld.inc + +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SELECT COUNT(*) FROM t; + +SELECT +database_name, +table_name, +index_name, +compress_ops, +compress_ops_ok, +CASE WHEN uncompress_ops=6 and @@innodb_compression_level IN (4,8,9) THEN 9 +ELSE uncompress_ops END as uncompress_ops +FROM information_schema.innodb_cmp_per_index +ORDER BY 1, 2, 3; + +DROP TABLE t; + +SET GLOBAL innodb_cmp_per_index_enabled=default; diff --git a/mysql-test/suite/innodb_zip/t/create_options.test b/mysql-test/suite/innodb_zip/t/create_options.test new file mode 100644 index 00000000000..1a3dbdff90a --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/create_options.test @@ -0,0 +1,528 @@ +# Tests for various combinations of ROW_FORMAT and KEY_BLOCK_SIZE +# Related bugs; +# Bug#54679: ALTER TABLE causes compressed row_format to revert to compact +# Bug#56628: ALTER TABLE .. KEY_BLOCK_SIZE=0 produces untrue warning or unnecessary error +# Bug#56632: ALTER TABLE implicitly changes ROW_FORMAT to COMPRESSED +# Rules for interpreting CREATE_OPTIONS +# 1) Create options on an ALTER are added to the options on the +# previous CREATE or ALTER statements. +# 2) KEY_BLOCK_SIZE=0 is considered a unspecified value. +# If the current ROW_FORMAT has explicitly been set to COMPRESSED, +# InnoDB will use a default value of 8. Otherwise KEY_BLOCK_SIZE +# will not be used. +# 3) ROW_FORMAT=DEFAULT allows InnoDB to choose its own default, COMPACT. +# 4) ROW_FORMAT=DEFAULT and KEY_BLOCK_SIZE=0 can be used at any time to +# unset or erase the values persisted in the MySQL dictionary and +# by SHOW CTREATE TABLE. +# 5) When incompatible values for ROW_FORMAT and KEY_BLOCK_SIZE are +# both explicitly given, the ROW_FORMAT is always used in non-strict +# mode. 
+# 6) InnoDB will automatically convert a table to COMPRESSED only if a +# valid non-zero KEY_BLOCK_SIZE has been given and ROW_FORMAT=DEFAULT +# or has not been used on a previous CREATE TABLE or ALTER TABLE. +# 7) InnoDB strict mode is designed to prevent incompatible create +# options from being used together. +# 8) The non-strict behavior is intended to permit you to import a +# mysqldump file into a database that does not support compressed +# tables, even if the source database contained compressed tables. +# All invalid values and/or incompatible combinations of ROW_FORMAT +# and KEY_BLOCK_SIZE are automatically corrected +# +# *** innodb_strict_mode=ON *** +# 1) Valid ROW_FORMATs are COMPRESSED, COMPACT, DEFAULT, DYNAMIC +# & REDUNDANT. All others are rejected. +# 2) Valid KEY_BLOCK_SIZEs are 0,1,2,4,8,16. All others are rejected. +# 3) KEY_BLOCK_SIZE=0 can be used to set it to 'unspecified'. +# 4) KEY_BLOCK_SIZE=1,2,4,8 & 16 are incompatible with COMPACT, DYNAMIC & +# REDUNDANT. +# 5) KEY_BLOCK_SIZE=1,2,4,8 & 16 as well as ROW_FORMAT=COMPRESSED +# are incompatible with innodb_file_format=Antelope +# and innodb_file_per_table=OFF +# 6) KEY_BLOCK_SIZE on an ALTER must occur with ROW_FORMAT=COMPRESSED +# or ROW_FORMAT=DEFAULT if the ROW_FORMAT was previously specified +# as COMPACT, DYNAMIC or REDUNDANT. +# 7) KEY_BLOCK_SIZE on an ALTER can occur without a ROW_FORMAT if the +# previous ROW_FORMAT was DEFAULT, COMPRESSED, or unspecified. +# +# *** innodb_strict_mode=OFF *** +# 1. Ignore a bad KEY_BLOCK_SIZE, defaulting it to 8. +# 2. Ignore a bad ROW_FORMAT, defaulting to COMPACT. +# 3. Ignore a valid KEY_BLOCK_SIZE when an incompatible but valid +# ROW_FORMAT is specified. +# 4. If innodb_file_format=Antelope or innodb_file_per_table=OFF +# it will ignore ROW_FORMAT=COMPRESSED and non-zero KEY_BLOCK_SIZEs. 
+# +# See InnoDB documentation page "SQL Compression Syntax Warnings and Errors" +# This test case does not try to create tables with KEY_BLOCK_SIZE > 4 +# since they are rejected for InnoDB page sizes of 8k and 16k. +# See innodb_16k and innodb_8k for those tests. + +-- source include/have_innodb.inc +-- source include/have_innodb_zip.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +# These values can change during the test +LET $innodb_file_per_table_orig=`select @@innodb_file_per_table`; +LET $innodb_strict_mode_orig=`select @@session.innodb_strict_mode`; +--enable_query_log + +SET GLOBAL innodb_file_per_table=ON; + +# The first half of these tests are with strict mode ON. +SET SESSION innodb_strict_mode = ON; + +--echo # Test 1) StrictMode=ON, CREATE and ALTER with each ROW_FORMAT & KEY_BLOCK_SIZE=0 +--echo # KEY_BLOCK_SIZE=0 means 'no KEY_BLOCK_SIZE is specified' +--echo # 'FIXED' is sent to InnoDB since it is used by MyISAM. +--echo # But it is an invalid mode in InnoDB +--error ER_ILLEGAL_HA, 1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=FIXED; +SHOW WARNINGS; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +--error ER_ILLEGAL_HA_CREATE_OPTION 
+ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + + +--echo # Test 2) StrictMode=ON, CREATE with each ROW_FORMAT & a valid non-zero KEY_BLOCK_SIZE +--echo # KEY_BLOCK_SIZE is incompatible with COMPACT, REDUNDANT, & DYNAMIC +DROP TABLE t1; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + +--echo # Test 3) StrictMode=ON, ALTER with each ROW_FORMAT & a valid non-zero KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW 
WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + +--echo # Test 4) StrictMode=ON, CREATE with ROW_FORMAT=COMPACT, ALTER with a valid non-zero KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 5) StrictMode=ON, CREATE with a valid KEY_BLOCK_SIZE +--echo # ALTER with each ROW_FORMAT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=2; +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW CREATE TABLE t1; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW 
WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 6) StrictMode=ON, CREATE with an invalid KEY_BLOCK_SIZE. +DROP TABLE t1; +--error ER_ILLEGAL_HA, 1005 +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=9; +SHOW WARNINGS; + +--echo # Test 7) StrictMode=ON, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and +--echo # and a valid non-zero KEY_BLOCK_SIZE are rejected with Antelope +--echo # and that they can be set to default values during strict mode. 
+SET GLOBAL innodb_file_format=Antelope; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +--error 1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +--error 1478 +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SET GLOBAL innodb_file_format=Barracuda; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SET GLOBAL innodb_file_format=Antelope; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW CREATE TABLE t1; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SET GLOBAL innodb_file_format=Barracuda; + +--echo # Test 8) StrictMode=ON, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and +--echo # and a valid non-zero KEY_BLOCK_SIZE are rejected with +--echo # innodb_file_per_table=OFF and that they can be set to default +--echo # values during strict mode. 
+SET GLOBAL innodb_file_per_table=OFF; +DROP TABLE t1; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +--error ER_ILLEGAL_HA,1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +--error 1005 +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +--error 1478 +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DEFAULT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_per_table=ON; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SET GLOBAL innodb_file_per_table=ON; + +--echo ################################################## +SET SESSION innodb_strict_mode = OFF; + +--echo # Test 9) StrictMode=OFF, CREATE and ALTER with each ROW_FORMAT & KEY_BLOCK_SIZE=0 +--echo # KEY_BLOCK_SIZE=0 means 'no 
KEY_BLOCK_SIZE is specified' +--echo # 'FIXED' is sent to InnoDB since it is used by MyISAM. +--echo # It is an invalid mode in InnoDB, use COMPACT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=FIXED; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 10) StrictMode=OFF, CREATE with each ROW_FORMAT & a valid KEY_BLOCK_SIZE +--echo # KEY_BLOCK_SIZE is ignored with COMPACT, REDUNDANT, & DYNAMIC +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC 
KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + +--echo # Test 11) StrictMode=OFF, ALTER with each ROW_FORMAT & a valid KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=FIXED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=COMPACT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=DYNAMIC KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=REDUNDANT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ); +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; 
+ALTER TABLE t1 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + +--echo # Test 12) StrictMode=OFF, CREATE with ROW_FORMAT=COMPACT, ALTER with a valid KEY_BLOCK_SIZE +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 KEY_BLOCK_SIZE=4; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPACT; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 13) StrictMode=OFF, CREATE with a valid KEY_BLOCK_SIZE +--echo # ALTER with each ROW_FORMAT +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SHOW CREATE TABLE t1; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SHOW CREATE TABLE t1; +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=REDUNDANT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM 
information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPRESSED; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=DEFAULT KEY_BLOCK_SIZE=0; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +ALTER TABLE t1 ROW_FORMAT=COMPACT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 14) StrictMode=OFF, CREATE with an invalid KEY_BLOCK_SIZE, +--echo # it defaults to half of the page size. +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) KEY_BLOCK_SIZE=15; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 15) StrictMode=OFF, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and a +--echo valid KEY_BLOCK_SIZE are remembered but not used when ROW_FORMAT +--echo is reverted to Antelope and then used again when ROW_FORMAT=Barracuda. 
+DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_format=Antelope; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_format=Barracuda; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_format=Antelope; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_format=Barracuda; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + +--echo # Test 16) StrictMode=OFF, Make sure ROW_FORMAT= COMPRESSED & DYNAMIC and a +--echo valid KEY_BLOCK_SIZE are remembered but not used when innodb_file_per_table=OFF +--echo and then used again when innodb_file_per_table=ON. 
+DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_per_table=ON; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +DROP TABLE t1; +CREATE TABLE t1 ( i INT ) ROW_FORMAT=DYNAMIC; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_per_table=OFF; +ALTER TABLE t1 ADD COLUMN f1 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; +SET GLOBAL innodb_file_per_table=ON; +ALTER TABLE t1 ADD COLUMN f2 INT; +SHOW WARNINGS; +SELECT TABLE_NAME,ROW_FORMAT,CREATE_OPTIONS FROM information_schema.tables WHERE TABLE_NAME = 't1'; + + +--echo # Cleanup +DROP TABLE t1; + +--disable_query_log +EVAL SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; +EVAL SET SESSION innodb_strict_mode=$innodb_strict_mode_orig; +--enable_query_log + diff --git a/mysql-test/suite/innodb_zip/t/disabled.def b/mysql-test/suite/innodb_zip/t/disabled.def new file mode 100644 index 00000000000..2808bc556a7 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/disabled.def @@ -0,0 +1,17 @@ +############################################################################## +# +# List the test cases that are to be disabled temporarily. +# +# Separate the test case name and the comment with ':'. +# +# : BUG# +# +# Do not use any TAB characters for whitespace. 
+# +############################################################################## + +restart : Not supported by MariaDB 10.2 2/9/2016 jplindst +innochecksum : MDEV-10727 2/9/2016 jplindst +innochecksum_2 : MDEV-10727 2/9/2016 jplindst +innochecksum_3 : MDEV-10727 2/9/2016 jplindst +wl6560 : MDEV-10727 diff --git a/mysql-test/suite/innodb_zip/t/index_large_prefix.test b/mysql-test/suite/innodb_zip/t/index_large_prefix.test new file mode 100644 index 00000000000..d61cce8d484 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/index_large_prefix.test @@ -0,0 +1,441 @@ +# Testcase for worklog #5743: Lift the limit of index key prefixes + +--source include/have_innodb.inc +--source include/have_innodb_16k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +--enable_query_log + +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; + +set global innodb_file_per_table=1; + +-- echo ### Test 1 ### +# Create a table of DYNAMIC format, with a primary index of 1000 bytes in +# size +create table worklog5743(a TEXT not null, primary key (a(1000))) ROW_FORMAT=DYNAMIC; +show warnings; + +# Do some insertion and update to exercise the external cache +# code path +insert into worklog5743 values(repeat("a", 20000)); + +# default session, update the table +update worklog5743 set a = (repeat("b", 16000)); + +# Create a secondary index +create index idx on worklog5743(a(2000)); +show warnings; + +# Start a few sessions to do selections on table being updated in default +# session, so it would rebuild the previous version from undo log.
+# 1) Default session: Initiate an update on the externally stored column +# 2) Session con1: Select from table with repeated read +# 3) Session con2: Select from table with read uncommitted +# 4) Default session: rollback updates + +begin; +update worklog5743 set a = (repeat("x", 17000)); + +# Start a new session to select the column to force it build +# an earlier version of the clustered index through undo log. So it should +# just see the result of repeat("b", 16000) +select @@session.tx_isolation; +--connect (con1,localhost,root,,) +select a = repeat("x", 17000) from worklog5743; +select a = repeat("b", 16000) from worklog5743; + +# Start another session doing "read uncommitted" query, it +# should see the uncommitted update +--connect (con2,localhost,root,,) +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 17000) from worklog5743; + +# Roll back the transaction +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 2 ### +# Create a table with only a secondary index has large prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +create index idx on worklog5743(a1, a2(2000)); +show warnings; + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 3 ### +# Create a 
table with a secondary index has small (50 bytes) prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; + +create index idx on worklog5743(a1, a2(50)); + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 4 ### +# Create compressed tables with each KEY_BLOCK_SIZE. +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; +create table worklog5743_8(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=8; +create table worklog5743_16(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=16; + +# The maximum overall index record (not prefix) length of a +# compressed table is dependent on innodb-page-size (IPS), +# key_block_size (KBS) and the number of fields (NF). +# "Too big row" error (HA_ERR_TOO_BIG_ROW) will be returned if this +# limit is exceeded. +# See page_zip_empty_size() and Bug #47495 for more detail. 
+ +# Test edge cases for indexes using key_block_size=1 +set global innodb_large_prefix=0; +-- error ER_TOO_LONG_KEY,1118 +create index idx1 on worklog5743_1(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +-- error ER_TOO_LONG_KEY,1118 +create index idx2 on worklog5743_1(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_1(a2(436)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on worklog5743_1(a2(434)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_1(a1, a2(430)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; + +# Test edge cases for indexes using key_block_size=2 +set global innodb_large_prefix=0; +# Check index creation behavior without STRICT mode +SET sql_mode= ''; +create index idx1 on worklog5743_2(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +-- error ER_TOO_BIG_ROWSIZE +create index idx2 on worklog5743_2(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_2(a2(948)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on worklog5743_2(a2(946)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_2(a1, a2(942)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_2(a1, a2(940)); +show warnings; + +# Test edge cases for indexes using key_block_size=4 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_4(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +-- error ER_TOO_BIG_ROWSIZE +create index idx2 on worklog5743_4(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_4(a2(1972)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on 
worklog5743_4(a2(1970)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_4(a1, a2(1966)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_4(a1, a2(1964)); +show warnings; + +# Test edge cases for indexes using key_block_size=8 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_8(a2(1000)); +show warnings; +set global innodb_large_prefix=1; +create index idx2 on worklog5743_8(a2(3073)); +show warnings; +create index idx3 on worklog5743_8(a2(3072)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx4 on worklog5743_8(a1, a2(3069)); +show warnings; +create index idx5 on worklog5743_8(a1, a2(3068)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx6 on worklog5743_8(a1, a2(2000), a3(1069)); +show warnings; +create index idx7 on worklog5743_8(a1, a2(2000), a3(1068)); +show warnings; + +# Test edge cases for indexes using key_block_size=16 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_16(a2(1000)); +show warnings; +set global innodb_large_prefix=1; +create index idx2 on worklog5743_16(a2(3073)); +show warnings; +create index idx3 on worklog5743_16(a2(3072)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx4 on worklog5743_16(a1, a2(3069)); +show warnings; +create index idx5 on worklog5743_16(a1, a2(3068)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx6 on worklog5743_16(a1, a2(2000), a3(1069)); +show warnings; +create index idx7 on worklog5743_16(a1, a2(2000), a3(1068)); +show warnings; +set sql_mode= default; + +# Insert a large record into each of these tables. 
+insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); +insert into worklog5743_8 values(9, repeat("a", 10000), repeat("a", 10000)); +insert into worklog5743_16 values(9, repeat("a", 10000), repeat("a", 10000)); + +# Now if we change the global innodb_large_prefix back to 767, +# updates to these indexes should still be allowed. +set global innodb_large_prefix=0; +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +insert into worklog5743_8 values(2, repeat("b", 10000), repeat("b", 10000)); +insert into worklog5743_16 values(2, repeat("b", 10000), repeat("b", 10000)); +set global innodb_large_prefix=1; + +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; +select a1, left(a2, 20) from worklog5743_8; +select a1, left(a2, 20) from worklog5743_16; + +begin; + +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +update worklog5743_8 set a1 = 1000; +update worklog5743_16 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; +select a1, left(a2, 20) from worklog5743_8; +select a1, left(a2, 20) from worklog5743_16; + + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_16 where a1 = 9; +select a1, left(a2, 20) from 
worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +select a1, left(a2, 20) from worklog5743_16 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +select a1, left(a2, 20) from worklog5743_16 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743_1; +drop table worklog5743_2; +drop table worklog5743_4; +drop table worklog5743_8; +drop table worklog5743_16; + +-- echo ### Test 5 ### +# Create a table with large varchar columns and create indexes +# directly on these large columns to show that prefix limit is +# automatically applied and to show that limit. 
+create table worklog5743(a1 int, + a2 varchar(20000), + a3 varchar(3073), + a4 varchar(3072), + a5 varchar(3069), + a6 varchar(3068)) + ROW_FORMAT=DYNAMIC; +# Check index creation behavior without STRICT mode +SET sql_mode=''; +create index idx1 on worklog5743(a2); +create index idx2 on worklog5743(a3); +create index idx3 on worklog5743(a4); +show warnings; +SET sql_mode= default; +-- error ER_TOO_LONG_KEY +create index idx4 on worklog5743(a1, a2); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx5 on worklog5743(a1, a5); +show warnings; +create index idx6 on worklog5743(a1, a6); +show warnings; +show create table worklog5743; + +insert into worklog5743 values(9, + repeat("a", 20000), repeat("a", 3073), + repeat("a", 3072), repeat("a", 3069), + repeat("a", 3068)); + +begin; + +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1 from worklog5743 where a1 = 9; +select a1 from worklog5743 where a1 = 9; + +# Do read uncommitted, it would show there is no row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1 from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 6 ### +# Create a table with old format, and the limit is 768 bytes. 
+-- error ER_INDEX_COLUMN_TOO_LONG +create table worklog5743(a TEXT not null, primary key (a(1000))) +row_format=compact; + +create table worklog5743(a TEXT) +row_format=compact; + +# Exercise the column length check in ha_innobase::add_index() +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); + +# This should be successful +create index idx on worklog5743(a(767)); + +# Perform some DMLs +insert into worklog5743 values(repeat("a", 20000)); + +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); + +# Start a new session to select the table to force it build +# an earlier version of the clustered index through undo log +select @@session.tx_isolation; +--connection con1 +select a = repeat("a", 20000) from worklog5743; +--disconnect con1 + +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 25000) from worklog5743; +--disconnect con2 + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 7 ### +# Some border line tests on the column length. 
+# We have a limit of 3072 bytes for Barracuda table +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; + +# Length exceeds maximum supported key length +# It will be auto-truncated to 3072 +# Check index creation behavior without STRICT mode +SET sql_mode=''; +create index idx1 on worklog5743(a(3073)); +create index idx2 on worklog5743(a(3072)); +show create table worklog5743; +drop table worklog5743; +SET sql_mode= default; + +# We have a limit of 767 bytes for Antelope tables +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + + +eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; diff --git a/mysql-test/suite/innodb_zip/t/index_large_prefix_4k.test b/mysql-test/suite/innodb_zip/t/index_large_prefix_4k.test new file mode 100644 index 00000000000..a0229abc4f8 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/index_large_prefix_4k.test @@ -0,0 +1,400 @@ +# Testcase for worklog #5743: Lift the limit of index key prefixes + +--source include/have_innodb.inc +--source include/have_innodb_4k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +--enable_query_log + +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; + +set global innodb_file_per_table=1; + +-- echo ### Test 1 ### +# Create a table of DYNAMIC format, with a primary index of 768 bytes in +# size +create table worklog5743(a TEXT not null, primary key (a(768))) ROW_FORMAT=DYNAMIC; +show warnings; + +# Do some insertion and update to excercise the external cache +# code path +insert into 
worklog5743 values(repeat("a", 20000)); + +# default session, update the table +update worklog5743 set a = (repeat("b", 16000)); + +# Create a secondary index +SET sql_mode= ''; +create index idx on worklog5743(a(900)); +show warnings; +SET sql_mode= default; +# Start a few sessions to do selections on table being updated in default +# session, so it would rebuild the previous version from undo log. +# 1) Default session: Initiate an update on the externally stored column +# 2) Session con1: Select from table with repeated read +# 3) Session con2: Select from table with read uncommitted +# 4) Default session: rollback updates + +begin; +update worklog5743 set a = (repeat("x", 17000)); + +# Start a new session to select the column to force it build +# an earlier version of the clustered index through undo log. So it should +# just see the result of repeat("b", 16000) +select @@session.tx_isolation; +--connect (con1,localhost,root,,) +select a = repeat("x", 17000) from worklog5743; +select a = repeat("b", 16000) from worklog5743; + +# Start another session doing "read uncommitted" query, it +# should see the uncommitted update +--connect (con2,localhost,root,,) +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 17000) from worklog5743; + +# Roll back the transaction +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 2 ### +# Create a table with only a secondary index has large prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +create index idx on worklog5743(a1, a2(750)); +show warnings; + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 1111; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) 
from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 3 ### +# Create a table with a secondary index has small (50 bytes) prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; + +create index idx on worklog5743(a1, a2(50)); + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 2222; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 4 ### +# Create compressed tables with each KEY_BLOCK_SIZE. +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; + +# The maximum overall index record (not prefix) length of a +# compressed table is dependent on innodb-page-size (IPS), +# key_block_size (KBS) and the number of fields (NF). +# "Too big row" error (HA_ERR_TOO_BIG_ROW) will be returned if this +# limit is exceeded. +# See page_zip_empty_size() and Bug #47495 for more detail. 
+ +# Test edge cases for indexes using key_block_size=1 +set global innodb_large_prefix=0; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743_1(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_1(a2(436)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on worklog5743_1(a2(434)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_1(a1, a2(430)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; + +# Test edge cases for indexes using key_block_size=2 +set global innodb_large_prefix=1; +SET sql_mode= ''; +create index idx1 on worklog5743_2(a2(4000)); +show warnings; +show create table worklog5743_2; +create index idx3 on worklog5743_2(a2(769)); +show warnings; +create index idx4 on worklog5743_2(a2(768)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx5 on worklog5743_2(a1, a2(765)); +show warnings; +create index idx6 on worklog5743_2(a1, a2(764)); +show warnings; +# Test edge cases for indexes using key_block_size=4 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_4(a2(4000)); +show warnings; +show create table worklog5743_4; +create index idx3 on worklog5743_4(a2(769)); +show warnings; +create index idx4 on worklog5743_4(a2(768)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx5 on worklog5743_4(a1, a2(765)); +show warnings; +create index idx6 on worklog5743_4(a1, a2(764)); +show warnings; +SET sql_mode= default; +# Insert a large record into each of these tables. +insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); + +# Now if we change the global innodb_large_prefix back to 767, +# updates to these indexes should still be allowed. 
+set global innodb_large_prefix=0; +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +set global innodb_large_prefix=1; + +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; + +begin; + +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743_1; +drop table worklog5743_2; +drop table worklog5743_4; + +-- echo ### Test 5 ### +# Create a table with large varchar columns and create indexes +# directly on these large columns to show that prefix limit is +# automatically applied and to show that limit. 
+ +# This commented form of the test causes an unlimited page split +# on update of the int field - Bug 12636590 - INNODB; UPDATE OF +# LARGE RECORD CAUSES UNLIMITED PAGE SPLITS IN 8K PAGE SIZE +#create table worklog5743(a1 int, +# a2 varchar(20000), +# a3 varchar(3073), +# a4 varchar(3072), +# a5 varchar(3069), +# a6 varchar(3068)) +# ROW_FORMAT=DYNAMIC; +#create index idx1 on worklog5743(a2); +#create index idx2 on worklog5743(a3); +#create index idx3 on worklog5743(a4); +#show warnings; +#-- error ER_TOO_LONG_KEY +#create index idx4 on worklog5743(a1, a2); +#show warnings; +#-- error ER_TOO_LONG_KEY +#create index idx5 on worklog5743(a1, a5); +#show warnings; +#create index idx6 on worklog5743(a1, a6); +#show warnings; +#show create table worklog5743; +# +#insert into worklog5743 values(9, +# repeat("a", 20000), repeat("a", 3073), +# repeat("a", 3072), repeat("a", 3069), +# repeat("a", 3068)); +# + +create table worklog5743(a1 int, a2 varchar(20000)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a2); +show warnings; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(3072)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a2); +show warnings; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(769)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a2); +show warnings; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(768)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +show warnings; +insert into worklog5743 values(9, repeat("a", 768)); +update worklog5743 set a1 = 3333; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(765)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a1, a2); +show warnings; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(764)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a1, a2); +show 
warnings; +insert into worklog5743 values(9, repeat("a", 764)); + +begin; +update worklog5743 set a1 = 4444; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1 from worklog5743 where a1 = 9; +select a1 from worklog5743 where a1 = 9; + +# Do read uncommitted, it would show there is no row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1 from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 6 ### +# Create a table with old format, and the limit is 768 bytes. +-- error ER_TOO_LONG_KEY +create table worklog5743(a TEXT not null, primary key (a(1000))); + +create table worklog5743(a TEXT) ROW_FORMAT=COMPACT; + +# Excercise the column length check in ha_innobase::add_index() +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); + +# This should be successful +create index idx on worklog5743(a(767)); + +# Perform some DMLs +insert into worklog5743 values(repeat("a", 20000)); + +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); + +# Start a new session to select the table to force it build +# an earlier version of the cluster index through undo log +select @@session.tx_isolation; +--connection con1 +select a = repeat("a", 20000) from worklog5743; +--disconnect con1 + +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 25000) from worklog5743; +--disconnect con2 + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 7 ### +# Some border line tests on the column length. 
+# We have a limit of 3072 bytes for Barracuda table +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; + +# Length exceeds maximum supported key length +# It will be auto-truncated to 3072 if the page size were not 4k. +# With this page size, the prefix length is less. +SET sql_mode= ''; +create index idx1 on worklog5743(a(769)); +show warnings; +SET sql_mode= default; +create index idx2 on worklog5743(a(768)); +show warnings; +show create table worklog5743; +insert into worklog5743 values(repeat("a", 768)); +drop table worklog5743; + +# We have a limit of 767 bytes for Antelope tables +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + + +eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; diff --git a/mysql-test/suite/innodb_zip/t/index_large_prefix_8k.test b/mysql-test/suite/innodb_zip/t/index_large_prefix_8k.test new file mode 100644 index 00000000000..00a97249a95 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/index_large_prefix_8k.test @@ -0,0 +1,429 @@ +# Testcase for worklog #5743: Lift the limit of index key prefixes + +--source include/have_innodb.inc +--source include/have_innodb_8k.inc +SET default_storage_engine=InnoDB; + +--disable_query_log +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is"); +--enable_query_log + +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; + +set global innodb_file_per_table=1; + +-- echo ### Test 1 ### +# Create a table of DYNAMIC format, with a primary index of 1000 bytes in +# size +create table worklog5743(a TEXT not null, primary key (a(1000))) ROW_FORMAT=DYNAMIC; 
+show warnings; + +# Do some insertion and update to excercise the external cache +# code path +insert into worklog5743 values(repeat("a", 20000)); + +# default session, update the table +update worklog5743 set a = (repeat("b", 16000)); + +# Create a secondary index +SET sql_mode= ''; +create index idx on worklog5743(a(2000)); +show warnings; +SET sql_mode= default; + +# Start a few sessions to do selections on table being updated in default +# session, so it would rebuild the previous version from undo log. +# 1) Default session: Initiate an update on the externally stored column +# 2) Session con1: Select from table with repeated read +# 3) Session con2: Select from table with read uncommitted +# 4) Default session: rollback updates + +begin; +update worklog5743 set a = (repeat("x", 17000)); + +# Start a new session to select the column to force it build +# an earlier version of the clustered index through undo log. So it should +# just see the result of repeat("b", 16000) +select @@session.tx_isolation; +--connect (con1,localhost,root,,) +select a = repeat("x", 17000) from worklog5743; +select a = repeat("b", 16000) from worklog5743; + +# Start another session doing "read uncommitted" query, it +# should see the uncommitted update +--connect (con2,localhost,root,,) +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 17000) from worklog5743; + +# Roll back the transaction +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 2 ### +# Create a table with only a secondary index has large prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; +show warnings; +create index idx on worklog5743(a1, a2(1250)); +show warnings; + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; 
+explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 3 ### +# Create a table with a secondary index has small (50 bytes) prefix column +create table worklog5743(a1 int, a2 TEXT not null) ROW_FORMAT=DYNAMIC; + +create index idx on worklog5743(a1, a2(50)); + +insert into worklog5743 values(9, repeat("a", 10000)); + +begin; + +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, a2 = repeat("a", 10000) from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 4 ### +# Create compressed tables with each KEY_BLOCK_SIZE. +create table worklog5743_1(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=1; +create table worklog5743_2(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=2; +create table worklog5743_4(a1 int, a2 TEXT not null) KEY_BLOCK_SIZE=4; +create table worklog5743_8(a1 int, a2 TEXT, a3 TEXT) KEY_BLOCK_SIZE=8; + +# The maximum overall index record (not prefix) length of a +# compressed table is dependent on innodb-page-size (IPS), +# key_block_size (KBS) and the number of fields (NF). 
+# "Too big row" error (HA_ERR_TOO_BIG_ROW) will be returned if this +# limit is exceeded. +# See page_zip_empty_size() and Bug #47495 for more detail. + +# Test edge cases for indexes using key_block_size=1 +set global innodb_large_prefix=0; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743_1(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +-- error ER_TOO_LONG_KEY +create index idx2 on worklog5743_1(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_1(a2(436)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on worklog5743_1(a2(434)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_1(a1, a2(430)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_1(a1, a2(428)); +show warnings; + +# Test edge cases for indexes using key_block_size=2 +set global innodb_large_prefix=0; +SET sql_mode= ''; +create index idx1 on worklog5743_2(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +-- error ER_TOO_BIG_ROWSIZE +create index idx2 on worklog5743_2(a2(4000)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx3 on worklog5743_2(a2(948)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx4 on worklog5743_2(a2(946)); +show warnings; +-- error ER_TOO_BIG_ROWSIZE +create index idx5 on worklog5743_2(a1, a2(942)); +show warnings; +# Bug#13391353 Limit is one byte less on on 32bit-Linux only +create index idx6 on worklog5743_2(a1, a2(940)); +show warnings; + +# Test edge cases for indexes using key_block_size=4 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_4(a2(4000)); +show warnings; +set global innodb_large_prefix=1; +create index idx3 on worklog5743_4(a2(1537)); +show warnings; +create index idx4 on worklog5743_4(a2(1536)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx5 on 
worklog5743_4(a1, a2(1533)); +show warnings; +create index idx6 on worklog5743_4(a1, a2(1532)); +show warnings; + +# Test edge cases for indexes using key_block_size=8 +set global innodb_large_prefix=0; +create index idx1 on worklog5743_8(a2(1000)); +show warnings; +set global innodb_large_prefix=1; +create index idx2 on worklog5743_8(a2(3073)); +show warnings; +create index idx3 on worklog5743_8(a2(3072)); +show warnings; +-- error ER_TOO_LONG_KEY +create index idx4 on worklog5743_8(a1, a2(1533)); +show warnings; +create index idx5 on worklog5743_8(a1, a2(1532)); +show warnings; +SET sql_mode= default; + +# Insert a large record into each of these tables. +insert into worklog5743_1 values(9, repeat("a", 10000)); +insert into worklog5743_2 values(9, repeat("a", 10000)); +insert into worklog5743_4 values(9, repeat("a", 10000)); +insert into worklog5743_8 values(9, repeat("a", 10000), repeat("a", 10000)); + +# Now if we change the global innodb_large_prefix back to 767, +# updates to these indexes should still be allowed. 
+set global innodb_large_prefix=0; +insert into worklog5743_1 values(2, repeat("b", 10000)); +insert into worklog5743_2 values(2, repeat("b", 10000)); +insert into worklog5743_4 values(2, repeat("b", 10000)); +insert into worklog5743_8 values(2, repeat("b", 10000), repeat("b", 10000)); +set global innodb_large_prefix=1; + +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; +select a1, left(a2, 20) from worklog5743_8; + +begin; + +update worklog5743_1 set a1 = 1000; +update worklog5743_2 set a1 = 1000; +update worklog5743_4 set a1 = 1000; +update worklog5743_8 set a1 = 1000; +select a1, left(a2, 20) from worklog5743_1; +select a1, left(a2, 20) from worklog5743_2; +select a1, left(a2, 20) from worklog5743_4; +select a1, left(a2, 20) from worklog5743_8; + + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +explain select a1, left(a2, 20) from worklog5743_8 where a1 = 9; +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; + +# Do read uncommitted in another session, it would show there is no +# row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1, left(a2, 20) from worklog5743_1 where a1 = 9; +select a1, left(a2, 20) from worklog5743_2 where a1 = 9; +select a1, left(a2, 20) from worklog5743_4 where a1 = 9; +select a1, left(a2, 20) from worklog5743_8 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743_1; +drop table worklog5743_2; +drop 
table worklog5743_4; +drop table worklog5743_8; + +-- echo ### Test 5 ### +# Create a table with large varchar columns and create indexes +# directly on these large columns to show that prefix limit is +# automatically applied and to show that limit. + +# This commented form of the test causes an unlimited page split +# on update of the int field - Bug 12636590 - INNODB; UPDATE OF +# LARGE RECORD CAUSES UNLIMITED PAGE SPLITS IN 8K PAGE SIZE +#create table worklog5743(a1 int, +# a2 varchar(20000), +# a3 varchar(3073), +# a4 varchar(3072), +# a5 varchar(3069), +# a6 varchar(3068)) +# ROW_FORMAT=DYNAMIC; +#create index idx1 on worklog5743(a2); +#create index idx2 on worklog5743(a3); +#create index idx3 on worklog5743(a4); +#show warnings; +#-- error ER_TOO_LONG_KEY +#create index idx4 on worklog5743(a1, a2); +#show warnings; +#-- error ER_TOO_LONG_KEY +#create index idx5 on worklog5743(a1, a5); +#show warnings; +#create index idx6 on worklog5743(a1, a6); +#show warnings; +#show create table worklog5743; +# +#insert into worklog5743 values(9, +# repeat("a", 20000), repeat("a", 3073), +# repeat("a", 3072), repeat("a", 3069), +# repeat("a", 3068)); +# + +create table worklog5743(a1 int, a2 varchar(20000)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a2); +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(1537)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a2); +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(1536)) ROW_FORMAT=DYNAMIC; +create index idx1 on worklog5743(a2); +show warnings; +insert into worklog5743 values(9, repeat("a", 1536)); +update worklog5743 set a1 = 1000; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(1533)) ROW_FORMAT=DYNAMIC; +-- error ER_TOO_LONG_KEY +create index idx1 on worklog5743(a1, a2); +show warnings; +drop table worklog5743; + +create table worklog5743(a1 int, a2 varchar(1532)) ROW_FORMAT=DYNAMIC; 
+create index idx1 on worklog5743(a1, a2); +show warnings; +insert into worklog5743 values(9, repeat("a", 1532)); +update worklog5743 set a1 = 1000; + +begin; +update worklog5743 set a1 = 1000; + +# Do a select from another connection that would use the secondary index +--connection con1 +select @@session.tx_isolation; +explain select a1 from worklog5743 where a1 = 9; +select a1 from worklog5743 where a1 = 9; + +# Do read uncommitted, it would show there is no row with a1 = 9 +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a1 from worklog5743 where a1 = 9; + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 6 ### +# Create a table with old format, and the limit is 768 bytes. +-- error ER_INDEX_COLUMN_TOO_LONG +create table worklog5743(a TEXT not null, primary key (a(1000))) +row_format=compact; + +create table worklog5743(a TEXT) row_format=compact; + +# Excercise the column length check in ha_innobase::add_index() +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); + +# This should be successful +create index idx on worklog5743(a(767)); + +# Perform some DMLs +insert into worklog5743 values(repeat("a", 20000)); + +begin; +insert into worklog5743 values(repeat("b", 20000)); +update worklog5743 set a = (repeat("x", 25000)); + +# Start a new session to select the table to force it build +# an earlier version of the cluster index through undo log +select @@session.tx_isolation; +--connection con1 +select a = repeat("a", 20000) from worklog5743; +--disconnect con1 + +--connection con2 +SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +select @@session.tx_isolation; +select a = repeat("x", 25000) from worklog5743; +--disconnect con2 + +--connection default +rollback; + +drop table worklog5743; + +-- echo ### Test 7 ### +# Some border line tests on the column length. 
+# We have a limit of 3072 bytes for Barracuda table +create table worklog5743(a TEXT not null) ROW_FORMAT=DYNAMIC; + +# Length exceeds maximum supported key length +# It will be auto-truncated to 3072 +SET sql_mode= ''; +create index idx1 on worklog5743(a(3073)); +create index idx2 on worklog5743(a(3072)); +SET sql_mode= default; +show create table worklog5743; +drop table worklog5743; + +# We have a limit of 767 bytes for Antelope tables +create table worklog5743(a TEXT not null) ROW_FORMAT=REDUNDANT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + +create table worklog5743(a TEXT not null) ROW_FORMAT=COMPACT; +-- error ER_INDEX_COLUMN_TOO_LONG +create index idx on worklog5743(a(768)); +create index idx2 on worklog5743(a(767)); +drop table worklog5743; + + +eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; diff --git a/mysql-test/suite/innodb_zip/t/innochecksum.test b/mysql-test/suite/innodb_zip/t/innochecksum.test new file mode 100644 index 00000000000..fd64e6d0d0c --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/innochecksum.test @@ -0,0 +1,240 @@ +#************************************************************ +# WL6045:Improve Innochecksum +#************************************************************ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +--source include/no_valgrind_without_big.inc +# Embedded server does not support crashing. +--source include/not_embedded.inc + +# Avoid CrashReporter popup on Mac. 
+--source include/not_crashrep.inc + +--echo # Set the environmental variables +let MYSQLD_BASEDIR= `SELECT @@basedir`; +let MYSQLD_DATADIR= `SELECT @@datadir`; +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/my_restart.err; +call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); +call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed"); + +SET GLOBAL innodb_file_per_table=on; + +CREATE TABLE tab1(c1 INT PRIMARY KEY,c2 VARCHAR(20)) ENGINE=InnoDB; +CREATE INDEX idx1 ON tab1(c2(10)); +INSERT INTO tab1 VALUES(1, 'Innochecksum InnoDB1'); +CREATE TABLE t1(id INT AUTO_INCREMENT PRIMARY KEY, msg VARCHAR(255)) ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; +insert into t1 values(1,"i"); +insert into t1 values(2,"am"); +insert into t1 values(3,"compressed table"); + +--echo # Shutdown the Server +--source include/shutdown_mysqld.inc +--echo # Server Default checksum = innodb + +--echo [1a]: check the innochecksum when file doesn't exists +--error 1 +--exec $INNOCHECKSUM $MYSQLD_DATADIR/test/aa.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: $MYSQLD_DATADIR/test/aa.ibd cannot be found; +--source include/search_pattern_in_file.inc + +--echo [1b]: check the innochecksum without --strict-check +--exec $INNOCHECKSUM $MYSQLD_DATADIR/test/tab1.ibd + +--echo [2]: check the innochecksum with full form --strict-check=crc32 +--exec $INNOCHECKSUM --strict-check=crc32 $MYSQLD_DATADIR/test/tab1.ibd + +--echo [3]: check the innochecksum with short form -C crc32 +--exec $INNOCHECKSUM -C crc32 $MYSQLD_DATADIR/test/tab1.ibd + +--echo [4]: check the innochecksum with --no-check ignores algorithm check, warning is expected +--error 1 +--exec $INNOCHECKSUM --no-check $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: --no-check must be associated with --write option.; +--source include/search_pattern_in_file.inc + +--echo [5]: check the innochecksum with short form --no-check ignores 
algorithm check, warning is expected +--error 1 +--exec $INNOCHECKSUM -n $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: --no-check must be associated with --write option.; +--source include/search_pattern_in_file.inc + +--echo [6]: check the innochecksum with full form strict-check & no-check , an error is expected +--error 1 +--exec $INNOCHECKSUM --strict-check=innodb --no-check $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: --strict-check option cannot be used together with --no-check option.; +--source include/search_pattern_in_file.inc + +--echo [7]: check the innochecksum with short form strict-check & no-check , an error is expected +--error 1 +--exec $INNOCHECKSUM -C innodb -n $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: --strict-check option cannot be used together with --no-check option.; +--source include/search_pattern_in_file.inc + +--echo [8]: check the innochecksum with short & full form combination +--echo # strict-check & no-check, an error is expected +--error 1 +--exec $INNOCHECKSUM --strict-check=innodb -n $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: --strict-check option cannot be used together with --no-check option.; +--source include/search_pattern_in_file.inc + +--echo [9]: check the innochecksum with full form --strict-check=innodb +# Server Default checksum = crc32 +--exec $INNOCHECKSUM --strict-check=innodb $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE + +--echo [10]: check the innochecksum with full form --strict-check=none +--echo # when server Default checksum=crc32 +--exec $INNOCHECKSUM --strict-check=none $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE + +--echo [11]: check the innochecksum with short form -C innodb +--echo # when server Default checksum=crc32 +--exec $INNOCHECKSUM -C innodb $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE + +--echo [12]: check the innochecksum with short form -C none +--echo # when server 
Default checksum=crc32 +--exec $INNOCHECKSUM -C none $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE + +--echo [13]: check strict-check with invalid values +--error 1 +--exec $INNOCHECKSUM --strict-check=strict_innodb $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_innodb\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -C strict_innodb $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_innodb\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --strict-check=strict_crc32 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_crc32\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -C strict_crc32 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_crc32\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --strict-check=strict_none $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_none\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -C strict_none $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'strict_none\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --strict-check=InnoBD $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'InnoBD\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -C InnoBD $MYSQLD_DATADIR/test/tab1.ibd 2>$SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'InnoBD\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + 
+--error 1 +--exec $INNOCHECKSUM --strict-check=crc $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'crc\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --strict-check=no $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error while setting value \'no\' to \'strict-check\'; +--source include/search_pattern_in_file.inc + +--echo [14a]: when server default checksum=crc32 rewrite new checksum=crc32 with innochecksum +--echo # Also check the long form of write option. +--exec $INNOCHECKSUM --strict-check=crc32 --write=crc32 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --strict-check=crc32 --write=crc32 $MYSQLD_DATADIR/test/t1.ibd +# Rewrite done, verify with --strict-check=crc32 +--exec $INNOCHECKSUM --strict-check=crc32 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --strict-check=crc32 $MYSQLD_DATADIR/test/t1.ibd + +--echo [14b]: when server default checksum=crc32 rewrite new checksum=innodb with innochecksum +--echo # Also check the long form of write option. +--exec $INNOCHECKSUM --no-check --write=innodb $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --strict-check=crc32 --write=innodb $MYSQLD_DATADIR/test/t1.ibd +# Rewrite done, verify with --strict-check=innodb +--exec $INNOCHECKSUM --strict-check=innodb $MYSQLD_DATADIR/test/tab1.ibd + +--echo # start the server with innodb_checksum_algorithm=InnoDB +--let restart_options= : --innodb_checksum_algorithm=innodb +--source include/start_mysqld.inc + +INSERT INTO tab1 VALUES(2, 'Innochecksum CRC32'); +SELECT c1,c2 FROM tab1 order by c1,c2; + +--echo # Stop the server +--source include/shutdown_mysqld.inc + +--echo [15]: when server default checksum=crc32 rewrite new checksum=none with innochecksum +--echo # Also check the short form of write option. 
+--exec $INNOCHECKSUM --no-check -w none $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --no-check -w none $MYSQLD_DATADIR/test/t1.ibd +# Rewrite done, verify with --strict-check=none +--exec $INNOCHECKSUM --strict-check=none $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --strict-check=none $MYSQLD_DATADIR/test/t1.ibd + +--echo # Start the server with checksum algorithm=none +--let restart_options= : --innodb_checksum_algorithm=none +--source include/start_mysqld.inc + +INSERT INTO tab1 VALUES(3, 'Innochecksum None'); +SELECT c1,c2 FROM tab1 order by c1,c2; +DROP TABLE t1; + +--echo # Stop the server +--source include/shutdown_mysqld.inc + +--echo [16]: rewrite into new checksum=crc32 with innochecksum +--exec $INNOCHECKSUM --no-check --write=crc32 $MYSQLD_DATADIR/test/tab1.ibd + +--echo # Restart the DB server with innodb_checksum_algorithm=crc32 +--let restart_options= : --innodb_checksum_algorithm=crc32 --innodb_file_per_table=on +--source include/start_mysqld.inc + +SELECT * FROM tab1; +DELETE FROM tab1 where c1=3; +SELECT c1,c2 FROM tab1 order by c1,c2; + +--echo # Stop server +--source include/shutdown_mysqld.inc + +--echo [17]: rewrite into new checksum=InnoDB +--exec $INNOCHECKSUM --no-check --write=InnoDB $MYSQLD_DATADIR/test/tab1.ibd + +--echo # Restart the DB server with innodb_checksum_algorithm=InnoDB +--let restart_options= : --innodb_checksum_algorithm=innodb --innodb_file_per_table=on +--source include/start_mysqld.inc + +DELETE FROM tab1 where c1=2; +SELECT * FROM tab1; + +--echo # Stop server +--source include/shutdown_mysqld.inc + +--echo [18]:check Innochecksum with invalid write options +--error 1 +--exec $INNOCHECKSUM --no-check --write=strict_crc32 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN=Error while setting value \'strict_crc32\' to \'write\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --no-check --write=strict_innodb $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let 
SEARCH_PATTERN=Error while setting value \'strict_innodb\' to \'write\'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --no-check --write=crc23 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN=Error while setting value \'crc23\' to \'write\'; +--source include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE + +# Cleanup +--echo # Restart the server +--source include/start_mysqld.inc + +DROP TABLE tab1; +SET GLOBAL innodb_file_per_table=default; diff --git a/mysql-test/suite/innodb_zip/t/innochecksum_2.test b/mysql-test/suite/innodb_zip/t/innochecksum_2.test new file mode 100644 index 00000000000..decec8e0f0a --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/innochecksum_2.test @@ -0,0 +1,114 @@ +#************************************************************ +# WL6045:Improve Innochecksum +#************************************************************ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc + +--source include/not_embedded.inc +-- source include/big_test.inc + +--disable_query_log +# This warning occurs due to small buffer pool size(i.e. 8MB). 
It doesn't occur +# with --mysqld=--innodb_buffer_pool_size=10MB +call mtr.add_suppression("\\[Warning\\] InnoDB: Difficult to find free blocks in the buffer pool.*"); +--enable_query_log +let MYSQLD_BASEDIR= `SELECT @@basedir`; +let MYSQLD_DATADIR= `SELECT @@datadir`; +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/my_restart.err; + +SET GLOBAL innodb_compression_level=0; +SELECT @@innodb_compression_level; + +CREATE TABLE t1 (j LONGBLOB) ENGINE = InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; +INSERT INTO t1 VALUES (repeat('abcdefghijklmnopqrstuvwxyz',200)); +let $i=10; +while ($i > 0) { + INSERT INTO t1 SELECT * from t1; + dec $i; +} + +--echo # stop the server +--source include/shutdown_mysqld.inc + +# Page_type_dump for t1 +--exec $INNOCHECKSUM -v --page-type-dump $MYSQLTEST_VARDIR/tmp/dump.txt $MYSQLD_DATADIR/test/t1.ibd +--file_exists $MYSQLTEST_VARDIR/tmp/dump.txt +--remove_file $MYSQLTEST_VARDIR/tmp/dump.txt + +--echo [1]:# check the both short and long options for "help" +--exec $INNOCHECKSUM --help $MYSQLD_DATADIR/test/t1.ibd > $MYSQLTEST_VARDIR/tmp/help_output_long.txt +--exec $INNOCHECKSUM -I $MYSQLD_DATADIR/test/t1.ibd > $MYSQLTEST_VARDIR/tmp/help_output_short.txt +--diff_files $MYSQLTEST_VARDIR/tmp/help_output_long.txt $MYSQLTEST_VARDIR/tmp/help_output_short.txt + +--echo [2]:# Run the innochecksum when file isn't provided. +--echo # It will print the innochecksum usage similar to --help option. 
+--error 1 +--exec $INNOCHECKSUM > $MYSQLTEST_VARDIR/tmp/usage.txt +--diff_files $MYSQLTEST_VARDIR/tmp/help_output_long.txt $MYSQLTEST_VARDIR/tmp/usage.txt +--remove_file $MYSQLTEST_VARDIR/tmp/usage.txt + +perl; +use strict; +use warnings; +use File::Copy; +my $dir = $ENV{'MYSQLTEST_VARDIR'}; +my $file= 'help_output_long.txt'; +# open file in write mode +open IN_FILE,"<", "$dir/tmp/$file" or die $!; +open OUT_FILE, ">", "$dir/tmp/tmpfile" or die $!; +while() { + unless ($_=~ /^debug.*$/ || $_=~ /\-#, \-\-debug.*$/ || $_=~ /http:.*html/) { + $_=~ s/^\S*innochecksum.+Ver.+[0-9]*\.[0-9]*\.[0-9]*.+$/innochecksum Ver #.#.#/g; + $_=~ s/(Copyright\s\(c\))\s([0-9]*),\s([0-9]*)(.*)/$1 YEAR, YEAR $4/g; + $_=~ s/Usage:.*\[-c/Usage: innochecksum [-c/g; + print OUT_FILE $_; + } +} +close(IN_FILE); +close(OUT_FILE); +# move the new content from tmp file to the orginal file. +move ("$dir/tmp/tmpfile", "$dir/tmp/$file"); +EOF + +--cat_file $MYSQLTEST_VARDIR/tmp/help_output_long.txt +--remove_file $MYSQLTEST_VARDIR/tmp/help_output_long.txt +--remove_file $MYSQLTEST_VARDIR/tmp/help_output_short.txt + +--echo [3]:# check the both short and long options for "count" and exit +--replace_regex /[0-9]+/#/ +--exec $INNOCHECKSUM --count $MYSQLD_DATADIR/test/t1.ibd +--replace_regex /[0-9]+/#/ +--exec $INNOCHECKSUM -c $MYSQLD_DATADIR/test/t1.ibd + +--echo [4]:# Print the version of innochecksum and exit +--replace_regex /.*innochecksum.*Ver.*[0-9]*.[0-9]*.[0-9]*.*/innochecksum Ver #.#.#/ +--exec $INNOCHECKSUM -V $MYSQLD_DATADIR/test/t1.ibd + +--echo # Restart the DB server +--source include/start_mysqld.inc + +DROP TABLE t1; + +--echo [5]:# Check the innochecksum for compressed table t1 with different key_block_size +--echo # Test for KEY_BLOCK_SIZE=1 +--let $size=1 +--source ../include/innodb-wl6045.inc + +--echo # Test for KEY_BLOCK_SIZE=2 +--let $size=2 +--source ../include/innodb-wl6045.inc + +--echo # Test for for KEY_BLOCK_SIZE=4 +--let $size=4 +--source ../include/innodb-wl6045.inc + 
+set innodb_strict_mode=off; +--echo # Test for for KEY_BLOCK_SIZE=8 +--let $size=8 +--source ../include/innodb-wl6045.inc + +set innodb_strict_mode=off; +--echo # Test for KEY_BLOCK_SIZE=16 +--let $size=16 +--source ../include/innodb-wl6045.inc +--echo # Test[5] completed diff --git a/mysql-test/suite/innodb_zip/t/innochecksum_3.test b/mysql-test/suite/innodb_zip/t/innochecksum_3.test new file mode 100644 index 00000000000..30e98aa25f5 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/innochecksum_3.test @@ -0,0 +1,378 @@ +#************************************************************ +# WL6045:Improve Innochecksum +#************************************************************ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc + +# Embedded server does not support crashing. +--source include/not_embedded.inc + +# Avoid CrashReporter popup on Mac. +--source include/not_crashrep.inc + +--echo # Set the environmental variables +let MYSQLD_BASEDIR= `SELECT @@basedir`; +let MYSQLD_DATADIR= `SELECT @@datadir`; +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/my_restart.err; +call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts"); + +SET GLOBAL innodb_file_per_table=on; + +--echo [1]: Further Test are for rewrite checksum (innodb|crc32|none) for all ibd file & start the server. 
+ +CREATE TABLE tab1 (pk INTEGER NOT NULL PRIMARY KEY, +linestring_key GEOMETRY NOT NULL, +linestring_nokey GEOMETRY NOT NULL) +ENGINE=InnoDB ; + +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (1, ST_GeomFromText('POINT(10 10) '), ST_GeomFromText('POINT(10 10) ')); + +CREATE INDEX linestring_index ON tab1(linestring_nokey(5)); +ALTER TABLE tab1 ADD KEY (linestring_key(5)); + +--echo # create a compressed table +CREATE TABLE tab2(col_1 CHAR (255) , +col_2 VARCHAR (255), col_3 longtext, +col_4 longtext,col_5 longtext, +col_6 longtext , col_7 int ) +engine = innodb row_format=compressed key_block_size=4; + +CREATE INDEX idx1 ON tab2(col_3(10)); +CREATE INDEX idx2 ON tab2(col_4(10)); +CREATE INDEX idx3 ON tab2(col_5(10)); + +# load the with repeat function +SET @col_1 = repeat('a', 5); +SET @col_2 = repeat('b', 20); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); + +# insert 5 records +let $i = 5; +while ($i) { + eval INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) + VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,$i); + dec $i; +} + +--disable_result_log +SELECT * FROM tab2 ORDER BY col_7; + +--echo # stop the server +--source include/shutdown_mysqld.inc + +--echo [1(a)]: Rewrite into new checksum=InnoDB for all *.ibd file and ibdata1 +--exec $INNOCHECKSUM --write=InnoDB $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --write=InnoDB $MYSQLD_DATADIR/test/tab2.ibd +--exec $INNOCHECKSUM --write=InnoDB $MYSQLD_DATADIR/ibdata1 +perl; +foreach (glob("$ENV{MYSQLD_DATADIR}/*/*.ibd")) { + system("$ENV{INNOCHECKSUM} --no-check --write=InnoDB $_") +} +EOF + +--echo : start the server with innodb_checksum_algorithm=strict_innodb +--let restart_options= : --innodb_checksum_algorithm=strict_innodb --default_storage_engine=InnoDB +--source include/start_mysqld.inc + +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (2, ST_GeomFromText('LINESTRING(10 10,20 
20,30 30)'), ST_GeomFromText('LINESTRING(10 10,20 20,30 30)')); + +# load the with repeat function +SET @col_1 = repeat('a', 5); +SET @col_2 = repeat('b', 20); +SET @col_3 = repeat('c', 100); +SET @col_4 = repeat('d', 100); +SET @col_5 = repeat('e', 100); +SET @col_6 = repeat('f', 100); + +# check the table status is GOOD with DML +let $i = 6; +eval INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,$i); + +-- disable_result_log +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; + +-- disable_result_log +SELECT * FROM tab2 ORDER BY col_7; + +--echo # stop the server +--source include/shutdown_mysqld.inc + +--echo [1(b)]: Rewrite into new checksum=crc32 for all *.ibd file and ibdata1 +--exec $INNOCHECKSUM --write=CRC32 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --write=CRC32 $MYSQLD_DATADIR/test/tab2.ibd +--exec $INNOCHECKSUM --write=CRC32 $MYSQLD_DATADIR/ibdata1 +perl; +foreach (glob("$ENV{MYSQLD_DATADIR}/*/*.ibd")) { + system("$ENV{INNOCHECKSUM} --no-check --write=crc32 $_") +} +EOF + +--echo # start the server with innodb_checksum_algorithm=strict_crc32 +--let restart_options= : --innodb_checksum_algorithm=strict_crc32 --default_storage_engine=InnoDB +--source include/start_mysqld.inc + +# check the table status is GOOD with DML +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (3, ST_GeomFromText('POLYGON((0 0,5 5,10 10,15 15,0 0),(10 10,20 20,30 30,40 40,10 10))'), +ST_GeomFromText('POLYGON((0 0,5 5,10 10,15 15,0 0),(10 10,20 20,30 30,40 40,10 10))')); + +# load the with repeat function +SET @col_1 = repeat('g', 5); +SET @col_2 = repeat('h', 20); +SET @col_3 = repeat('i', 100); +SET @col_4 = repeat('j', 100); +SET @col_5 = repeat('k', 100); +SET @col_6 = repeat('l', 100); + +# check the table status is GOOD with DML +let $i = 7; +eval INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES 
(@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,$i); + +# check the records from table +-- disable_result_log +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; + +-- disable_result_log +SELECT * FROM tab2 ORDER BY col_7; + +--echo # stop the server +--source include/shutdown_mysqld.inc + +--echo [1(c)]: Rewrite into new checksum=none for all *.ibd file and ibdata1 +--exec $INNOCHECKSUM --write=none $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --write=none $MYSQLD_DATADIR/test/tab2.ibd +--exec $INNOCHECKSUM --write=none $MYSQLD_DATADIR/ibdata1 +perl; +foreach (glob("$ENV{MYSQLD_DATADIR}/undo*")) { + system("$ENV{INNOCHECKSUM} --no-check --write=NONE $_") +} +foreach (glob("$ENV{MYSQLD_DATADIR}/*/*.ibd")) { + system("$ENV{INNOCHECKSUM} --no-check --write=NONE $_") +} +EOF + +let $restart_parameters = restart: --innodb_checksum_algorithm=strict_none --default_storage_engine=InnoDB; +--source include/start_mysqld.inc + +# check the table status is GOOD with DML +INSERT INTO tab1 (pk, linestring_key, linestring_nokey) +VALUES (4, ST_GeomFromText('MULTIPOINT(0 0,5 5,10 10,20 20) '), ST_GeomFromText('MULTIPOINT(0 0,5 5,10 10,20 20) ')); + +# load the with repeat function +SET @col_1 = repeat('m', 5); +SET @col_2 = repeat('n', 20); +SET @col_3 = repeat('o', 100); +SET @col_4 = repeat('p', 100); +SET @col_5 = repeat('q', 100); +SET @col_6 = repeat('r', 100); + +# check the table status is GOOD with DML +let $i = 8; +eval INSERT INTO tab2(col_1,col_2,col_3,col_4,col_5,col_6,col_7) +VALUES (@col_1,@col_2,@col_3,@col_4,@cl_5,@col_6,$i); + +# check the records from table +-- disable_result_log +SELECT pk,ST_AsText(linestring_key),ST_AsText(linestring_nokey) +FROM tab1 ORDER BY pk; + +--disable_result_log +SELECT * FROM tab2 ORDER BY col_7; +--enable_result_log + +--echo # stop the server +--source include/shutdown_mysqld.inc + +--echo [2]: Check the page type summary with shortform for tab1.ibd +--replace_regex /File.*.ibd/File::tab1.ibd/ 
/[0-9]+/#/ +--exec $INNOCHECKSUM -S $MYSQLD_DATADIR/test/tab1.ibd 2>$MYSQLTEST_VARDIR/tmp/page_summary_short.txt + +--echo [3]: Check the page type summary with longform for tab1.ibd +--replace_regex /File.*.ibd/File::tab1.ibd/ /[0-9]+/#/ +--exec $INNOCHECKSUM --page-type-summary $MYSQLD_DATADIR/test/tab1.ibd 2>$MYSQLTEST_VARDIR/tmp/page_summary_long.txt + +--remove_file $MYSQLTEST_VARDIR/tmp/page_summary_short.txt +--remove_file $MYSQLTEST_VARDIR/tmp/page_summary_long.txt +--echo [4]: Page type dump for with longform for tab1.ibd +--exec $INNOCHECKSUM --page-type-dump $MYSQLTEST_VARDIR/tmp/dump.txt $MYSQLD_DATADIR/test/tab1.ibd + +perl; +use strict; +use warnings; +use File::Copy; +my $dir = $ENV{'MYSQLTEST_VARDIR'}; +opendir(DIR, $dir) or die $!; +my $file= 'dump.txt'; +# open file in write mode +open IN_FILE,"<", "$dir/tmp/$file" or die $!; +open OUT_FILE, ">", "$dir/tmp/tmpfile" or die $!; +while() +{ + # Replace the intergers to # and complete file patht to file name only. + $_=~ s/Filename.+/Filename::tab1.ibd/g; + $_=~ s/\d+/#/g; + print OUT_FILE $_; +} +close(IN_FILE); +close(OUT_FILE); +# move the new content from tmp file to the orginal file. 
+move ("$dir/tmp/tmpfile", "$dir/tmp/$file"); +closedir(DIR); +EOF + +--echo # Print the contents stored in dump.txt +cat_file $MYSQLTEST_VARDIR/tmp/dump.txt; +--remove_file $MYSQLTEST_VARDIR/tmp/dump.txt + +# Page type dump for ibdata1 +--exec $INNOCHECKSUM -v --page-type-dump $MYSQLTEST_VARDIR/tmp/dump.txt $MYSQLD_DATADIR/ibdata1 +--file_exists $MYSQLTEST_VARDIR/tmp/dump.txt +--remove_file $MYSQLTEST_VARDIR/tmp/dump.txt + +--echo [5]: Page type dump for with shortform for tab1.ibd +--exec $INNOCHECKSUM -D $MYSQLTEST_VARDIR/tmp/dump.txt $MYSQLD_DATADIR/test/tab1.ibd + +perl; +use strict; +use warnings; +use File::Copy; +my $dir = $ENV{'MYSQLTEST_VARDIR'}; +opendir(DIR, $dir) or die $!; +my $file= 'dump.txt'; +# open file in write mode +open IN_FILE,"<", "$dir/tmp/$file" or die $!; +open OUT_FILE, ">", "$dir/tmp/tmpfile" or die $!; +while() +{ + # Replace teh intergers to # and complete file patht to file name only. + $_=~ s/Filename.+/Filename::tab1.ibd/g; + $_=~ s/\d+/#/g; + print OUT_FILE $_; +} +close(IN_FILE); +close(OUT_FILE); +# move the new content from tmp file to the orginal file. 
+move ("$dir/tmp/tmpfile", "$dir/tmp/$file"); +closedir(DIR); +EOF + +# Print the contents stored in dump.txt +cat_file $MYSQLTEST_VARDIR/tmp/dump.txt; +--remove_file $MYSQLTEST_VARDIR/tmp/dump.txt + +--echo [6]: check the valid lower bound values for option +--echo # allow-mismatches,page,start-page,end-page +--exec $INNOCHECKSUM --allow-mismatches=0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -a 0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --page=0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -p 0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --start-page=0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -s 0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --end-page=0 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -e 0 $MYSQLD_DATADIR/test/tab1.ibd + +--echo [7]: check the negative values for option +--echo # allow-mismatches,page,start-page,end-page. +--echo # They will reset to zero for negative values. +--echo # check the invalid lower bound values +--exec $INNOCHECKSUM --allow-mismatches=-1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -a -1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --page=-1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -p -1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --start-page=-1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -s -1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --end-page=-1 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -e -1 $MYSQLD_DATADIR/test/tab1.ibd + +--echo [8]: check the valid upper bound values for +--echo # both short and long options "allow-mismatches" and "end-page" + +--exec $INNOCHECKSUM --allow-mismatches=18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -a 18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM --end-page=18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd +--exec $INNOCHECKSUM -e 18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd + +--echo [9]: check the both 
short and long options "page" and "start-page" when +--echo # seek value is larger than file size. +--error 1 +--exec $INNOCHECKSUM --page=18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: Unable to seek to necessary offset: Invalid argument; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -p 18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: Unable to seek to necessary offset: Invalid argument; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --start-page=18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: Unable to seek to necessary offset: Invalid argument; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -s 18446744073709551615 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Error: Unable to seek to necessary offset: Invalid argument; +--source include/search_pattern_in_file.inc + +--echo [34]: check the invalid upper bound values for options, allow-mismatches, end-page, start-page and page. 
+--echo # innochecksum will fail with error code: 1 +--error 1 +--exec $INNOCHECKSUM --allow-mismatches=18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -a 18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --end-page=18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -e 18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --page=18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -p 18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM --start-page=18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc + +--error 1 +--exec $INNOCHECKSUM -s 18446744073709551616 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +let SEARCH_PATTERN= Incorrect unsigned integer value: '18446744073709551616'; +--source include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE + +# Cleanup +--echo # Restart the server +--source 
include/start_mysqld.inc + +DROP TABLE tab1; +DROP TABLE tab2; +SET GLOBAL innodb_file_per_table=default; diff --git a/mysql-test/suite/innodb_zip/t/innodb_bug36169.opt b/mysql-test/suite/innodb_zip/t/innodb_bug36169.opt new file mode 100644 index 00000000000..3a4e594f382 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/innodb_bug36169.opt @@ -0,0 +1 @@ +--innodb_large_prefix=ON \ No newline at end of file diff --git a/mysql-test/suite/innodb_zip/t/innodb_bug36169.test b/mysql-test/suite/innodb_zip/t/innodb_bug36169.test index 6426bd683ae..1d82b95a602 100644 --- a/mysql-test/suite/innodb_zip/t/innodb_bug36169.test +++ b/mysql-test/suite/innodb_zip/t/innodb_bug36169.test @@ -4,6 +4,8 @@ # http://bugs.mysql.com/36169 # +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size .* for a record on index leaf page."); + let $file_format=`select @@innodb_file_format`; let $file_per_table=`select @@innodb_file_per_table`; SET GLOBAL innodb_file_format='Barracuda'; diff --git a/mysql-test/suite/innodb_zip/t/innodb_bug53591.test b/mysql-test/suite/innodb_zip/t/innodb_bug53591.test index 8bc461719b8..6c80165f6eb 100644 --- a/mysql-test/suite/innodb_zip/t/innodb_bug53591.test +++ b/mysql-test/suite/innodb_zip/t/innodb_bug53591.test @@ -5,7 +5,7 @@ let $file_per_table=`select @@innodb_file_per_table`; SET GLOBAL innodb_file_format='Barracuda'; SET GLOBAL innodb_file_per_table=on; - +SET GLOBAL innodb_strict_mode=on; set old_alter_table=0; CREATE TABLE bug53591(a text charset utf8 not null) @@ -20,3 +20,4 @@ DROP TABLE bug53591; EVAL SET GLOBAL innodb_file_format=$file_format; EVAL SET GLOBAL innodb_file_per_table=$file_per_table; +SET GLOBAL innodb_strict_mode=DEFAULT; diff --git a/mysql-test/suite/innodb_zip/t/innodb_index_large_prefix.test b/mysql-test/suite/innodb_zip/t/innodb_index_large_prefix.test index 17f82f88fef..8af2bc7ffc7 100644 --- 
a/mysql-test/suite/innodb_zip/t/innodb_index_large_prefix.test +++ b/mysql-test/suite/innodb_zip/t/innodb_index_large_prefix.test @@ -4,6 +4,8 @@ --source include/have_innodb_16k.inc SET default_storage_engine=InnoDB; +call mtr.add_suppression("Cannot add field .* in table .* because after adding it, the row size is .* which is greater than maximum allowed size (.*) for a record on index leaf page."); + let $innodb_file_format_orig=`select @@innodb_file_format`; let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; let $innodb_large_prefix_orig=`select @@innodb_large_prefix`; @@ -11,6 +13,7 @@ let $innodb_large_prefix_orig=`select @@innodb_large_prefix`; set global innodb_file_format="Barracuda"; set global innodb_file_per_table=1; set global innodb_large_prefix=1; +set global innodb_strict_mode=1; -- echo ### Test 1 ### # Create a table of DYNAMIC format, with a primary index of 1000 bytes in @@ -365,9 +368,9 @@ drop table worklog5743; -- echo ### Test 6 ### # Create a table with old format, and the limit is 768 bytes. 
-- error ER_INDEX_COLUMN_TOO_LONG -create table worklog5743(a TEXT not null, primary key (a(1000))); +create table worklog5743(a TEXT not null, primary key (a(1000))) row_format=COMPACT; -create table worklog5743(a TEXT); +create table worklog5743(a TEXT) row_format=COMPACT; # Excercise the column length check in ha_innobase::add_index() -- error ER_INDEX_COLUMN_TOO_LONG @@ -428,6 +431,7 @@ drop table worklog5743; eval SET GLOBAL innodb_file_format=$innodb_file_format_orig; eval SET GLOBAL innodb_file_per_table=$innodb_file_per_table_orig; eval SET GLOBAL innodb_large_prefix=$innodb_large_prefix_orig; +SET GLOBAL innodb_strict_mode = DEFAULT; --connection con1 --disconnect con1 --source include/wait_until_disconnected.inc diff --git a/mysql-test/suite/innodb_zip/t/innodb_prefix_index_liftedlimit.test b/mysql-test/suite/innodb_zip/t/innodb_prefix_index_liftedlimit.test deleted file mode 100644 index 1c02cafa47e..00000000000 --- a/mysql-test/suite/innodb_zip/t/innodb_prefix_index_liftedlimit.test +++ /dev/null @@ -1,1371 +0,0 @@ -######## suite/innodb/t/innodb_prefix_index_liftedlimit.test ########## -# # -# Testcase for worklog WL#5743: Lift the limit of index key prefixes # -# Accorrding to WL#5743 - prefix index limit is increased from 767 # -# to 3072 for innodb. This change is applicable with Barracuda file # -# format. 
# -# All sub-test in this file focus on prefix index along with other # -# operations # -# # -# # -# Creation: # -# 2011-05-19 Implemented this test as part of WL#5743 # -# # -###################################################################### - ---source include/have_innodb.inc ---source include/have_innodb_16k.inc - -# Save innodb variables ---disable_query_log -let $innodb_file_format_orig = `select @@innodb_file_format`; -let $innodb_file_per_table_orig = `select @@innodb_file_per_table`; -let $innodb_large_prefix_orig = `select @@innodb_large_prefix`; ---enable_query_log - -# Set Innodb file format as feature works for Barracuda file format -set global innodb_file_format="Barracuda"; -set global innodb_file_per_table=1; -set global innodb_large_prefix=1; - --- disable_warnings -DROP TABLE IF EXISTS worklog5743; --- enable_warnings -#------------------------------------------------------------------------------ -# Prefix index with VARCHAR data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -# check IS -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE 
col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; ---error ER_INDEX_COLUMN_TOO_LONG -ALTER TABLE worklog5743 ROW_FORMAT=REDUNDANT; ---error ER_INDEX_COLUMN_TOO_LONG -ALTER TABLE worklog5743 ROW_FORMAT=COMPACT; -ALTER TABLE worklog5743 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with TEXT data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (3072)); -# check IS -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with MEDIUMTEXT data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_mediumtext MEDIUMTEXT , col_2_mediumtext MEDIUMTEXT , -PRIMARY KEY (col_1_mediumtext(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON 
worklog5743(col_1_mediumtext (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_mediumtext = REPEAT("a", 4000),col_2_mediumtext = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_mediumtext = REPEAT("c", 4000) -WHERE col_1_mediumtext = REPEAT("a", 4000) -AND col_2_mediumtext = REPEAT("o", 4000); -SELECT col_1_mediumtext = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_mediumtext = REPEAT("c", 4000) -AND col_2_mediumtext = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_mediumtext = REPEAT("b", 4000); -SELECT col_1_mediumtext = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with LONGTEXT data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_longtext LONGTEXT , col_2_longtext LONGTEXT , -PRIMARY KEY (col_1_longtext(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_longtext (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_longtext = REPEAT("a", 4000) , col_2_longtext = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_longtext = REPEAT("c", 4000) -WHERE col_1_longtext = REPEAT("a", 4000) -AND col_2_longtext = REPEAT("o", 4000); -SELECT col_1_longtext = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_longtext = REPEAT("c", 4000) -AND col_2_longtext = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_longtext = REPEAT("b", 4000); -SELECT col_1_longtext = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with BLOB data type , 
primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_blob (3072)); -# check IS -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE FROM -INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with MEDIUMBLOB data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_mediumblob MEDIUMBLOB , col_2_mediumblob MEDIUMBLOB , -PRIMARY KEY (col_1_mediumblob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_mediumblob (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_mediumblob = REPEAT("a", 4000),col_2_mediumblob = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_mediumblob = REPEAT("c", 4000) -WHERE col_1_mediumblob = REPEAT("a", 4000) -AND col_2_mediumblob = REPEAT("o", 4000); -SELECT col_1_mediumblob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_mediumblob = REPEAT("c", 4000) -AND 
col_2_mediumblob = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_mediumblob = REPEAT("b", 4000); -SELECT col_1_mediumblob = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with LONGBLOB data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_longblob LONGBLOB , col_2_longblob LONGBLOB , -PRIMARY KEY (col_1_longblob(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_longblob (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_longblob = REPEAT("a", 4000) , col_2_longblob = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_longblob = REPEAT("c", 4000) -WHERE col_1_longblob = REPEAT("a", 4000) -AND col_2_longblob = REPEAT("o", 4000); -SELECT col_1_longblob = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_longblob = REPEAT("c", 4000) -AND col_2_longblob = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_longblob = REPEAT("b", 4000); -SELECT col_1_longblob = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with VARBINARY data type , primary/secondary index and DML ops -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varbinary VARBINARY (4000) , -PRIMARY KEY (col_1_varbinary(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) , 
col_2_varbinary = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with CHAR data type , composite index and DML ops -CREATE TABLE worklog5743 (col_1_char CHAR (255) , col_2_char CHAR (255), -col_3_char CHAR (255), col_4_char CHAR (255),col_5_char CHAR (255), -col_6_char CHAR (255), col_7_char CHAR (255),col_8_char CHAR (255), -col_9_char CHAR (255), col_10_char CHAR (255),col_11_char CHAR (255), -col_12_char CHAR (255), col_13_char CHAR (255),col_14_char CHAR (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255) -); -# Create index with total prefix index length = 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_char(250),col_2_char(250), -col_3_char(250),col_4_char(250),col_5_char(250),col_6_char(250), -col_7_char(250),col_8_char(250),col_9_char(250),col_10_char(250), -col_11_char(250),col_12_char(250),col_13_char(72) -); -INSERT INTO worklog5743 VALUES(REPEAT("b", 255) , REPEAT("p", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 
255),REPEAT("a", 255) -); -SELECT col_1_char = REPEAT("a", 255) , col_2_char = REPEAT("o", 255) FROM worklog5743; -UPDATE worklog5743 SET col_1_char = REPEAT("c", 255) -WHERE col_1_char = REPEAT("a", 255) AND col_2_char = REPEAT("o", 255); -SELECT col_1_char = REPEAT("c", 255) FROM worklog5743 -WHERE col_1_char = REPEAT("c", 255) AND col_2_char = REPEAT("o", 255); -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255),REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_char = REPEAT("b", 255); -SELECT col_1_char = REPEAT("c", 255) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with BINARY data type , composite index and DML ops -CREATE TABLE worklog5743 (col_1_binary BINARY (255) , col_2_binary BINARY (255), -col_3_binary BINARY(255),col_4_binary BINARY (255),col_5_binary BINARY (255), -col_6_binary BINARY(255),col_7_binary BINARY (255),col_8_binary BINARY (255), -col_9_binary BINARY(255),col_10_binary BINARY (255),col_11_binary BINARY (255), -col_12_binary BINARY(255),col_13_binary BINARY (255),col_14_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -# Create index with total prefix index length = 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_binary (250),col_2_binary (250), -col_3_binary (250),col_4_binary (250),col_5_binary (250), -col_6_binary (250),col_7_binary (250),col_8_binary (250), -col_9_binary (250),col_10_binary (250),col_11_binary 
(250), -col_12_binary (250),col_13_binary (72) -); -INSERT INTO worklog5743 VALUES(REPEAT("b", 255) , REPEAT("p", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -SELECT col_1_binary = REPEAT("a", 255) , col_2_binary = REPEAT("o", 255) FROM worklog5743; -UPDATE worklog5743 SET col_1_binary = REPEAT("c", 255) -WHERE col_1_binary = REPEAT("a", 255) -AND col_2_binary = REPEAT("o", 255); -SELECT col_1_binary = REPEAT("c", 255) FROM worklog5743 -WHERE col_1_binary = REPEAT("c", 255) -AND col_2_binary = REPEAT("o", 255); -INSERT INTO worklog5743 VALUES(REPEAT("a", 255) , REPEAT("o", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255), -REPEAT("a", 255) , REPEAT("o", 255), REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_binary = REPEAT("b", 255); -SELECT col_1_binary = REPEAT("c", 255) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with VARCHAR data type , primary/seconday index , DML ops -# and COMPRESSED row format. KEY_BLOCK_SIZE is varied as 2 , 4 , 8. 
- -# With KEY_BLOCK_SIZE = 2,prefix index limit comes around ~948 for following -CREATE TABLE worklog5743_key2 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key2 (col_1_varchar (767)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key2; -UPDATE worklog5743_key2 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key2 -WHERE col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key2 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key2; -DROP TABLE worklog5743_key2; - -# With KEY_BLOCK_SIZE = 4,prefix index limit comes around ~1964 for following -CREATE TABLE worklog5743_key4 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key4 (col_1_varchar (767)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key4; -UPDATE worklog5743_key4 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743_key4 
VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key4 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key4; -DROP TABLE worklog5743_key4; - -# With KEY_BLOCK_SIZE = 8,prefix index limit comes around ~3072 for following -CREATE TABLE worklog5743_key8 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key8 (col_1_varchar (767)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743_key8; -UPDATE worklog5743_key8 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743_key8; -DROP TABLE worklog5743_key8; - -# Prefix index with TEXT data type , primary/seconday index , DML ops -# and COMPRESSED row format. KEY_BLOCK_SIZE is varied as 2 , 4 , 8. 
- -# With KEY_BLOCK_SIZE = 2,prefix index limit comes around ~948 for following -CREATE TABLE worklog5743_key2 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key2 (col_1_text (767)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key2; -UPDATE worklog5743_key2 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key2 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key2 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key2; -DROP TABLE worklog5743_key2; - -# With KEY_BLOCK_SIZE = 4,prefix index limit comes around ~1964 for following -CREATE TABLE worklog5743_key4 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key4 (col_1_text (767)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key4; -UPDATE worklog5743_key4 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE 
FROM worklog5743_key4 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key4; -DROP TABLE worklog5743_key4; - -# With KEY_BLOCK_SIZE = 8,prefix index limit comes around ~3072 for following -CREATE TABLE worklog5743_key8 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key8 (col_1_text (767)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743_key8; -UPDATE worklog5743_key8 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743_key8; -DROP TABLE worklog5743_key8; - -# Prefix index with BLOB data type , primary/seconday index , DML ops -# and COMPRESSED row format. KEY_BLOCK_SIZE is varied as 2 , 4 , 8. 
- -# With KEY_BLOCK_SIZE = 2,prefix index limit comes around ~948 for following -CREATE TABLE worklog5743_key2 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key2 (col_1_blob (767)); -INSERT INTO worklog5743_key2 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key2; -UPDATE worklog5743_key2 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key2 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -INSERT INTO worklog5743_key2 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key2 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key2; -DROP TABLE worklog5743_key2; - -# With KEY_BLOCK_SIZE = 4,prefix index limit comes around ~1964 for following -CREATE TABLE worklog5743_key4 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(1964)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4, engine = innodb; -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key4 (col_1_blob (767)); -INSERT INTO worklog5743_key4 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key4; -UPDATE worklog5743_key4 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key4 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -INSERT INTO worklog5743_key4 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE 
FROM worklog5743_key4 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key4; -DROP TABLE worklog5743_key4; - -# With KEY_BLOCK_SIZE = 8,prefix index limit comes around ~3072 for following -CREATE TABLE worklog5743_key8 ( -col_1_blob BLOB (4000) , col_2_blob BLOB (4000) , -PRIMARY KEY (col_1_blob(3072)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8, engine = innodb; -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743_key8 (col_1_blob (767)); -INSERT INTO worklog5743_key8 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_blob = REPEAT("a", 4000) , col_2_blob = REPEAT("o", 4000) -FROM worklog5743_key8; -UPDATE worklog5743_key8 SET col_1_blob = REPEAT("c", 4000) -WHERE col_1_blob = REPEAT("a", 4000) AND col_2_blob = REPEAT("o", 4000); -SELECT col_1_blob = REPEAT("b", 3500) FROM worklog5743_key8 -WHERE col_1_blob = REPEAT("c", 4000) AND col_2_blob = REPEAT("o", 4000); -INSERT INTO worklog5743_key8 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743_key8 WHERE col_1_blob = REPEAT("b", 4000); -SELECT col_1_blob = REPEAT("c", 4000) FROM worklog5743_key8; -DROP TABLE worklog5743_key8; - - -#------------------------------------------------------------------------------ -# Create mutiple prefix index. We can not create prefix index length > 16K -# as index is written in undo log page which of 16K size. 
-# So we can create max 2 prefix index of length 3072 on table -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varchar VARCHAR (4000) , -col_3_text TEXT (4000), col_4_blob BLOB (4000), col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); - -# Update reports ER_UNDO_RECORD_TOO_BIG if we create more than 2 indexes. -# Bug#12547647 - UPDATE LOGGING COULD EXCEED LOG PAGE SIZE -CREATE INDEX prefix_idx1 ON worklog5743(col_1_varbinary (3072)); -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (3072)); - -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); - -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; - -# Add 3 more indexes. 
-# Update used to hang but now ER_UNDO_RECORD_TOO_BIG is reported; -# Bug#12547647 - UPDATE LOGGING COULD EXCEED UNDO LOG PAGE SIZE -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (3072)); -CREATE INDEX prefix_idx4 ON worklog5743(col_4_blob (3072)); -CREATE INDEX prefix_idx5 ON worklog5743(col_5_text (3072)); ---error ER_UNDO_RECORD_TOO_BIG -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SHOW WARNINGS; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Create mutiple prefix index. We can not create prefix index length > 16K as -# we write in undo log page which of 16K size. -# so we can create max 5 prefix index of length 3072 on table. -# Similar to above case but with transactions -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varchar VARCHAR (4000) , -col_3_text TEXT (4000), col_4_blob BLOB (4000),col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; - - -# Update used to hang if we create following 5 indexes. 
Fixed in; -# Bug#12547647 - UPDATE LOGGING COULD EXCEED UNDO LOG PAGE SIZE -CREATE INDEX prefix_idx1 ON worklog5743(col_1_varbinary (3072)); -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (3072)); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (3072)); -CREATE INDEX prefix_idx4 ON worklog5743(col_4_blob (3072)); -CREATE INDEX prefix_idx5 ON worklog5743(col_5_text (3072)); - -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -ROLLBACK; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -COMMIT; -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; - -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -ROLLBACK; -# Bug#12547647 - UPDATE LOGGING COULD EXCEED LOG PAGE SIZE -# Instead of this error, it would hang before this fix. 
---error ER_UNDO_RECORD_TOO_BIG -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SHOW WARNINGS; -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with utf8 charset -# utf8 charcter takes 3 bytes in mysql so prefix index limit is 3072/3 = 1024 -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) CHARACTER SET 'utf8', -col_2_text TEXT (4000) CHARACTER SET 'utf8', -PRIMARY KEY (col_1_text(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1024)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -# Prefix index with utf8 charset + varchar. 
-# utf8 charcter takes 3 bytes in mysql so prefix index limit is 3072/3 = 1024 -# This is a case where dict_index_too_big_for_undo() is too conservative. -# If it did not return error 1118, to commented code would work. -# See bug#12953735. ---replace_regex /> [0-9]*/> max_row_size/ --- error ER_TOO_BIG_ROWSIZE -CREATE TABLE worklog5743 (col_1_varchar VARCHAR (4000) CHARACTER SET 'utf8', -col_2_varchar VARCHAR (4000) CHARACTER SET 'utf8' , -PRIMARY KEY (col_1_varchar(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -#INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -#CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (1024)); -#INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -#SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) FROM worklog5743; -#UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -#WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -#SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -#WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -#INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -#DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -#SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -#DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# prefinx index on utf8 charset with transaction -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , -col_2_varchar VARCHAR (4000) CHARACTER SET 'utf8', -col_3_text TEXT (4000) CHARACTER SET 'utf8', -col_4_blob BLOB (4000),col_5_text TEXT (4000), -col_6_varchar VARCHAR (4000), col_7_binary BINARY (255) -) ROW_FORMAT=DYNAMIC, engine = innodb; - - -CREATE INDEX prefix_idx2 ON worklog5743(col_2_varchar (500)); -CREATE INDEX prefix_idx3 ON worklog5743(col_3_text (500)); - -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), 
-REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -ROLLBACK; -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -COMMIT; -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; - -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -ROLLBACK; -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000), -REPEAT("a", 4000) , REPEAT("o", 4000), REPEAT("a", 4000), -REPEAT("a", 4000) , REPEAT("a", 255) -); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("b", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with utf8 charset on TEXT data type with actual utf8 character -# like "स" and "क" -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) CHARACTER SET 'utf8', -col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(1024)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("स", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1024)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("स", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("क", 4000) -WHERE col_1_text = REPEAT("स", 4000) AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("क", 
4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("b", 4000); -SELECT col_1_text = REPEAT("क", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with transaction when 2 client are ruuning there transaction -# in different sessions.With ISOLATION LEVEL as REPEATABLE READ and -# READ UNCOMMITTED. -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; - ---connect (con1,localhost,root,,) -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; - - ---connect (con2,localhost,root,,) -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -# Uncomment after Bug#12552164 - TRANSACTION CAN NOT SEE OLD VERSION ROWS THAT -# BEING UPDATED -#UPDATE worklog5743 SET col_1_varchar = REPEAT("d", 200) WHERE col_1_varchar = -#REPEAT("a", 200) AND col_2_varchar = REPEAT("o", 200); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; - - ---connection con1 -select @@session.tx_isolation; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; -select @@session.tx_isolation; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; - -START TRANSACTION; - -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) 
FROM worklog5743; - ---connection con2 -COMMIT; -# Wait for commit -let $wait_condition=SELECT COUNT(*)=0 FROM information_schema.processlist -WHERE info='COMMIT'; ---source include/wait_condition.inc - ---connection con1 -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; -COMMIT; - ---connection default -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Prefix index with transaction when 2 client are ruuning there transaction -# in different sessions.With ISOLATION LEVEL as REPEATABLE READ and -# READ UNCOMMITTED. Same as above case but con2 starts tnx before con1 - -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; - ---connection con1 -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; -START TRANSACTION; - - ---connection con2 -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("a", 200); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -COMMIT; -# Wait for commit -let $wait_condition=SELECT COUNT(*)=0 FROM information_schema.processlist -WHERE info='COMMIT'; ---source include/wait_condition.inc - - ---connection con1 -SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; -select @@session.tx_isolation; -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; - -SELECT col_1_text = REPEAT("b", 200) , 
col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; -COMMIT; - ---connection default -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ - -# Prefix index with transaction when 2 client are ruuning there transaction -# in different sessions.With ISOLATION LEVEL as REPEATABLE READ and -# READ UNCOMMITTED. Same as above cases but with ROLLBACK - -CREATE TABLE worklog5743 ( -col_1_text TEXT(4000) , col_2_text TEXT(4000) , -PRIMARY KEY (col_1_text(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 200) , REPEAT("o", 200)); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; - ---connection con1 -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; -START TRANSACTION; - - ---connection con2 -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("b", 200) , REPEAT("o", 200)); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("a", 200); -SELECT col_1_text = REPEAT("a", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -ROLLBACK; -# Wait for rollback -let $wait_condition=SELECT COUNT(*)=0 FROM information_schema.processlist -WHERE info='COMMIT'; ---source include/wait_condition.inc - - ---connection con1 -SELECT col_1_text = REPEAT("b", 200) , col_2_text = REPEAT("o", 200) FROM -worklog5743; -SELECT COUNT(*) FROM worklog5743; -COMMIT; - ---disconnect con1 ---source include/wait_until_disconnected.inc ---connection con2 ---disconnect con2 ---source include/wait_until_disconnected.inc - ---connection default -DROP TABLE worklog5743; - - -#------------------------------------------------------------------------------ -# Select queries on prefix index column as index will be used in queries. -# Use few select functions , join condition , subqueries. 
- -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; - -# Select with Join -SELECT tbl1.col_1_varchar = tbl2.col_1_varchar -FROM worklog5743 tbl1 , worklog5743 tbl2 -WHERE tbl1.col_1_varchar = tbl2.col_1_varchar ; - -# Select in subquey -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) ; -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar NOT IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) ; -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 WHERE -col_1_varchar IN (SELECT tbl2.col_1_varchar FROM worklog5743 tbl2) -AND col_1_varchar = REPEAT("c", 4000); -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar in ( -SELECT tbl2.col_1_varchar FROM worklog5743 tbl2 -WHERE tbl1.col_1_varchar != tbl2.col_1_varchar -) ; -SELECT tbl1.col_1_varchar = REPEAT("c", 4000) FROM worklog5743 tbl1 -WHERE col_1_varchar in ( -SELECT tbl2.col_1_varchar FROM worklog5743 tbl2 -WHERE tbl1.col_1_varchar = tbl2.col_1_varchar -) ; - -# function -SELECT -REVERSE(col_1_varchar) = REPEAT("c", 
4000) , -REVERSE(REVERSE(col_1_varchar)) = REPEAT("c", 4000) -FROM worklog5743; -SELECT -UPPER(col_1_varchar) = REPEAT("c", 4000) , -UPPER(col_1_varchar) = REPEAT("C", 4000) , -LOWER(UPPER(col_1_varchar)) = REPEAT("c", 4000) -FROM worklog5743; -SELECT -col_1_varchar = REPEAT("c", 4000) -FROM worklog5743 WHERE col_1_varchar like '%c__%'; -SELECT SUBSTRING(INSERT(col_1_varchar, 1, 4, 'kkkk'),1,10) FROM worklog5743 ; -SELECT CONCAT(SUBSTRING(col_1_varchar,-5,3),'append') FROM worklog5743 ; - - -DROP TABLE worklog5743; - -#------------------------------------------------------------------------------ -# Prefix index with NULL values -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , -col_2_varchar VARCHAR (4000) , -UNIQUE INDEX (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 1000),REPEAT("c", 1000)), REPEAT("o", 4000)); -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 2000)), REPEAT("o", 4000)); -INSERT INTO worklog5743 VALUES(NULL,NULL); -INSERT INTO worklog5743 VALUES(NULL,NULL); -# check IS -SELECT COLUMN_NAME,INDEX_NAME,SUB_PART,INDEX_TYPE -FROM INFORMATION_SCHEMA.STATISTICS WHERE table_name = 'worklog5743' ; -SELECT col_1_varchar FROM worklog5743 WHERE col_1_varchar IS NULL; -SELECT col_1_varchar = concat(REPEAT("a", 2000),REPEAT("b", 2000)) -FROM worklog5743 WHERE col_1_varchar IS NOT NULL ORDER BY 1; - - -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- -# Try drop and add secondary prefix index -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072))) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -# Drop index 
-DROP INDEX prefix_idx ON worklog5743; - -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -# Again add index -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- - -# Try drop and add primary prefix index -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY `prefix_primary` (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -# Drop index -ALTER TABLE worklog5743 DROP PRIMARY KEY; - -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) -AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) -AND col_2_varchar = REPEAT("o", 4000); -# Again add index -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar(3072)); - -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); ---error ER_DUP_ENTRY -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); 
-SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -# ----------------------------------------------------------------------------- - -# Try drop and add both (primary/secondary) prefix index -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY `prefix_primary` (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -# Drop primary index -ALTER TABLE worklog5743 DROP PRIMARY KEY; -# Drop secondary index -DROP INDEX prefix_idx ON worklog5743; - -SELECT col_1_varchar = REPEAT("a", 4000) , col_2_varchar = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varchar = REPEAT("c", 4000) -WHERE col_1_varchar = REPEAT("a", 4000) AND col_2_varchar = REPEAT("o", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varchar = REPEAT("c", 4000) AND col_2_varchar = REPEAT("o", 4000); -# Again add index -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar(3072)); -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (3072)); - -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); ---error ER_DUP_ENTRY -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 4000); -SELECT col_1_varchar = REPEAT("c", 4000) FROM worklog5743; -DROP TABLE worklog5743; - - -# ----------------------------------------------------------------------------- -# Drop index from differnt session -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR(4000) , col_2_varchar VARCHAR(4000) , -PRIMARY KEY (col_1_varchar (3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("c", 3500) , REPEAT("o", 3500)); -CREATE INDEX prefix_idx ON 
worklog5743(col_1_varchar (3072)); - ---connect (con1,localhost,root,,) - - ---connection con1 -SELECT col_1_varchar = REPEAT("c", 3500) , col_2_varchar = REPEAT("o", 3500) -FROM worklog5743; - ---connection default -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("a", 3500) , REPEAT("o", 3500)); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("o", 3500); -COMMIT; - ---connection con1 -START TRANSACTION; -INSERT INTO worklog5743 VALUES(REPEAT("k", 3500),REPEAT("p", 3500)); -# Drop primary index -ALTER TABLE worklog5743 DROP PRIMARY KEY; -UPDATE worklog5743 SET col_1_varchar = REPEAT("b", 3500) -WHERE col_1_varchar = REPEAT("a", 3500) -AND col_2_varchar = REPEAT("o", 3500); -SELECT col_1_varchar = REPEAT("b", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("o", 3500); - ---connection default -DELETE FROM worklog5743 WHERE col_1_varchar = REPEAT("b", 3500); -SELECT col_1_varchar = REPEAT("a", 3500) FROM worklog5743 -WHERE col_2_varchar = REPEAT("p", 3500); - ---connection con1 -COMMIT; - ---connection default -DROP TABLE worklog5743; - - - -# ----------------------------------------------------------------------------- -# Create prefix index with length < 3072 , length = 3072 , length > 3072 -# - varbinary data type + secondary index -CREATE TABLE worklog5743 ( -col_1_varbinary VARBINARY (4000) , col_2_varbinary VARBINARY (4000) , -PRIMARY KEY (col_1_varbinary(3072))) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index of 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) , col_2_varbinary = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -SELECT col_1_varbinary = REPEAT("c", 
4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -# Drop index -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_varbinary = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("b", 4000) -AND col_2_varbinary = REPEAT("p", 4000); - - -# Again add index length < 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (2000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -# Drop index -DROP INDEX prefix_idx ON worklog5743; - -# Again add index length > 3072. -# If "innodb_large_prefix" is turned on, than the index prefix larger than 3072 -# will be truncated to 3072. If the table is REDUNDANT and COMPACT, which does -# not support prefix > 767, the create index will be rejected. 
-CREATE INDEX prefix_idx ON worklog5743(col_1_varbinary (4000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_varbinary = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_varbinary = REPEAT("c", 4000) -WHERE col_1_varbinary = REPEAT("a", 4000) -AND col_2_varbinary = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_varbinary = REPEAT("c", 4000); -SELECT col_1_varbinary = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_varbinary = REPEAT("c", 4000) -AND col_2_varbinary = REPEAT("o", 4000); - - -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- -# Create prefix index with length < 3072 , length = 3072 , length > 3072 -# text data type + secondary index -CREATE TABLE worklog5743 (col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(500)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index of 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_text (3072)); -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) -FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -# Drop index -DROP INDEX prefix_idx ON worklog5743; -SELECT col_1_text = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("b", 4000) AND col_2_text = REPEAT("p", 4000); - -# Again add index length < 3072 -CREATE INDEX prefix_idx ON worklog5743(col_1_text (1000)); -INSERT INTO 
worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); -# Drop index -DROP INDEX prefix_idx ON worklog5743; - -# Again add index length > 3072. Expect error.Length exceeds maximum supported -# key length -# Again add index length > 3072. -# If "innodb_large_prefix" is turned on, than the index prefix larger than 3072 -# will be truncated to 3072. If the table is REDUNDANT and COMPACT, which does -# not support prefix > 767, the create index will be rejected. -CREATE INDEX prefix_idx ON worklog5743(col_1_text (4000)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) AND col_2_text = REPEAT("o", 4000); - -DROP TABLE worklog5743; - - -# ----------------------------------------------------------------------------- -# Create prefix index with length < 948 , length = 948 , length > 948 -# For compressed row type + primary key -CREATE TABLE worklog5743 ( -col_1_text TEXT (4000) , col_2_text TEXT (4000) , -PRIMARY KEY (col_1_text(948)) -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2, engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000) , REPEAT("o", 4000)); -# Create index of 767 -INSERT INTO worklog5743 VALUES(REPEAT("b", 4000) , REPEAT("p", 4000)); -SELECT col_1_text = REPEAT("a", 4000) , col_2_text = REPEAT("o", 4000) FROM 
worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -# Drop index -#DROP INDEX prefix_idx ON worklog5743; -ALTER TABLE worklog5743 DROP PRIMARY KEY; -SELECT col_1_text = REPEAT("b", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("b", 4000) -AND col_2_text = REPEAT("p", 4000); - -# Again add index length < 767 -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_text (700)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); -# Drop index -ALTER TABLE worklog5743 DROP PRIMARY KEY; - -# Again add index length > 948. Expect error 'to big row ' due to exceed -# in key length. 
--- error ER_TOO_BIG_ROWSIZE -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_text (950)); -INSERT INTO worklog5743 VALUES(REPEAT("a", 4000),REPEAT("o", 4000)); -SELECT col_1_text = REPEAT("a", 4000) FROM worklog5743; -UPDATE worklog5743 SET col_1_text = REPEAT("c", 4000) -WHERE col_1_text = REPEAT("a", 4000) -AND col_2_text = REPEAT("o", 4000); -DELETE FROM worklog5743 WHERE col_1_text = REPEAT("c", 4000); -SELECT col_1_text = REPEAT("c", 4000) FROM worklog5743 -WHERE col_1_text = REPEAT("c", 4000) -AND col_2_text = REPEAT("o", 4000); - -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- -# Create prefix index with length < 3072 , length = 3072 , length > 3072 -# data types VARCHAR -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar (900)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -# Again add index length > 3072. Expect error.Length exceeds maximum supported -# key length -# Again add index length > 3072. -# If "innodb_large_prefix" is turned on, than the index prefix larger than 3072 -# will be truncated to 3072. If the table is REDUNDANT and COMPACT, which does -# not support prefix > 767, the create index will be rejected. -# Index length is truncated only for 'create index' , but error if we add -# prefix index with length > 3072 ---error ER_TOO_LONG_KEY -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_varchar (3073)); -DROP TABLE worklog5743; - - -CREATE TABLE worklog5743 ( -col_1_BLOB BLOB (4000) , PRIMARY KEY (col_1_BLOB(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; -ALTER TABLE worklog5743 DROP PRIMARY KEY; -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_BLOB (500)); -ALTER TABLE worklog5743 DROP PRIMARY KEY; -# Negative case -# Again add index length > 3072. 
Expect error.Length exceeds maximum supported -# key length -# Index length is truncated only for 'create index' , but error if we add -# prefix index with length > 3072 ---error ER_TOO_LONG_KEY -ALTER TABLE worklog5743 ADD PRIMARY KEY (col_1_BLOB (3073)); - -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- -# Error on adding larger prefix if violates unique index. -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) -) ROW_FORMAT=DYNAMIC, engine = innodb; -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 1000),REPEAT("c", 1000)), -REPEAT("o", 4000)); -INSERT INTO worklog5743 -VALUES(concat(REPEAT("a", 2000),REPEAT("b", 2000)), REPEAT("o", 4000)); ---error ER_DUP_ENTRY -ALTER TABLE worklog5743 ADD PRIMARY KEY `pk_idx` (col_1_varchar(3000)); -DROP TABLE worklog5743; - -# ----------------------------------------------------------------------------- -set global innodb_large_prefix=0; -# Prefix index > 767 is allowed if innodb_large_prefix is set to 1 ---error ER_TOO_LONG_KEY -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(3072)) -) ROW_FORMAT=DYNAMIC, engine = innodb; - - -# ----------------------------------------------------------------------------- -set global innodb_large_prefix=0; -# Backward compatibility test - Index lenghth > 767 is truncated for REDUNDANT -# and COMPACT -CREATE TABLE worklog5743 ( -col_1_varchar VARCHAR (4000) , col_2_varchar VARCHAR (4000) , -PRIMARY KEY (col_1_varchar(767)) -) engine = innodb; -INSERT INTO worklog5743 VALUES(REPEAT('a',4000),REPEAT('b',4000)); -# Prefix index > 767 is truncated with REDUNDANT and COMPACT ---enable_info -CREATE INDEX prefix_idx ON worklog5743(col_1_varchar (1000)); -ALTER TABLE worklog5743 ROW_FORMAT=REDUNDANT; ---disable_info -SHOW CREATE TABLE worklog5743; -DROP TABLE worklog5743; 
-#------------------------------------------------------------------------------ - ---disable_query_log -eval set global innodb_file_format = $innodb_file_format_orig; -eval set global innodb_file_per_table = $innodb_file_per_table_orig; -eval set global innodb_large_prefix = $innodb_large_prefix_orig; ---connection con1 ---disconnect con1 ---source include/wait_until_disconnected.inc ---enable_query_log ---connection default diff --git a/mysql-test/suite/innodb_zip/t/large_blob-master.opt b/mysql-test/suite/innodb_zip/t/large_blob-master.opt new file mode 100644 index 00000000000..90a3f0db9d4 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/large_blob-master.opt @@ -0,0 +1,3 @@ +--max_allowed_packet=200M +--innodb_buffer_pool_size=10M +--innodb_log_buffer_size=10M diff --git a/mysql-test/suite/innodb_zip/t/large_blob.test b/mysql-test/suite/innodb_zip/t/large_blob.test new file mode 100644 index 00000000000..b9888ccb53c --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/large_blob.test @@ -0,0 +1,122 @@ +--echo # +--echo # This tests the use of large blobs in InnoDB. +--echo # + +--source include/have_innodb.inc +--source include/not_debug.inc +--source include/big_test.inc + +--disable_query_log +# These values can change during the test +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; + +# Create a 20MB blob that does not compress easily. +# 1000 Random characters is enough to keep compression low. +set @alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +set @r=abs(rand()*52) + 1; +set @blob=substr(@alphabet,@r,1); +let $1=1000; +while ($1 > 1) +{ + set @r=abs(rand()*52) + 1; + set @letter=substr(@alphabet,@r,1); + set @blob=concat(@blob,@letter); + dec $1; +} +# The loop above is extremely slow compared to repeat(). 
+set @longblob=repeat(@blob,200000); +--enable_query_log + +call mtr.add_suppression("InnoDB: Warning: a long semaphore wait"); + +SET GLOBAL innodb_file_per_table = OFF; + +--echo # +--echo # System tablespace, Row Format = Redundant +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--echo # +--echo # System tablespace, Row Format = Compact +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +SET GLOBAL innodb_file_per_table = ON; + +--echo # +--echo # Separate tablespace, Row Format = Redundant +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--echo # +--echo # Separate tablespace, Row Format = Compact +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--echo # +--echo # Separate tablespace, Row Format = Compressed, Key Block Size = 2k +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--echo # +--echo # Separate tablespace, Row Format = Compressed, Key Block Size = 1k +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1; +INSERT INTO t1 
VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--echo # +--echo # Separate tablespace, Row Format = Dynamic +--echo # +CREATE TABLE t1 ( + c1 INT DEFAULT NULL, + c2 LONGBLOB NOT NULL, + KEY k2 (c2(250), c1) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC; +INSERT INTO t1 VALUES (1, ''); +UPDATE t1 SET c2=@longblob; +DROP TABLE t1; + +--disable_query_log +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb_zip/t/restart.test b/mysql-test/suite/innodb_zip/t/restart.test new file mode 100644 index 00000000000..354e63a69f7 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/restart.test @@ -0,0 +1,602 @@ +# +# These test make sure that tables are visible after rebooting +# + +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +--source include/have_partition.inc +--source include/not_embedded.inc +SET default_storage_engine=InnoDB; +LET $MYSQLD_DATADIR = `select @@datadir`; +LET $INNODB_PAGE_SIZE = `select @@innodb_page_size`; + +--disable_query_log +# This error is expected in the error log for this test. 
+call mtr.add_suppression("\\[ERROR\\] InnoDB: Error number 17 means 'File exists'"); +call mtr.add_suppression("\\[ERROR\\] InnoDB: Operating system error number (17|80) in a file operation."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot create file .*t55_restart.isl"); +call mtr.add_suppression("\\[ERROR\\] InnoDB: The link file: .* already exists."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: Cannot open datafile for read-only:"); +call mtr.add_suppression("\\[ERROR\\] InnoDB: Operating system error number 2 in a file operation."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: The error means the system cannot find the path specified."); +call mtr.add_suppression("\\[ERROR\\] InnoDB: If you are installing InnoDB, remember that you must create directories yourself, InnoDB does not create them."); +--enable_query_log + +--echo # +--echo # A series of tests to make sure tables are opened after restart. +--echo # Bug#13357607 Compressed file-per-table tablespaces fail to open +--echo # +# This bug was introduced without a regression test failing since +# there were no tests showing that tablespaces could be created and +# then read after reboot. +# + +--disable_query_log +let $MYSQL_DATA_DIR= `select @@datadir`; +let $data_directory = DATA DIRECTORY='$MYSQL_TMP_DIR/alt_dir'; + +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; +--enable_query_log + +set global innodb_file_per_table=on; + +--echo # +--echo # Create and insert records into a REDUNDANT row formatted table. 
+--echo # +CREATE TABLE t1_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=REDUNDANT ENGINE=InnoDB; +INSERT INTO t1_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +SHOW CREATE TABLE t1_restart; +SELECT count(*) FROM t1_restart; + +--echo # +--echo # Create and insert records into a COMPACT row formatted table. +--echo # +CREATE TABLE t2_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=COMPACT ENGINE=InnoDB; +INSERT INTO t2_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t2_restart; +SELECT count(*) FROM t2_restart; + +--echo # +--echo # Create and insert records into a COMPRESSED row formatted table. 
+--echo # +CREATE TABLE t3_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 ENGINE=InnoDB; +INSERT INTO t3_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +SHOW CREATE TABLE t3_restart; +SELECT count(*) FROM t3_restart; + +--echo # +--echo # Create and insert records into a DYNAMIC row formatted table. +--echo # +CREATE TABLE t4_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=DYNAMIC ENGINE=InnoDB; +INSERT INTO t4_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SHOW CREATE TABLE t4_restart; +SELECT count(*) FROM t4_restart; + +--echo # +--echo # Create and insert records into a table that uses a remote DATA DIRECTORY. 
+--echo # +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +eval CREATE TABLE t5_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=DYNAMIC ENGINE=InnoDB $data_directory; +INSERT INTO t5_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t5_restart; +SELECT count(*) FROM t5_restart; + +--echo # +--echo # Create and insert records into a partitioned table that uses +--echo # a remote DATA DIRECTORY for each partition. +--echo # +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +eval CREATE TABLE t6_restart( + c1 INT AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2 ENGINE=InnoDB + PARTITION BY HASH(c1) ( + PARTITION p0 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir', + PARTITION p1 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir', + PARTITION p2 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir'); +INSERT INTO t6_restart VALUES (0, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t6_restart; +SELECT count(*) FROM t6_restart; + +--echo # +--echo # Create and insert records into a subpartitioned table that uses +--echo # a remote DATA DIRECTORY for each subpartition. 
+--echo # +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +eval CREATE TABLE t7_restart( + c1 INT AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=DYNAMIC ENGINE=InnoDB + PARTITION BY RANGE(c1) SUBPARTITION BY HASH(c1) ( + PARTITION p0 VALUES LESS THAN (10) ( + SUBPARTITION s0 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir', + SUBPARTITION s1 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir'), + PARTITION p1 VALUES LESS THAN MAXVALUE ( + SUBPARTITION s2 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir', + SUBPARTITION s3 DATA DIRECTORY = '$MYSQL_TMP_DIR/alt_dir')); +INSERT INTO t7_restart VALUES (0, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t7_restart; +SELECT count(*) FROM t7_restart; + +--echo # +--echo # Create and insert records into a table that uses a general tablespace. 
+--echo # +CREATE TABLESPACE s1_restart ADD DATAFILE 's1_restart.ibd'; +CREATE TABLE t8_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=COMPACT ENGINE=InnoDB TABLESPACE=s1_restart; +INSERT INTO t8_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t8_restart; +SELECT count(*) FROM t8_restart; +CREATE TABLE t9_restart(c1 DOUBLE AUTO_INCREMENT KEY, c2 CHAR(10), c3 VARCHAR(100), c4 DATE, c5 TEXT) + ROW_FORMAT=DYNAMIC ENGINE=InnoDB TABLESPACE=s1_restart; +INSERT INTO t9_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +SHOW CREATE TABLE t9_restart; +SELECT count(*) FROM t9_restart; + +--echo # +--echo # Show these tables in information_schema. +--echo # +--source suite/innodb/include/show_i_s_tables.inc +--source suite/innodb/include/show_i_s_tablespaces.inc + +--echo # +--echo # Shutdown the server and list the tablespace OS files +--echo # +--source include/shutdown_mysqld.inc + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir +--list_files $MYSQL_TMP_DIR/alt_dir +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test + +--echo # +--echo # Start the server and show that tables are still visible and accessible. 
+--echo # +--source include/start_mysqld.inc + +SHOW VARIABLES LIKE 'innodb_file_per_table'; +SHOW CREATE TABLE t1_restart; +SHOW CREATE TABLE t2_restart; +SHOW CREATE TABLE t3_restart; +SHOW CREATE TABLE t4_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t5_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t6_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t7_restart; +SHOW CREATE TABLE t8_restart; +SHOW CREATE TABLE t9_restart; + +INSERT INTO t1_restart (SELECT 0, c2, c3, c4, c5 FROM t1_restart); +INSERT INTO t2_restart (SELECT 0, c2, c3, c4, c5 FROM t2_restart); +INSERT INTO t3_restart (SELECT 0, c2, c3, c4, c5 FROM t3_restart); +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t6_restart (SELECT 0, c2, c3, c4, c5 FROM t6_restart); +INSERT INTO t7_restart (SELECT 0, c2, c3, c4, c5 FROM t7_restart); +INSERT INTO t8_restart (SELECT 0, c2, c3, c4, c5 FROM t8_restart); +INSERT INTO t9_restart (SELECT 0, c2, c3, c4, c5 FROM t9_restart); + +SELECT count(*) FROM t1_restart; +SELECT count(*) FROM t2_restart; +SELECT count(*) FROM t3_restart; +SELECT count(*) FROM t4_restart; +SELECT count(*) FROM t5_restart; +SELECT count(*) FROM t6_restart; +SELECT count(*) FROM t7_restart; +SELECT count(*) FROM t8_restart; +SELECT count(*) FROM t9_restart; + +--echo # +--echo # Show these tables in information_schema. +--echo # +--source suite/innodb/include/show_i_s_tables.inc +--source suite/innodb/include/show_i_s_tablespaces.inc + +DROP TABLE t1_restart; +DROP TABLE t2_restart; +DROP TABLE t3_restart; +# Tablespace for t4_restart will be moved later from default directory to a new directory +# and an ISL file will be created not using InnoDB. +# Table t5_restart will be expanded. +# Tables t6_restart and t7_restart will be truncated. 
+DROP TABLE t8_restart; +DROP TABLE t9_restart; +DROP TABLESPACE s1_restart; + +--echo # +--echo # Truncate the remote tablespaces. +--echo # +TRUNCATE TABLE t5_restart; +ALTER TABLE t6_restart TRUNCATE PARTITION p2; +ALTER TABLE t7_restart TRUNCATE PARTITION p1; + +--source suite/innodb/include/show_i_s_tablespaces.inc + +INSERT INTO t5_restart VALUES (1000000000, 'MySQL', 'InnoDB', '2011-11-11', 'Read this after reboot'); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); +INSERT INTO t5_restart (SELECT 0, c2, c3, c4, c5 FROM t5_restart); + +SELECT count(*) FROM t5_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t5_restart; + +SELECT count(*) FROM t6_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t6_restart; + +SELECT count(*) FROM t7_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t7_restart; + +--echo # +--echo # Shutdown the server and make a backup of a tablespace +--echo # +--source include/shutdown_mysqld.inc + +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd $MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd.bak +--copy_file $MYSQL_DATA_DIR/test/t5_restart.isl $MYSQL_DATA_DIR/test/t5_restart.isl.bak +--copy_file $MYSQL_DATA_DIR/test/t5_restart.frm $MYSQL_DATA_DIR/test/t5_restart.frm.bak + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test + +--echo # +--echo # Start the server and show the tablespaces. 
+--echo # +--source include/start_mysqld.inc + +SHOW VARIABLES LIKE 'innodb_file_per_table'; + +--source suite/innodb/include/show_i_s_tablespaces.inc + +SELECT count(*) FROM t5_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t5_restart; + +SELECT count(*) FROM t6_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t6_restart; + +SELECT count(*) FROM t7_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t7_restart; + +--echo # +--echo # Try to rename a tablespace to a file that already exists +--echo # + +--copy_file $MYSQL_DATA_DIR/test/t5_restart.frm.bak $MYSQL_DATA_DIR/test/t55_restart.frm +--error ER_TABLE_EXISTS_ERROR +RENAME TABLE t5_restart TO t55_restart; +--remove_file $MYSQL_DATA_DIR/test/t55_restart.frm +--remove_file $MYSQL_DATA_DIR/test/t5_restart.frm.bak + +--copy_file $MYSQL_DATA_DIR/test/t5_restart.isl.bak $MYSQL_DATA_DIR/test/t55_restart.isl +--error ER_ERROR_ON_RENAME +RENAME TABLE t5_restart TO t55_restart; +--remove_file $MYSQL_DATA_DIR/test/t55_restart.isl +--remove_file $MYSQL_DATA_DIR/test/t5_restart.isl.bak + +#--copy_file $MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd.bak $MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +# This RENAME TABLE works of Linux but gets ER_ERROR_ON_RENAME on Windows +#--error ER_ERROR_ON_RENAME +#RENAME TABLE t5_restart TO t55_restart; +#--remove_file $MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t5_restart.ibd.bak + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test + +--echo # +--echo # Rename file table and tablespace +--echo # + +RENAME TABLE t5_restart TO t55_restart; +RENAME TABLE t6_restart TO t66_restart; +RENAME TABLE t7_restart TO t77_restart; + +--source suite/innodb/include/show_i_s_tablespaces.inc + +INSERT INTO t55_restart 
(SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t55_restart; + +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t66_restart; + +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t77_restart; + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test + +--echo # +--echo # Restart the server +--echo # +--source include/restart_mysqld.inc +SHOW VARIABLES LIKE 'innodb_file_per_table'; + +--source suite/innodb/include/show_i_s_tablespaces.inc + +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t55_restart; + +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t66_restart; + +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t77_restart; + +--echo # +--echo # Shutdown the server +--echo # +--source include/shutdown_mysqld.inc + +--echo # +--echo # Move the remote tablespaces to a new location and change the ISL files +--echo # +--mkdir $MYSQL_TMP_DIR/new_dir +--mkdir $MYSQL_TMP_DIR/new_dir/test +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test +--echo 
---- MYSQL_TMP_DIR/new_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/new_dir/test + +--echo # Moving tablespace 't4_restart' from MYSQL_DATA_DIR to MYSQL_TMP_DIR/new_dir +--copy_file $MYSQL_DATA_DIR/test/t4_restart.ibd $MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd +--remove_file $MYSQL_DATA_DIR/test/t4_restart.ibd +--exec echo $MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd > $MYSQL_DATA_DIR/test/t4_restart.isl + +--echo # Moving tablespace 't55_restart' from MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd $MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t55_restart.ibd +--remove_file $MYSQL_DATA_DIR/test/t55_restart.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd > $MYSQL_DATA_DIR/test/t55_restart.isl + +--echo # Moving tablespace 't66_restart' from MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p0.ibd $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p0.ibd +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p1.ibd $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p1.ibd +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p2.ibd $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p2.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p0.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p1.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t66_restart#P#p2.ibd +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p0.isl +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p1.isl +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p2.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p0.ibd > $MYSQL_DATA_DIR/test/t66_restart#P#p0.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p1.ibd > $MYSQL_DATA_DIR/test/t66_restart#P#p1.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p2.ibd > $MYSQL_DATA_DIR/test/t66_restart#P#p2.isl + +--echo # Moving tablespace 't77_restart' from 
MYSQL_TMP_DIR/alt_dir to MYSQL_TMP_DIR/new_dir +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p0#SP#s0.ibd $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s0.ibd +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p0#SP#s1.ibd $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s1.ibd +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p1#SP#s2.ibd $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s2.ibd +--copy_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p1#SP#s3.ibd $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s3.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p0#SP#s0.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p0#SP#s1.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p1#SP#s2.ibd +--remove_file $MYSQL_TMP_DIR/alt_dir/test/t77_restart#P#p1#SP#s3.ibd +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s0.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s1.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s2.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s3.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s0.ibd > $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s0.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s1.ibd > $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s1.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s2.ibd > $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s2.isl +--exec echo $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s3.ibd > $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s3.isl + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/alt_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/alt_dir/test +--echo ---- MYSQL_TMP_DIR/new_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/new_dir/test + +--echo # +--echo # Start the server and check tablespaces. 
+--echo # +--source include/start_mysqld.inc + +--source suite/innodb/include/show_i_s_tablespaces.inc + +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SELECT count(*) FROM t4_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t4_restart; + +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t55_restart; + +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t66_restart; + +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR +SHOW CREATE TABLE t77_restart; + + +--echo # +--echo # Shutdown the server +--echo # +--source include/shutdown_mysqld.inc + +--echo # +--echo # Move the remote tablespaces back to the default datadir and delete the ISL file. 
+--echo # + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/new_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/new_dir/test + +--echo # Moving 't4_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +--copy_file $MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd $MYSQL_DATA_DIR/test/t4_restart.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t4_restart.ibd +--remove_file $MYSQL_DATA_DIR/test/t4_restart.isl + +--echo # Moving 't55_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +--copy_file $MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd $MYSQL_DATA_DIR/test/t55_restart.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t55_restart.ibd +--remove_file $MYSQL_DATA_DIR/test/t55_restart.isl + +--echo # Moving 't66_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +--copy_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p0.ibd $MYSQL_DATA_DIR/test/t66_restart#P#p0.ibd +--copy_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p1.ibd $MYSQL_DATA_DIR/test/t66_restart#P#p1.ibd +--copy_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p2.ibd $MYSQL_DATA_DIR/test/t66_restart#P#p2.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p0.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p1.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t66_restart#P#p2.ibd +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p0.isl +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p1.isl +--remove_file $MYSQL_DATA_DIR/test/t66_restart#P#p2.isl + +--echo # Moving 't77_restart' from MYSQL_TMP_DIR/new_dir to MYSQL_DATA_DIR +--copy_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s0.ibd $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s0.ibd +--copy_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s1.ibd $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s1.ibd +--copy_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s2.ibd $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s2.ibd +--copy_file 
$MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s3.ibd $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s3.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s0.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p0#SP#s1.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s2.ibd +--remove_file $MYSQL_TMP_DIR/new_dir/test/t77_restart#P#p1#SP#s3.ibd +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s0.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p0#SP#s1.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s2.isl +--remove_file $MYSQL_DATA_DIR/test/t77_restart#P#p1#SP#s3.isl + +--echo ---- MYSQL_DATA_DIR/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_DATA_DIR/test +--echo ---- MYSQL_TMP_DIR/new_dir/test +--replace_result #P# #p# #SP# #sp# +--list_files $MYSQL_TMP_DIR/new_dir/test + +--echo # +--echo # Start the server and check tablespaces. +--echo # +-- source include/start_mysqld.inc + +--source suite/innodb/include/show_i_s_tablespaces.inc + +INSERT INTO t4_restart (SELECT 0, c2, c3, c4, c5 FROM t4_restart); +SELECT count(*) FROM t4_restart; +SHOW CREATE TABLE t4_restart; + +INSERT INTO t55_restart (SELECT 0, c2, c3, c4, c5 FROM t55_restart); +SELECT count(*) FROM t55_restart; +SHOW CREATE TABLE t55_restart; + +INSERT INTO t66_restart (SELECT 0, c2, c3, c4, c5 FROM t66_restart); +SELECT count(*) FROM t66_restart; +SHOW CREATE TABLE t66_restart; + +INSERT INTO t77_restart (SELECT 0, c2, c3, c4, c5 FROM t77_restart); +SELECT count(*) FROM t77_restart; +SHOW CREATE TABLE t77_restart; + + +--echo # +--echo # Cleanup +--echo # + +DROP TABLE t4_restart; +DROP TABLE t55_restart; +DROP TABLE t66_restart; +DROP TABLE t77_restart; + +--rmdir $MYSQL_TMP_DIR/alt_dir/test +--rmdir $MYSQL_TMP_DIR/alt_dir +--rmdir $MYSQL_TMP_DIR/new_dir/test +--rmdir $MYSQL_TMP_DIR/new_dir + +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +-- enable_query_log + diff --git 
a/mysql-test/suite/innodb_zip/t/wl6344_compress_level.test b/mysql-test/suite/innodb_zip/t/wl6344_compress_level.test new file mode 100644 index 00000000000..df4e66967f7 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6344_compress_level.test @@ -0,0 +1,135 @@ +#******************************************************************* +# This testcase is to test the funcitionality of wl#6344 +# When the innodb_compression_level=0 create a table with page size +# 1K and load data +# When the innodb_compression_level=9 create a table with page size +# 1K and load data +# compare the size of the both tables. +# The size of the table when compression level=0 should be +# greater than the +# the size of the table when compression level=9 +#******************************************************************* +--source include/have_innodb.inc +--source include/have_innodb_zip.inc + +USE test; +DROP TABLE IF EXISTS tab5; +DROP TABLE IF EXISTS tab6; + +--echo #set the other madatory flags before test starts +SET GLOBAL Innodb_file_per_table=on; +let $innodb_compression_level = `SELECT @@global.innodb_compression_level`; + +--echo #set the compression level=0 (No compress) +SET global innodb_compression_level=0; + +-- echo #check the compression level and the compressed_pages is default +SELECT @@innodb_compression_level; +SELECT @@Innodb_file_per_table; + +-- echo #create table with 1K block size +CREATE TABLE tab5 (col_1 CHAR (255) , +col_2 VARCHAR (255), col_3 longtext, +col_4 longtext,col_5 longtext, +col_6 longtext , col_7 longtext , +col_8 longtext ,col_9 longtext , +col_10 longtext ,col_11 int auto_increment primary key) +ENGINE = innodb ROW_FORMAT=compressed key_block_size=1; + +-- echo #create indexes +CREATE INDEX idx1 ON tab5(col_4(10)); +CREATE INDEX idx2 ON tab5(col_5(10)); +CREATE INDEX idx3 ON tab5(col_6(10)); +CREATE INDEX idx4 ON tab5(col_7(10)); +CREATE INDEX idx5 ON tab5(col_8(10)); +CREATE INDEX idx6 ON tab5(col_11); + +--echo #load the with repeat function 
+SET @col_1 = repeat('a', 100);
+SET @col_2 = repeat('b', 100);
+SET @col_3 = repeat('c', 100);
+SET @col_4 = repeat('d', 100);
+SET @col_5 = repeat('e', 100);
+SET @col_6 = repeat('f', 100);
+SET @col_7 = repeat('g', 100);
+SET @col_8 = repeat('h', 100);
+SET @col_9 = repeat('i', 100);
+SET @col_10 = repeat('j', 100);
+
+--echo #insert 10 records
+let $i = 10;
+while ($i) {
+
+eval INSERT INTO tab5(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10)
+VALUES (@col_1,@col_2,@col_3,@col_4,@col_5,@col_6,@col_7,@col_8,@col_9,@col_10);
+dec $i;
+
+}
+
+--echo #set the compression level=9 (High compress)
+SET global innodb_compression_level=9;
+
+-- echo #create table with 1K block size
+CREATE TABLE tab6 (col_1 CHAR (255) ,
+col_2 VARCHAR (255), col_3 longtext,
+col_4 longtext,col_5 longtext,
+col_6 longtext , col_7 longtext ,
+col_8 longtext ,col_9 longtext ,
+col_10 longtext ,col_11 int auto_increment primary key)
+ENGINE = innodb ROW_FORMAT=compressed key_block_size=1;
+
+-- echo #create indexes
+CREATE INDEX idx1 ON tab6(col_4(10));
+CREATE INDEX idx2 ON tab6(col_5(10));
+CREATE INDEX idx3 ON tab6(col_6(10));
+CREATE INDEX idx4 ON tab6(col_7(10));
+CREATE INDEX idx5 ON tab6(col_8(10));
+CREATE INDEX idx6 ON tab6(col_11);
+
+--echo #load the with repeat function
+SET @col_1 = repeat('a', 100);
+SET @col_2 = repeat('b', 100);
+SET @col_3 = repeat('c', 100);
+SET @col_4 = repeat('d', 100);
+SET @col_5 = repeat('e', 100);
+SET @col_6 = repeat('f', 100);
+SET @col_7 = repeat('g', 100);
+SET @col_8 = repeat('h', 100);
+SET @col_9 = repeat('i', 100);
+SET @col_10 = repeat('j', 100);
+
+--echo #insert 10 records
+let $i = 10;
+while ($i) {
+
+eval INSERT INTO tab6(col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10)
+VALUES (@col_1,@col_2,@col_3,@col_4,@col_5,@col_6,@col_7,@col_8,@col_9,@col_10);
+dec $i;
+}
+
+-- echo #diff the sizes of the No compressed table and high compressed table
+SET @size=(SELECT
+(SELECT (SUM(DATA_LENGTH+INDEX_LENGTH)/1024/1024)
+FROM INFORMATION_SCHEMA.TABLES +WHERE table_name='tab5' AND ENGINE='InnoDB' AND table_schema='test') +- +(SELECT SUM(DATA_LENGTH+INDEX_LENGTH)/1024/1024 +FROM INFORMATION_SCHEMA.TABLES +WHERE table_name='tab6' AND ENGINE='InnoDB' AND table_schema='test') +FROM DUAL); + +--echo #check the size of the table, it should not be Negative value +--echo #The results of this query Test pass = 1 and fail=0 +SELECT @size >= 0; + + +--echo # +--echo # Cleanup +--echo # +DROP TABLE tab5; +DROP TABLE tab6; + +--echo #reset back the compression_level to default. +--disable_query_log +eval SET GLOBAL innodb_compression_level=$innodb_compression_level; +--enable_query_log diff --git a/mysql-test/suite/innodb_zip/t/wl6347_comp_indx_stat.test b/mysql-test/suite/innodb_zip/t/wl6347_comp_indx_stat.test new file mode 100644 index 00000000000..445fd812183 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6347_comp_indx_stat.test @@ -0,0 +1,1337 @@ +--echo #****************************************************************** +--echo # Test 1: Test the interaction between stats and compression level +--echo # and logging of compressed pages configuration +--echo # This testcase is to verify the table/idex level compression stats +--echo # When the flags are set as follows +--echo # innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 +--echo # page size 1K,2K,4K,8K,16K +--echo # check the size and compression stats of the table tab5 +--echo #****************************************************************** + +# This test case needs InnoDB. 
+-- source include/have_innodb.inc +-- source include/not_embedded.inc +-- source include/have_innodb_16k.inc +-- source include/big_test.inc + +-- vertical_results + +let MYSQLD_DATADIR=`SELECT @@datadir`; +let $innodb_compression_level = `SELECT @@global.innodb_compression_level`; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_level=0; + +--echo #****************************************************************** +--echo # Test 1-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 1K +--echo #****************************************************************** + +--echo # create a table with page size=1K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=1; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for deterministic reasons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=0; + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 1-2K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 2K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +--echo # create a table with page size=2K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=2; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=0; + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=2; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 1-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 4K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +--echo # create a table with page size=4K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=4; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 1-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 8K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +--echo # create a table with page size=8K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=8; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 1-16K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=0 with page size 16K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +--echo # create a table with page size=16K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=16; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 2: test the interaction between wl6347 & wl6344 (2.2) +--echo # This testcase is to verify the table/idex level compression stats +--echo # When the flags are set as follows +--echo # innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 +--echo # page size 1K,2K,4K,8K,16K +--echo # check the size and compression stats of the table tab5 +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; + + +--echo #****************************************************************** +--echo # Test 2-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 1K +--echo #****************************************************************** + +--echo # create a table with page size=1K +--echo # create indexes on each column.(total 9 indexes) +let 
$block_size=1; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. +--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 2097152 +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; + + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 2-2K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 2K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=2K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=2; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 2097152 +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_level=9; + + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 2-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 4K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=4K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=4; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 159744 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 2-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 8K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=8K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=8; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 122880 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 212992 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 2-16K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=9 with page size 16K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=16K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=16; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 245760 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 344064 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 3: test the interaction between wl6347 & wl6344 (2.3) +--echo # This testcase is to verify the table/idex level compression stats +--echo # When the flags are set as follows +--echo # innodb_cmp_per_index_enabled=ON and +--echo # innodb_compression_level=6 (default) +--echo # page size 1K,2K,4K,8K,16K +--echo # check the size and compression stats of the table tab5 +--echo #****************************************************************** + +--echo #****************************************************************** +--echo # Test 3-1K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 1K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SET GLOBAL innodb_compression_level=default; + +--echo # create a table with page size=1K +--echo # create indexes on each 
column.(total 9 indexes) +let $block_size=1; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. +--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 3-2K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 2K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SET GLOBAL innodb_compression_level=default; + +--echo # create a table with page size=2K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=2; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # 
The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. +--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 86016 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 3-4K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 4K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SET GLOBAL innodb_compression_level=default; + +--echo # create a table with page size=4K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=4; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 65536 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 86016 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 3-8K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 8K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SET GLOBAL innodb_compression_level=default; + +--echo # create a table with page size=8K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=8; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 122880 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 172032 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 3-16K: innodb_cmp_per_index_enabled=ON and innodb_compression_level=Def with page size 16K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +SET GLOBAL innodb_compression_level=default; + +--echo # create a table with page size=16K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=16; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 245760 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +--echo # The size of the file with 0 compress = 344064 +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 4: test the interaction between wl6347 & wl6344 (2.5 & 2.6) +--echo # This testcase is to verify the table/idex level compression stats +--echo # When the flags are set as follows +--echo # innodb_cmp_per_index_enabled=ON and +--echo # Innodb_compression_failure_threshold_pct=0 +--echo # page size 1K,2K,4K,8K,16K +--echo # check the size and compression stats of the table tab5 +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=0; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # check the flags +SELECT @@innodb_cmp_per_index_enabled; +SELECT @@innodb_compression_failure_threshold_pct; +SELECT @@innodb_file_per_table; +SELECT @@innodb_compression_level; + +--echo #****************************************************************** +--echo # Test 4-1K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 1K +--echo #****************************************************************** + +--echo # create a table with page size=1K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=1; 
+--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. +--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=0; +SET GLOBAL innodb_file_per_table=on; + + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 4-2K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 2K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=2K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=2; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 4-4K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 4K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=4K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=4; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 4-8K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 8K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=8K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=8; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 4-16K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=0 with page size 16K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=16K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=16; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 5: test the interaction between wl6347 & wl6344 (2.7) +--echo # This testcase is to verify the table/idex level compression stats +--echo # When the flags are set as follows +--echo # innodb_cmp_per_index_enabled=ON and +--echo # Innodb_compression_failure_threshold_pct=10 +--echo # page size 1K,2K,4K,8K,16K +--echo # check the size and compression stats of the table tab5 +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; +SET GLOBAL innodb_compression_level=Default; + + +--echo # check the flags +SELECT @@innodb_cmp_per_index_enabled; +SELECT @@innodb_compression_failure_threshold_pct; +SELECT @@innodb_file_per_table; +SELECT @@innodb_compression_level; + +--echo 
#****************************************************************** +--echo # Test 5-1K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 1K +--echo #****************************************************************** + +--echo # create a table with page size=1K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=1; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. +--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +-- source include/restart_mysqld.inc + +--echo # set the flag on (default off) +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # set the flags +SET GLOBAL innodb_compression_failure_threshold_pct=10; +SET GLOBAL innodb_file_per_table=on; +SET GLOBAL innodb_compression_failure_threshold_pct=10; + + +--echo # fetch the compressed page and check the stats +--echo # The stats figure may be different/same for each restart. 
+--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table +-- echo # testcase : pass = 1 fail = 0 +SET @comp_val=0; +SET @uncomp_val=1; +--source suite/innodb_zip/include/innodb_stats_restart.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 5-2K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 2K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; + +--echo # create a table with page size=2K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=2; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=2; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 5-4K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 4K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=4K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=4; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 5-8K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 8K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; +SET GLOBAL innodb_compression_failure_threshold_pct=10; + +--echo # create a table with page size=8K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=8; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 5-16K: innodb_cmp_per_index_enabled=ON and Innodb_compression_failure_threshold_pct=10 with page size 16K +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=OFF; +SET GLOBAL innodb_cmp_per_index_enabled=ON; + +--echo # create a table with page size=16K +--echo # create indexes on each column.(total 9 indexes) +let $block_size=16; +--source suite/innodb_zip/include/innodb_create_tab_indx.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # for determintic resons simple data should be inserted. 
+--echo # insert some 100 records +let $i = 100; +--source suite/innodb_zip/include/innodb_load_data.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed page and check the stats +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +--echo # fetch the compressed same page once again and check the stats +--echo # the stat figures should be same as above query +--source suite/innodb_zip/include/innodb_fetch_records.inc + +--echo # check the stats of the table & size of the table +SET @inl_val=1; +--source suite/innodb_zip/include/innodb_stats_comp_index.inc + +DROP TABLE tab5; + +--echo #****************************************************************** +--echo # Test 6: Create multiple tables & indexes having same name in 2 diff DB's +--echo # Check the stats of the table. 
(1.1) +--echo #****************************************************************** + +--echo # reset the stat table before starting next testcase +SET GLOBAL innodb_cmp_per_index_enabled=0; +SET GLOBAL innodb_cmp_per_index_enabled=1; + +SET GLOBAL innodb_file_per_table=ON; +SET GLOBAL innodb_compression_level=default; +SET GLOBAL innodb_compression_failure_threshold_pct=default; + + +--echo # create a table page size=1K +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +CREATE INDEX idx1 ON tab5(col_1(10)); + +--echo # check the stats of the table +SELECT database_name,table_name,index_name,compress_ops,compress_ops_ok +FROM information_schema.innodb_cmp_per_index +WHERE database_name='test' and table_name='tab5' +ORDER BY index_name,table_name,database_name ; + +CREATE DATABASE sb; +USE sb; + +--echo # create a table page size=1K (testcase-1) +CREATE TABLE tab5(col_1 TINYBLOB, col_2 TINYTEXT,col_3 BLOB, +col_4 TEXT,col_5 MEDIUMBLOB,col_6 MEDIUMTEXT, +col_7 LONGBLOB,col_8 LONGTEXT,col_9 VARCHAR(255)) +ENGINE=INNODB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1; + +CREATE INDEX idx1 ON tab5(col_1(10)); + +SELECT database_name,table_name,index_name,compress_ops,compress_ops_ok +FROM information_schema.innodb_cmp_per_index +WHERE database_name='sb' and table_name='tab5' +ORDER BY index_name,table_name,database_name ; + +DROP TABLE tab5, test.tab5; +DROP DATABASE sb; + +--echo # reset the flags +eval SET GLOBAL innodb_file_per_table=default; +eval SET GLOBAL innodb_cmp_per_index_enabled=default; +--disable_query_log +eval SET GLOBAL innodb_compression_level=$innodb_compression_level; +--enable_query_log +eval SET GLOBAL innodb_compression_failure_threshold_pct=default; diff --git a/mysql-test/suite/innodb_zip/t/wl6470_1.test b/mysql-test/suite/innodb_zip/t/wl6470_1.test new file mode 100644 index 
00000000000..ecf6b601d3d --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6470_1.test @@ -0,0 +1,60 @@ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +--source include/big_test.inc + +#################################################################### +# TC to test temp-table DML optimization changes for correctness # +# Scenario covered: # +# 1. bulk-insert with rollback + commit: this will ensure btree # +# node split with rollback and commit. # +#################################################################### + +#------------------------------------------------------------------- +# +# 1. bulk-insert with rollback + commit: this will ensure btree # +# node split with rollback and commit. # +# +create temporary table t1 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb; +create temporary table t2 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb; +--source suite/innodb_zip/include/innodb_dml_ops.inc +drop table t1; +drop table t2; +# +--disable_warnings +create temporary table t1 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb key_block_size = 4; +set innodb_strict_mode=off; +create temporary table t2 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb key_block_size = 8; +set innodb_strict_mode=default; +--enable_warnings +--source suite/innodb_zip/include/innodb_dml_ops.inc +drop table t1; +drop table t2; +# +let $file_per_table = `select @@innodb_file_per_table`; +set global innodb_file_per_table = 0; +create temporary table t1 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb; +create temporary table t2 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc), index sec_index(c1) + ) engine = innodb; +--source suite/innodb_zip/include/innodb_dml_ops.inc +drop table 
t1; +drop table t2; +eval set global innodb_file_per_table = $file_per_table; +# + diff --git a/mysql-test/suite/innodb_zip/t/wl6470_2.test b/mysql-test/suite/innodb_zip/t/wl6470_2.test new file mode 100644 index 00000000000..020c27b97b9 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6470_2.test @@ -0,0 +1,468 @@ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +--source include/no_valgrind_without_big.inc + +#################################################################### +# TC to test temp-table DML optimization changes for correctness # +# Scenario covered in single testcase : # +# - Tables with row format(redundant,compressed,dynamic,compact # +# - Table with primary,composite,prefix,secondary index # +# - Insert/delete/update with transactions # +# - Transaction with commit,rollback,savepoint statements # +# - Concurrency by execution of two clients creating tables with # +# same names # +# - Inserting data using # +# - Insert into .. , Load data infile..,insert ignore # +# - Insert into .. on duplicate update # +# - Check basic delete and update [ignore] # +# - Check constraints like duplicate key,default value # +# - Alter add column , add primary key # +# - with prepare and execute statement # +#################################################################### + +# run for page size >= 8k +--disable_warnings +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value >= 8192`) +{ + --skip Test requires InnoDB with page size >= 8k. +} +--enable_warnings + + +# Save initial values of server variable +--disable_query_log +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; +SET sql_mode = 'NO_ENGINE_SUBSTITUTION'; +--enable_query_log + +# Create procedure to perform +# 1. Create temp table with row types , index , sufficient data types +# 2. 
Perform DML with transaction +delimiter |; +create procedure populate_tables() + begin + declare n int default 20; + declare inner_loop int default 100; + set global innodb_file_per_table=on; + drop table if exists t1,t2,t3,t4; + + create temporary table t1(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=redundant; + + create temporary table t2(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=compact; + + create temporary table t3(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=compressed key_block_size=4; + + create temporary table t4(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=dynamic; + + create temporary table t5(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb; + + create temporary table t6 ( a int ) engine = innodb; + insert into t6 values 
(50),(100),(150),(190); + + while (n > 0) do + start transaction; + insert into t1 values(n,n,repeat(concat(' tc3_',n),30), + repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), + repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), + now(),(100.55+n)); + insert into t2 values(n,n,repeat(concat(' tc3_',n),30), + repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), + repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), + now(),(100.55+n)); + insert into t3 values(n,n,repeat(concat(' tc3_',n),30), + repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), + repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), + now(),(100.55+n)); + insert into t4 values(n,n,repeat(concat(' tc3_',n),30), + repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), + repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), + now(),(100.55+n)); + insert into t5 values(n,n,repeat(concat(' tc3_',n),30), + repeat(concat(' tc4_',n),800),repeat(concat(' tc_',n),800), + repeat(concat(' tc6_',n),800),repeat(concat(' tc7_',n),800), + now(),(100.55+n)); + + if (n > 10) then + commit; + else + delete from t1 where c1 > 10 ; + delete from t2 where c1 > 10 ; + delete from t3 where c1 > 10 ; + delete from t4 where c1 > 10 ; + delete from t5 where c1 > 10 ; + + rollback; + start transaction; + update t1 set c1 = c1 + 1000 where c1 > 10; + update t2 set c1 = c1 + 1000 where c1 > 10; + update t3 set c1 = c1 + 1000 where c1 > 10; + update t4 set c1 = c1 + 1000 where c1 > 10; + update t5 set c1 = c1 + 1000 where c1 > 10; + rollback; + end if; + + start transaction; + insert into t1 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t2 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' 
tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t3 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t4 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t5 values(n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + + delete from t1 where c1 between 100 and 110; + delete from t2 where c1 between 100 and 110; + delete from t3 where c1 between 100 and 110; + delete from t4 where c1 between 100 and 110; + delete from t5 where c1 between 100 and 110; + + update t1 set c1 = c1+1 where c1>110; + update t2 set c1 = c1+1 where c1>110; + update t3 set c1 = c1+1 where c1>110; + update t4 set c1 = c1+1 where c1>110; + update t5 set c1 = c1+1 where c1>110; + + savepoint a; + + insert into t1 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t2 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + 
repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t3 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t4 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t5 values(300+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + savepoint b; + + insert into t1 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t2 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t3 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t4 values(400+n+inner_loop,n+inner_loop,repeat(concat(' 
tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + insert into t5 values(400+n+inner_loop,n+inner_loop,repeat(concat(' tc3_',n+inner_loop),30), + repeat(concat(' tc4_',n+inner_loop),800),repeat(concat(' tc_',n+inner_loop),800), + repeat(concat(' tc6_',n+inner_loop),245),repeat(concat(' tc7_',n+inner_loop),245), + now(),(100.55+n+inner_loop)); + savepoint c; + rollback to b; + rollback to a; + commit; + commit; + rollback; + set n = n - 1; + end while; +end| +delimiter ;| + +# Create two clients for concurrent execution +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +--echo #---client 1 : dml operation ---" +connection con1; +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +-- enable_query_log +-- disable_query_log +# call procedure +--send call populate_tables(); +-- enable_query_log + +--echo #---client 2 : dml operation ---" +connection con2; +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +-- enable_query_log +-- disable_query_log +# call procedure +--send call populate_tables(); +-- enable_query_log + +# check data of client connection 1 +--echo # In connection 1 +connection con1; +--reap +# 20 rows expected in 5 tables +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +select count(*) from t4; +select count(*) from t5; +select c1 from t1; +select c1 from t2; +select c1 from t3; +select c1 from t4; +select c1 from t5; +# check data of client connection 2 +--echo # In connection 2 +connection con2; +--reap +# 20 rows expected in 5 tables +select count(*) from t1; +select count(*) from t2; +select count(*) from t3; +select count(*) from t4; +select count(*) from t5; +select c1 from t1; +select c1 from t2; +select c1 from t3; +select c1 from t4; +select c1 
from t5; + +--echo # In connection 1 +connection con1; + +set autocommit = 0; +# Check duplicate key constraint + insert ignore +--error ER_DUP_ENTRY +insert into t1 values (20,1,'a','a','a','a','a',now(),100.55); +insert ignore into t1 values (20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t2 values (20,1,'a','a','a','a','a',now(),100.55); +insert ignore into t2 values (20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t3 values (20,1,'a','a','a','a','a',now(),100.55); +insert ignore into t3 values (20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t4 values (20,1,'a','a','a','a','a',now(),100.55); +insert ignore into t4 values (20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t5 values (20,1,'a','a','a','a','a',now(),100.55); +insert ignore into t5 values (20,1,'a','a','a','a','a',now(),100.55); + +# check rollback due to duplicate value in second record of insert +--error ER_DUP_ENTRY +insert into t1 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t2 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t3 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t4 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); +--error ER_DUP_ENTRY +insert into t5 values (1,1,'a','a','a','a','a',now(),100.55), +(20,1,'a','a','a','a','a',now(),100.55); + +set autocommit = 1; + +select c1,c2 from t1 where c1 in (20,1); +select c1,c2 from t2 where c1 in (20,1); +select c1,c2 from t3 where c1 in (20,1); +select c1,c2 from t4 where c1 in (20,1); +select c1,c2 from t5 where c1 in (20,1); + +#replace statement +replace into t1 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t2 values 
(20,1,'a','a','a','a','a',now(),100.55); +replace into t3 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t4 values (20,1,'a','a','a','a','a',now(),100.55); +replace into t5 values (20,1,'a','a','a','a','a',now(),100.55); +# verify row is replaced from (20,20) to (20,1) +select c1,c2,c3,c4,c5,c6,c7,c9 from t1 where c1 = 20; +select c1,c2,c3,c4,c5,c6,c7,c9 from t2 where c1 = 20; +select c1,c2,c3,c4,c5,c6,c7,c9 from t3 where c1 = 20; +select c1,c2,c3,c4,c5,c6,c7,c9 from t4 where c1 = 20; +select c1,c2,c3,c4,c5,c6,c7,c9 from t5 where c1 = 20; + +# Update ignore. statement is ignored as value 20 exists +update ignore t1 set c1 = 20 where c1 = 140 ; +update ignore t2 set c1 = 20 where c1 = 140 ; +update ignore t3 set c1 = 20 where c1 = 140 ; +update ignore t4 set c1 = 20 where c1 = 140 ; +update ignore t5 set c1 = 20 where c1 = 140 ; +# see record 140 is present as last update ignored +select count(*) from t1 where c1 = 140; +select count(*) from t2 where c1 = 140; +select count(*) from t3 where c1 = 140; +select count(*) from t4 where c1 = 140; +select count(*) from t5 where c1 = 140; + +# Load data infile +--echo "running select * into outfile from t1 ; +--disable_query_log +eval select * into outfile "$MYSQLTEST_VARDIR/tmp/t1.outfile" from t1; +--enable_query_log +# Create table as select +create temporary table temp_1 engine = innodb as select * from t1 where 1=2; +select count(*) from temp_1; +--echo "running load data infile into temp_1 ; +--disable_query_log +eval load data infile '$MYSQLTEST_VARDIR/tmp/t1.outfile' into table temp_1; +--enable_query_log +select count(*) from temp_1; + +# Alter table to add column and primary key +alter table temp_1 add column c10 int default 99 , +add column c11 varchar(100) default 'test'; +alter table temp_1 add primary key (c1); +insert into temp_1 (c1,c2,c3,c4,c5,c6,c7,c8,c9) values (-1,-1,'a','a','a','a','a',now(),100.55); +select c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 from temp_1 where c1 < 0; +select count(*) from 
temp_1 where c10 = 99 and c11 like 'test'; +# insert on duplicate key update +insert into temp_1 (c1,c2,c3,c4,c5,c6,c7,c8,c9) values (-1,-1,'a','a','a','a','a',now(),100.55) +on duplicate key update c1=-2,c2=-2; +select c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 from temp_1 where c1 < 0; + +#cleanup +drop table t1 ,t2 ,t3,t4,t5,t6,temp_1; +disconnect con1; + +connection con2; +drop table t1 ,t2 ,t3,t4,t5,t6; +disconnect con2; + +connection default; +drop procedure populate_tables; + + +# case 2 - with prepare and execute +let $prep_loop= 5; +create temporary table prep_1(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(6000) not null, + c5 blob(6000) not null, + c6 varchar(2000) not null, + c7 varchar(2000) not null, + c8 datetime, + c9 decimal(6,3), + index (c3,c4(50),c5(50)), + index (c2)) +engine=innodb; +PREPARE stm FROM "insert into prep_1 values(?,?,repeat(concat(' tc3_',?),30),repeat(concat(' tc4_',?),800),repeat(concat(' tc_',?),800),repeat(concat(' tc6_',?),245),repeat(concat(' tc7_',?),245),now(),(100.55+?))"; +set @var = 5; +set @var_static = 5; +while ($prep_loop>0) +{ + eval EXECUTE stm USING @var,@var,@var,@var,@var,@var,@var,@var; + eval EXECUTE stm USING @var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static,@var_static; + dec $prep_loop; + set @var = @var - 1; +} +select c1,left(c3,15) from prep_1 order by c1 ; +select count(*) from prep_1; + +PREPARE stm_1 FROM "UPDATE prep_1 SET c1 = c1 + 1"; +EXECUTE stm_1; +EXECUTE stm_1; +select c1,left(c3,15) from prep_1 order by c1 ; +select count(*) from prep_1; + +PREPARE stm_2 FROM "DELETE FROM prep_1 ORDER BY c1 LIMIT 1"; +EXECUTE stm_2; +EXECUTE stm_2; +select c1,left(c3,15) from prep_1 order by c1 ; +select count(*) from prep_1; + +drop prepare stm; +drop prepare stm_1; +drop prepare stm_2; +drop table prep_1; + +--remove_file $MYSQLTEST_VARDIR/tmp/t1.outfile + +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +SET sql_mode 
= default; +-- enable_query_log + diff --git a/mysql-test/suite/innodb_zip/t/wl6501_1.test b/mysql-test/suite/innodb_zip/t/wl6501_1.test new file mode 100644 index 00000000000..dd8b5f65b31 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6501_1.test @@ -0,0 +1,451 @@ + +#################################################################### +# TC to check truncate table statement atomicity for single # +# tablespace # +# Scenario covered: # +# 1. Debug points added for worklog # +# 2. Table with different row types # +# 3. Transactional statement. # +#################################################################### + + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc +--source include/have_innodb_16k.inc + +# Valgrind would result in a "long semaphore wait" inside InnoDB +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +#----------------------------------------------------------------------- +--disable_query_log +let $MYSQL_DATA_DIR= `select @@datadir`; +let $data_directory = data directory='$MYSQL_TMP_DIR/alt_dir'; +let $innodb_file_per_table_orig=`select @@innodb_file_per_table`; + +call mtr.add_suppression("InnoDB.*table did not exist in the InnoDB data dictionary.*"); +call mtr.add_suppression("InnoDB: A page in the doublewrite buffer is not within space bounds.*"); +call mtr.add_suppression("InnoDB: Cannot create file.*"); +call mtr.add_suppression("InnoDB: Error number 17 means 'File exists'.*"); +call mtr.add_suppression("InnoDB: A page in the doublewrite buffer is not within space bounds"); +call mtr.add_suppression("InnoDB: Error: table .* does not exist in the InnoDB internal"); +--enable_query_log + +#----------------------------------------------------------------------- +set global innodb_file_per_table=on; +--echo # Verify that 'TRUNCATE TABLE' statement works fine
and the size +--echo # of .ibd file is equal to the initial size after truncation. + +#----------------------------------------------------------------------- +drop table if exists t1,t2,t3,t4,t6; +let $cnt = 6; +while ($cnt) { + + # table with basic data type + primary ,secondary,composite,prefix index + create table t1(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(500) not null, + c5 blob(500) not null, + c6 varchar(500) not null, + c7 varchar(500) not null, + c8 datetime, + c9 decimal(5,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=redundant; + + + create table t2(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(500) not null, + c5 blob(500) not null, + c6 varchar(500) not null, + c7 varchar(500) not null, + c8 datetime, + c9 decimal(5,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=compact; + + + # with row type , key block size = 4K + create table t3(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(500) not null, + c5 blob(500) not null, + c6 varchar(500) not null, + c7 varchar(500) not null, + c8 datetime, + c9 decimal(5,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=compressed key_block_size=4; + + + create table t4(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(500) not null, + c5 blob(500) not null, + c6 varchar(500) not null, + c7 varchar(500) not null, + c8 datetime, + c9 decimal(5,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb row_format=dynamic; + + + create temporary table t5(c1 int not null, + c2 int not null, + c3 char(255) not null, + c4 text(500) not null, + c5 blob(500) not null, + c6 varchar(500) not null, + c7 varchar(500) not null, + c8 datetime, + c9 decimal(5,3), + primary key (c1), + index (c3,c4(50),c5(50)), + index (c2)) + engine=innodb; + + create table t6 ( a int ) engine = 
innodb; + insert into t6 values (50),(100),(150); + + --disable_query_log + --disable_result_log + let $n=5; + + # load created tables. + while ($n) + { + start transaction; + + eval insert ignore into t1 values( + $n, $n, + repeat(concat(' tc3_',$n), 42), + repeat(concat(' tc4_',$n), 300), + repeat(concat(' tc5_',$n), 300), + repeat(concat(' tc6_',$n), 300), + repeat(concat(' tc7_',$n), 300), + now(), (100.55+$n)); + + eval insert ignore into t2 values( + $n, $n, + repeat(concat(' tc3_',$n), 42), + repeat(concat(' tc4_',$n), 300), + repeat(concat(' tc5_',$n), 300), + repeat(concat(' tc6_',$n), 300), + repeat(concat(' tc7_',$n), 300), + now(), (100.55+$n)); + + eval insert ignore into t3 values( + $n, $n, + repeat(concat(' tc3_',$n), 42), + repeat(concat(' tc4_',$n), 300), + repeat(concat(' tc5_',$n), 300), + repeat(concat(' tc6_',$n), 300), + repeat(concat(' tc7_',$n), 300), + now(), (100.55+$n)); + + eval insert ignore into t4 values( + $n, $n, + repeat(concat(' tc3_',$n), 42), + repeat(concat(' tc4_',$n), 300), + repeat(concat(' tc5_',$n), 300), + repeat(concat(' tc6_',$n), 300), + repeat(concat(' tc7_',$n), 300), + now(), (100.55+$n)); + + eval insert ignore into t5 values( + $n, $n, + repeat(concat(' tc3_',$n), 42), + repeat(concat(' tc4_',$n), 300), + repeat(concat(' tc5_',$n), 300), + repeat(concat(' tc6_',$n), 300), + repeat(concat(' tc7_',$n), 300), + now(), (100.55+$n)); + + if ($n <= 3) + { + commit; + } + + if ($n > 3) + { + rollback; + } + + dec $n; + } + + # validate loading of the tables. + --enable_result_log + --enable_query_log + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + select count(*) from t5; + select count(*) from t6; + + # set the debug crash point and exercise them. 
+ if ($cnt == 6) + { + set session debug="+d,ib_trunc_crash_during_drop_index_temp_table"; + --echo "---debug ib_trunc_crash_during_drop_index_temp_table point---" + } + if ($cnt == 5) + { + set session debug="+d,ib_trunc_crash_drop_reinit_done_create_to_start"; + --echo "---debug ib_trunc_crash_drop_reinit_done_create_to_start---" + } + + if ($cnt >= 5) { + --echo # Write file to make mysql-test-run.pl expect crash and restart + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --echo # Run the crashing query + --error 2013 + truncate table t5; + --source include/wait_until_disconnected.inc + --enable_reconnect + --echo # Restart the MySQL server + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --source include/wait_until_connected_again.inc + --disable_reconnect + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + --error ER_NO_SUCH_TABLE + select count(*) from t5; + select count(*) from t6; + } + + # set the debug crash point and exercise them. 
+ if ($cnt == 6) + { + set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; + --echo "---debug ib_trunc_crash_on_drop_of_sec_index point---" + } + if ($cnt == 5) + { + set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; + --echo "---debug ib_trunc_crash_on_create_of_sec_index---" + } + if ($cnt == 4) + { + set session debug="+d,ib_trunc_crash_before_log_removal"; + --echo "---debug ib_trunc_crash_before_log_removal point---" + } + if ($cnt == 3) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 2) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 1) + { + set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; + --echo "---debug ib_trunc_crash_after_redo_log_write_complete point---" + } + + --echo # Write file to make mysql-test-run.pl expect crash and restart + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --echo # Run the crashing query + --error 2013 + truncate table t1; + --source include/wait_until_disconnected.inc + --enable_reconnect + --echo # Restart the MySQL server + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --source include/wait_until_connected_again.inc + --disable_reconnect + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + --error ER_NO_SUCH_TABLE + select count(*) from t5; + select count(*) from t6; + + if ($cnt == 6) + { + set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; + --echo "---debug ib_trunc_crash_on_drop_of_sec_index point---" + } + if ($cnt == 5) + { + set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; + --echo "---debug ib_trunc_crash_on_create_of_sec_index---" + } + if ($cnt == 4) + { + set session debug="+d,ib_trunc_crash_before_log_removal"; + --echo "---debug ib_trunc_crash_before_log_removal 
point---" + } + if ($cnt == 3) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 2) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 1) + { + set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; + --echo "---debug ib_trunc_crash_after_redo_log_write_complete point---" + } + + + --echo # Write file to make mysql-test-run.pl expect crash and restart + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --echo # Run the crashing query + --error 2013 + truncate table t2; + --source include/wait_until_disconnected.inc + --enable_reconnect + --echo # Restart the MySQL server + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --source include/wait_until_connected_again.inc + --disable_reconnect + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + --error ER_NO_SUCH_TABLE + select count(*) from t5; + select count(*) from t6; + + if ($cnt == 6) + { + set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; + --echo "---debug ib_trunc_crash_on_drop_of_sec_index point---" + } + if ($cnt == 5) + { + set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; + --echo "---debug ib_trunc_crash_on_create_of_sec_index---" + } + if ($cnt == 4) + { + set session debug="+d,ib_trunc_crash_before_log_removal"; + --echo "---debug ib_trunc_crash_before_log_removal point---" + } + if ($cnt == 3) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 2) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 1) + { + set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; + --echo "---debug 
ib_trunc_crash_after_redo_log_write_complete point---" + } + + + --echo # Write file to make mysql-test-run.pl expect crash and restart + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --echo # Run the crashing query + --error 2013 + truncate table t3; + --source include/wait_until_disconnected.inc + --enable_reconnect + --echo # Restart the MySQL server + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --source include/wait_until_connected_again.inc + --disable_reconnect + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + --error ER_NO_SUCH_TABLE + select count(*) from t5; + select count(*) from t6; + + + if ($cnt == 6) + { + set session debug="+d,ib_trunc_crash_on_drop_of_sec_index"; + --echo "---debug ib_trunc_crash_on_drop_of_sec_index point---" + } + if ($cnt == 5) + { + set session debug="+d,ib_trunc_crash_on_create_of_sec_index"; + --echo "---debug ib_trunc_crash_on_create_of_sec_index---" + } + if ($cnt == 4) + { + set session debug="+d,ib_trunc_crash_before_log_removal"; + --echo "---debug ib_trunc_crash_before_log_removal point---" + } + if ($cnt == 3) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 2) + { + set session debug="+d,ib_trunc_crash_after_truncate_done"; + --echo "---debug ib_trunc_crash_after_truncate_done point---" + } + if ($cnt == 1) + { + set session debug="+d,ib_trunc_crash_after_redo_log_write_complete"; + --echo "---debug ib_trunc_crash_after_redo_log_write_complete point---" + } + + --echo # Write file to make mysql-test-run.pl expect crash and restart + --exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + --echo # Run the crashing query + --error 2013 + truncate table t4; + --source include/wait_until_disconnected.inc + --enable_reconnect + --echo # Restart the MySQL server + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + 
--source include/wait_until_connected_again.inc + --disable_reconnect + select count(*) from t1; + select count(*) from t2; + select count(*) from t3; + select count(*) from t4; + --error ER_NO_SUCH_TABLE + select count(*) from t5; + select count(*) from t6; + + drop table t1, t2, t3, t4, t6; + + dec $cnt; + + --disable_query_log + eval set global innodb_file_per_table=$innodb_file_per_table_orig; + --enable_query_log +} + + + diff --git a/mysql-test/suite/innodb_zip/t/wl6501_crash_3.test b/mysql-test/suite/innodb_zip/t/wl6501_crash_3.test new file mode 100644 index 00000000000..eb4c23aa66e --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6501_crash_3.test @@ -0,0 +1,26 @@ +# +# WL#6501: make truncate table atomic +# + +# TC tries to hit crash point during truncate of +# compressed non-temp table residing in single tablespace +# with page-size=16k + +--source include/have_innodb.inc +--source include/have_innodb_16k.inc +--source include/have_debug.inc +--source include/big_test.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +let $wl6501_file_per_table = 1; +let $wl6501_row_fmt = compressed; +let $wl6501_kbs = 16; +let $wl6501_file_format = 'Barracuda'; +--source suite/innodb/include/innodb_wl6501_crash.inc + diff --git a/mysql-test/suite/innodb_zip/t/wl6501_crash_4.test b/mysql-test/suite/innodb_zip/t/wl6501_crash_4.test new file mode 100644 index 00000000000..870af3dfc94 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6501_crash_4.test @@ -0,0 +1,29 @@ +# +# WL#6501: make truncate table atomic +# + +# TC tries to hit crash point during truncate of +# compressed non-temp table residing in single tablespace. 
+# with page-size=4k + +--source include/have_innodb.inc +--source include/have_innodb_4k.inc +--source include/have_debug.inc +--source include/big_test.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +let $wl6501_file_per_table = 1; +let $wl6501_row_fmt = compressed; +let $wl6501_kbs = 4; +let $wl6501_file_format = 'Barracuda'; +--source suite/innodb/include/innodb_wl6501_crash.inc + +let $wl6501_temp = temporary; +--source suite/innodb/include/innodb_wl6501_crash_temp.inc + diff --git a/mysql-test/suite/innodb_zip/t/wl6501_crash_5.test b/mysql-test/suite/innodb_zip/t/wl6501_crash_5.test new file mode 100644 index 00000000000..3432a5a5c76 --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6501_crash_5.test @@ -0,0 +1,26 @@ +# +# WL#6501: make truncate table atomic +# + +# TC tries to hit crash point during truncate of +# compressed non-temp table residing in single tablespace. +# with page-size=8k + +--source include/have_innodb.inc +--source include/have_innodb_8k.inc +--source include/have_debug.inc +--source include/big_test.inc + +# Valgrind would complain about memory leaks when we crash on purpose. 
+--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +let $wl6501_file_per_table = 1; +let $wl6501_row_fmt = compressed; +let $wl6501_kbs = 8; +let $wl6501_file_format = 'Barracuda'; +--source suite/innodb/include/innodb_wl6501_crash.inc + diff --git a/mysql-test/suite/innodb_zip/t/wl6501_scale_1.test b/mysql-test/suite/innodb_zip/t/wl6501_scale_1.test new file mode 100644 index 00000000000..8c746fe8abf --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6501_scale_1.test @@ -0,0 +1,41 @@ +# +# WL#6501: make truncate table atomic +# + +# load table with some significiant amount of data +# and then try truncate + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/big_test.inc +--source include/have_innodb_16k.inc + +# Valgrind would complain about memory leaks when we crash on purpose. +--source include/not_valgrind.inc +# Embedded server does not support crashing +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + + +# Single-Tablespace/Non-Compressed +let $wl6501_file_per_table = 1; +let $wl6501_row_fmt = compact; +let $wl6501_kbs = 16; +let $wl6501_file_format = 'Antelope'; +--source suite/innodb_zip/include/innodb_wl6501_scale.inc + +# Single-Tablespace/Compressed +let $wl6501_file_per_table = 1; +let $wl6501_row_fmt = compressed; +let $wl6501_kbs = 16; +let $wl6501_file_format = 'Barracuda'; +--source suite/innodb_zip/include/innodb_wl6501_scale.inc + +# System-Tablespace/Non-Compressed +let $wl6501_file_per_table = 0; +let $wl6501_row_fmt = compact; +let $wl6501_kbs = 16; +let $wl6501_file_format = 'Antelope'; +--source suite/innodb_zip/include/innodb_wl6501_scale.inc + diff --git a/mysql-test/suite/innodb_zip/t/wl6560.test b/mysql-test/suite/innodb_zip/t/wl6560.test new file mode 100644 index 00000000000..55d36747938 --- /dev/null +++ 
b/mysql-test/suite/innodb_zip/t/wl6560.test @@ -0,0 +1,422 @@ +# +# WL#6560: InnoDB: separate tablespace for innodb-temp-tables. +# + +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +# Embedded server does not restart of server +--source include/not_embedded.inc +-- source include/big_test.inc + +--disable_query_log +call mtr.add_suppression("Tablespace innodb_temporary ran out of space. Please add another file or use 'autoextend' for the last file in setting innodb_temp_data_file_path."); +call mtr.add_suppression("The table 't1' is full"); +--enable_query_log + +################################################################################ +# +# Will test following scenarios: +# 1. creation of shared temp-tablespace. +# 2. ddl + dml operation involving temp-tablespace. +# insert/delete/update/select +# create/drop/alter/truncate/import-discard (though blocked). +# 3. ddl + dml operation on compressed table. +# (table doesn't reside in shared temp-tablespace). +# 4. Test bulk-loading that result in auto-extension of temp-tablespace. +# 5. re-creation of temp-tablespace on re-start. +# also to ensure non-existence of existing temp-table. +# 6. restart server in innodb-read-only mode. this will also +# block creation of temp-tables. +# 7. try starting server with shared and temp-tablespace filename same. +# 8. try re-starting server with param so that temp-tablespace can't be +# expanded and insert enough data to make it full. +# 9. tests for different row format types and key block sizes for +# compressed tables. +# 10. try restarting server with raw device specified for temp-tablespace. +# 11. try restarting server with temp-tablespace less than min. threshold +# 12. no file specified for temp-tablespace. 
+################################################################################ + +#----------------------------------------------------------------------------- +# +# create test-bed +# +let $per_table = `select @@innodb_file_per_table`; + +set global innodb_file_per_table = off; +let $MYSQL_TMP_DIR = `select @@tmpdir`; +let $MYSQL_DATA_DIR = `select @@datadir`; +let SEARCH_FILE = $MYSQLTEST_VARDIR/log/my_restart.err; +let $args = --loose-console --core-file > $SEARCH_FILE 2>&1; +let crash = --loose-console > $SEARCH_FILE 2>&1 --innodb-force-recovery-crash; +let readonly = $args --innodb_read_only; +let nameconflicts = $args --innodb_data_file_path="ibdata1:12M:autoextend:max:134217728" --innodb_temp_data_file_path="ibdata1:12M:autoextend"; +let rawdevice1 = $args --innodb_temp_data_file_path="/dev/hdd1:3Gnewraw;/dev/hdd2:2Gnewraw"; +let rawdevice2 = $args --innodb_temp_data_file_path="/dev/hdd1:3Graw;/dev/hdd2:2Graw"; +let sizeoftempfile1 = $args --innodb_temp_data_file_path="ibtmp1:2M:autoextend"; +let sizeoftempfile2 = $args --innodb_data_file_path="ibdata1:2M:autoextend"; +let notemptablespacefile = $args --innodb_temp_data_file_path=""; + +#----------------------------------------------------------------------------- +# +# 1. creation of shared temp-tablespace. +# +--echo # files in MYSQL_DATA_DIR +--list_files $MYSQL_DATA_DIR/ ibtmp* + + +#----------------------------------------------------------------------------- +# +# 2. ddl + dml operation involving temp-tablespace. +# insert/delete/update/select +# create/drop/alter/truncate/import-discard (though blocked). 
+# +select @@global.innodb_file_per_table; +create temporary table t1 (i int, f float, c char(100)) engine=innodb; +# +--source suite/innodb_zip/include/innodb_temp_table_dml.inc +# +# alter table +--error ER_CANNOT_DISCARD_TEMPORARY_TABLE +alter table t1 discard tablespace; +--error ER_CANNOT_DISCARD_TEMPORARY_TABLE +alter table t1 import tablespace; +# +# drop table +drop table t1; + +#----------------------------------------------------------------------------- +# +# 3. ddl + dml operation on compressed table. +# (table doesn't reside in shared temp-tablespace). +# +--echo #files in MYSQL_TMP_DIR +--list_files $MYSQL_TMP_DIR/ *.ibd +set global innodb_file_per_table = 1; +select @@global.innodb_file_per_table; +create temporary table t1 + (i int, f float, c char(100)) engine = innodb key_block_size = 4; +show create table t1; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +# +--source suite/innodb_zip/include/innodb_temp_table_dml.inc +# +# alter table +--error ER_CANNOT_DISCARD_TEMPORARY_TABLE +alter table t1 discard tablespace; +# +# drop table +drop table t1; +set global innodb_file_per_table = off; + +#----------------------------------------------------------------------------- +# +# 4. Test bulk-loading that result in auto-extension of temp-tablespace. +# +create temporary table t1 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc)) engine = innodb; +delimiter |; +CREATE PROCEDURE populate_t1() +BEGIN + DECLARE i INT DEFAULT 1; + while (i <= 20000) DO + insert into t1 values (i, 'a', 'b'); + SET i = i + 1; + END WHILE; +END| +delimiter ;| +set autocommit=0; +select count(*) from t1; +call populate_t1(); +select count(*) from t1; +select * from t1 limit 10; +set autocommit=1; +truncate table t1; +select count(*) from t1; +# +drop procedure populate_t1; +drop table t1; + +#----------------------------------------------------------------------------- +# +# 5. 
re-creation of temp-tablespace on re-start. +# also to ensure non-existence of existing temp-table. +# +create temporary table t1 (keyc int, c1 char(100), c2 char(100)) engine = innodb; +insert into t1 values (1, 'c', 'b'); +select * from t1; +# +--source include/restart_mysqld.inc +# +--echo # files in MYSQL_DATA_DIR +--list_files $MYSQL_DATA_DIR/ ibtmp* +use test; +--error ER_NO_SUCH_TABLE +select * from t1; + +#----------------------------------------------------------------------------- +# +# 6. restart server in innodb-read-only mode. this will also +# block creation of temp-tables. +# +# +--echo "testing temp-table creation in --innodb_read_only mode" +let $restart_parameters=--innodb-read-only; +--source include/restart_mysqld.inc +# +use test; +show tables; +--error ER_INNODB_READ_ONLY, 1005 +create temporary table t1 (keyc int, c1 char(100), c2 char(100)) engine = innodb; + +#----------------------------------------------------------------------------- +# +# 7. try starting server with shared and temp-tablespace filename same. +# +--source include/shutdown_mysqld.inc +--echo "testing system and temp tablespace name conflict" +--error 1 +--exec $MYSQLD_CMD $nameconflicts +let SEARCH_PATTERN = innodb_temporary and innodb_system file names seem to be the same; +--source ./include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE +--echo "restarting server in normal mode" +--enable_reconnect +let $restart_parameters = restart; +--source include/start_mysqld.inc +# +show tables; +create temporary table t1 (keyc int, c1 char(100), c2 char(100)) engine = innodb; +drop table t1; + +#----------------------------------------------------------------------------- +# +# 8. try re-starting server with param so that temp-tablespace can't be expanded +# and insert enough data to make it full. 
+# +--echo # test condition of full-temp-tablespace +let $restart_parameters=--innodb_temp_data_file_path=ibtmp1:12M; +--source include/restart_mysqld.inc +# +create temporary table t1 + (keyc int, c1 char(100), c2 char(100), + primary key(keyc)) engine = innodb; +delimiter |; +CREATE PROCEDURE populate_t1() +BEGIN + DECLARE i INT DEFAULT 1; + while (i <= 20000) DO + insert into t1 values (i, 'a', 'b'); + SET i = i + 1; + END WHILE; +END| +delimiter ;| +set autocommit=0; +select count(*) from t1; +--error ER_RECORD_FILE_FULL +call populate_t1(); +# +drop procedure populate_t1; +drop table t1; + +#----------------------------------------------------------------------------- +# +# 9. tests for different row format types and key block sizes for +# compressed tables. +# +set innodb_strict_mode = off; +--disable_warnings +set global innodb_file_per_table = 0; +set global innodb_file_format = 'Antelope'; +create temporary table t ( + i int) + engine = innodb row_format = compressed; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +drop table t; +# +create temporary table t ( + i int) + engine = innodb row_format = compressed key_block_size = 8; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +# +drop table t; +set global innodb_file_per_table = 1; +create temporary table t ( + i int) + engine = innodb row_format = compressed key_block_size = 8; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +drop table t; +# +create temporary table t ( + i int) + engine = innodb row_format = dynamic; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# +set innodb_strict_mode = on; +create temporary table t ( + i int) + engine = innodb row_format = dynamic; +--replace_regex /[0-9]+/NUMBER/ +drop table t; +# +set global innodb_file_format = 'Barracuda'; +set innodb_strict_mode = off; +create temporary table t ( + i int) + engine = innodb row_format = compressed 
key_block_size = 8; +--replace_regex /[0-9]+/NUMBER/ +# explicitly disabling it else it will generate warning of ignoring +# key_block_size when suite is run with innodb-page-size=4k +#show warnings; +set innodb_strict_mode = default; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +# +drop table t; +create temporary table t ( + i int) + engine = innodb row_format = compressed; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# +create temporary table t ( + i int) + engine = innodb row_format = dynamic; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# +set innodb_strict_mode = on; +create temporary table t ( + i int) + engine = innodb row_format = dynamic; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +drop table t; +set innodb_strict_mode = off; +# +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +create temporary table t ( + i int) + engine = innodb row_format = dynamic key_block_size = 4; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# +create temporary table t ( + i int) + engine = innodb row_format = compact; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# +create temporary table t ( + i int) + engine = innodb key_block_size = 4; +--replace_regex /[0-9]+/NUMBER/ +show warnings; +--echo #files in MYSQL_TMP_DIR +--replace_regex /#sql[0-9a-f_]*/#sql/ +--list_files $MYSQL_TMP_DIR/ *.ibd +drop table t; +# + 
+#----------------------------------------------------------------------------- +# +# 10. try restarting server with raw device specified for temp-tablespace. +# +--source include/shutdown_mysqld.inc +--echo "testing temp tablespace non-support for raw device" +--error 1 +--exec $MYSQLD_CMD $rawdevice1 +let SEARCH_PATTERN = support raw device; +--source include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE +--echo "testing temp tablespace non-support for raw device" +--error 1 +--exec $MYSQLD_CMD $rawdevice2 +let SEARCH_PATTERN = support raw device; +--source include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE + +--source include/start_mysqld.inc + +show tables; +create temporary table t1 ( + keyc int, c1 char(100), c2 char(100) + ) engine = innodb; +drop table t1; + +#----------------------------------------------------------------------------- +# +# 11. try restarting server with temp-tablespace less than min. threshold +# +--source include/shutdown_mysqld.inc +--echo "try starting server with temp-tablespace size < min. threshold" +--error 1 +--exec $MYSQLD_CMD $sizeoftempfile1 +let SEARCH_PATTERN = Tablespace size must be at least; +--source ./include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE +--echo "try starting server with sys-tablespace size < min. threshold" +--error 1 +--exec $MYSQLD_CMD $sizeoftempfile2 +let SEARCH_PATTERN = Tablespace size must be at least; +--source ./include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE + +--source include/start_mysqld.inc + +show tables; +create temporary table t1 ( + keyc int, c1 char(100), c2 char(100) + ) engine = innodb; +drop table t1; + +#----------------------------------------------------------------------------- +# +# 12. no file specified for temp-tablespace. 
+# +--source include/shutdown_mysqld.inc + +--echo "try starting server with no file specified for temp-tablespace" +--error 1 +--exec $MYSQLD_CMD $notemptablespacefile +let SEARCH_PATTERN = init function returned error; +--source ./include/search_pattern_in_file.inc +--remove_file $SEARCH_FILE + +--source include/start_mysqld.inc + +show tables; +create temporary table t1 ( + keyc int, c1 char(100), c2 char(100) + ) engine = innodb; +drop table t1; diff --git a/mysql-test/suite/innodb_zip/t/wl6915_1.test b/mysql-test/suite/innodb_zip/t/wl6915_1.test new file mode 100644 index 00000000000..7f0f734d16a --- /dev/null +++ b/mysql-test/suite/innodb_zip/t/wl6915_1.test @@ -0,0 +1,650 @@ +--source include/have_innodb.inc +--source include/have_innodb_zip.inc +--source include/have_no_undo_tablespaces.inc +--source include/big_test.inc + +# Embedded server does not support restarting +--source include/not_embedded.inc +# Avoid CrashReporter popup on Mac +--source include/not_crashrep.inc + +#################################################################### +# TC to test temp-table undolog changes correctness # +# Sceanrio covered in single testcase : # +# - Tables with row format(redundant,compressed,dynamic,compact # +# - Table with primary,composite,prefix,secondary INDEX # +# - Insert/delete/update with transactioons # +# - Transaction with COMMIT,rollback,savepoint statements # +# - Transaction having temporary table and normal table # +# - Concurrency by execution of two clients creating tables with # +# same names # +# - Inserting data using # +# - Insert into .. , Load data infile..,insert ignore # +# - Insert into .. 
on duplicate update # +# - Check basic delete and upadte [ignore] # +# - Check constraints like duplicate key,default value # +# - Alter ADD COLUMN , ADD PRIMARY KEY # +# - Flush Tables, logs command # +# - Vary innodb_undo_tablespaces=0,innodb_undo_logs # +# innodb_log_files_in_group # +# - Verify rseg message from server log # +#################################################################### + +# run for page size >= 8k +--disable_warnings +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_page_size' AND variable_value >= 8192`) +{ + --skip Test requires InnoDB with page size >= 8k. +} +--enable_warnings + +call mtr.ADD_suppression(".*Resizing redo log.*"); +call mtr.ADD_suppression(".*Starting to delete and rewrite log files.*"); +call mtr.ADD_suppression(".*New log files created.*"); +# Save initial VALUES of server variable +--disable_query_log +let $innodb_file_per_table_orig=`SELECT @@innodb_file_per_table`; +--enable_query_log + +SELECT @@global.innodb_undo_tablespaces; + +# Create procedure to perform +# 1. Create temp table with row types , INDEX , sufficent data types +# 2. 
Perform DML with transaction +delimiter |; +CREATE PROCEDURE populate_tables(IN id VARCHAR(10)) + begin + declare n int default 20; + set global innodb_file_per_table=on; + DROP TABLE IF EXISTS t1,t2,t3,t4; + + CREATE TEMPORARY TABLE t1_temp(c1 int NOT NULL, + c2 int NOT NULL, + c3 char(255) NOT NULL, + c4 text(600) NOT NULL, + c5 blob(600) NOT NULL, + c6 varchar(600) NOT NULL, + c7 varchar(600) NOT NULL, + c8 datetime, + c9 decimal(6,3), + PRIMARY KEY (c1), + INDEX (c3,c4(50),c5(50)), + INDEX (c2)) + ENGINE=InnoDB ROW_FORMAT=redundant; + + set @s = concat("CREATE TABLE t1",id," ( c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=redundant;"); + PREPARE createTable FROM @s; + EXECUTE createTable; + DEALLOCATE PREPARE createTable; + + + CREATE TEMPORARY TABLE t2_temp(c1 int NOT NULL, + c2 int NOT NULL, + c3 char(255) NOT NULL, + c4 text(600) NOT NULL, + c5 blob(600) NOT NULL, + c6 varchar(600) NOT NULL, + c7 varchar(600) NOT NULL, + c8 datetime, + c9 decimal(6,3), + PRIMARY KEY (c1), + INDEX (c3,c4(50),c5(50)), + INDEX (c2)) + ENGINE=InnoDB ROW_FORMAT=compact; + + set @s = concat("CREATE TABLE t2",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=compact;"); + PREPARE createTable FROM @s; + EXECUTE createTable; + DEALLOCATE PREPARE createTable; + + CREATE TEMPORARY TABLE t3_temp(c1 int NOT NULL, + c2 int NOT NULL, + c3 char(255) NOT NULL, + c4 text(600) NOT NULL, + c5 blob(600) NOT NULL, + c6 varchar(600) NOT NULL, + c7 varchar(600) NOT NULL, + c8 datetime, + c9 decimal(6,3), + PRIMARY KEY (c1), + INDEX (c3,c4(50),c5(50)), + INDEX (c2)) 
+ ENGINE=InnoDB ROW_FORMAT=compressed key_block_size=4; + + set @s = concat("CREATE TABLE t3",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=compressed key_block_size=4;"); + PREPARE createTable FROM @s; + EXECUTE createTable; + DEALLOCATE PREPARE createTable; + + CREATE TEMPORARY TABLE t4_temp(c1 int NOT NULL, + c2 int NOT NULL, + c3 char(255) NOT NULL, + c4 text(600) NOT NULL, + c5 blob(600) NOT NULL, + c6 varchar(600) NOT NULL, + c7 varchar(600) NOT NULL, + c8 datetime, + c9 decimal(6,3), + PRIMARY KEY (c1), + INDEX (c3,c4(50),c5(50)), + INDEX (c2)) + ENGINE=InnoDB ROW_FORMAT=dynamic; + + set @s = concat("CREATE TABLE t4",id," (c1 int NOT NULL, c2 int NOT NULL, c3 char(255) NOT NULL, c4 text(600) NOT NULL, c5 blob(600) NOT NULL, c6 varchar(600) NOT NULL, c7 varchar(600) NOT NULL, c8 datetime, c9 decimal(6,3), PRIMARY KEY (c1), INDEX (c3,c4(50),c5(50)), INDEX (c2)) ENGINE=InnoDB ROW_FORMAT=dynamic;"); + PREPARE createTable FROM @s; + EXECUTE createTable; + DEALLOCATE PREPARE createTable; + + while (n > 0) do + START TRANSACTION; + set @s = concat("INSERT INTO t1",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t1_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), + REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), + REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), + NOW(),(100.55+n)); + + set @s = concat("INSERT INTO t2",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), 
REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + + INSERT INTO t2_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), + REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), + REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), + NOW(),(100.55+n)); + + savepoint a; + + set @s = concat("INSERT INTO t3",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + + INSERT INTO t3_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), + REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), + REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), + NOW(),(100.55+n)); + + savepoint b; + + set @s = concat("INSERT INTO t4",id," VALUES(",n,",",n,",REPEAT(concat(' tc3_',",n,"),30), REPEAT(concat(' tc4_',",n,"),70),REPEAT(concat(' tc_',",n,"),70), REPEAT(concat(' tc6_',",n,"),70),REPEAT(concat(' tc7_',",n,"),70), NOW(),(100.55+",n,"));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + + INSERT INTO t4_temp VALUES(n,n,REPEAT(concat(' tc3_',n),30), + REPEAT(concat(' tc4_',n),70),REPEAT(concat(' tc_',n),70), + REPEAT(concat(' tc6_',n),70),REPEAT(concat(' tc7_',n),70), + NOW(),(100.55+n)); + + + if (n > 10) then + if (n > 10 and n <=12) then + ROLLBACK TO SAVEPOINT a; + COMMIT; + end if; + if (n > 12 and n < 15) then + ROLLBACK TO SAVEPOINT b; + COMMIT; + end if; + if (n > 15) then + COMMIT; + end if; + + else + if (n > 5) then + START TRANSACTION; + DELETE FROM t1_temp WHERE c1 > 10 ; + DELETE FROM t2_temp WHERE c1 > 10 ; + DELETE FROM t3_temp WHERE c1 > 10 ; + DELETE FROM t4_temp WHERE c1 > 10 ; + + rollback; + START 
TRANSACTION; + update t1_temp set c1 = c1 + 1000 WHERE c1 > 10; + update t2_temp set c1 = c1 + 1000 WHERE c1 > 10; + update t3_temp set c1 = c1 + 1000 WHERE c1 > 10; + update t4_temp set c1 = c1 + 1000 WHERE c1 > 10; + rollback; + end if; + end if; + + if (n < 5) then + rollback; + end if; + + FLUSH logs; + ALTER TABLE t1_temp DROP PRIMARY KEY; + ALTER TABLE t1_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); + ALTER TABLE t2_temp DROP PRIMARY KEY; + ALTER TABLE t2_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); + ALTER TABLE t3_temp DROP PRIMARY KEY; + ALTER TABLE t3_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); + ALTER TABLE t4_temp DROP PRIMARY KEY; + ALTER TABLE t4_temp ADD PRIMARY KEY (c1,c3(10),c4(10)); + FLUSH tables; + + START TRANSACTION; + set @s = concat("INSERT INTO t1",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t1_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t2",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t2_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = 
concat("INSERT INTO t3",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t3_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t4",id," VALUES(",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t4_temp VALUES(n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + + + DELETE FROM t1_temp WHERE c1 between 100 and 110; + DELETE FROM t2_temp WHERE c1 between 100 and 110; + DELETE FROM t3_temp WHERE c1 between 100 and 110; + DELETE FROM t4_temp WHERE c1 between 100 and 110; + + update t1_temp set c1 = c1+1 WHERE c1>110; + update t2_temp set c1 = c1+1 WHERE c1>110; + update t3_temp set c1 = c1+1 WHERE c1>110; + update t4_temp set c1 = c1+1 WHERE c1>110; + + savepoint a; + + set @s = concat("INSERT INTO t1",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + 
DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t1_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t2",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t2_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t3",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t3_temp VALUES(300+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t4",id," VALUES(300+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t4_temp VALUES(300+n+100,n+100,REPEAT(concat(' 
tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + savepoint b; + + set @s = concat("INSERT INTO t1",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t1_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t2",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t2_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t3",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t3_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + 
REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + set @s = concat("INSERT INTO t4",id," VALUES(400+",n,"+100,",n,"+100,REPEAT(concat(' tc3_',",n,"+100),30), REPEAT(concat(' tc4_',",n,"+100),70),REPEAT(concat(' tc_',",n,"+100),70), REPEAT(concat(' tc6_',",n,"+100),60),REPEAT(concat(' tc7_',",n,"+100),60), NOW(),(100.55+",n,"+100));"); + PREPARE insertIntoTable FROM @s; + EXECUTE insertIntoTable; + DEALLOCATE PREPARE insertIntoTable; + INSERT INTO t4_temp VALUES(400+n+100,n+100,REPEAT(concat(' tc3_',n+100),30), + REPEAT(concat(' tc4_',n+100),70),REPEAT(concat(' tc_',n+100),70), + REPEAT(concat(' tc6_',n+100),60),REPEAT(concat(' tc7_',n+100),60), + NOW(),(100.55+n+100)); + savepoint c; + rollback to b; + rollback to a; + COMMIT; + COMMIT; + rollback; + set n = n - 1; + end while; +end| +delimiter ;| + +# Create two client for concurrent execution +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +--echo #---client 1 : dml operation ---" +connection con1; +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; + +-- enable_query_log +-- disable_query_log +# call procedure +--send call populate_tables('_1'); +-- enable_query_log + +--echo #---client 2 : dml operation ---" +connection con2; +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +-- enable_query_log +-- disable_query_log +# call procedure +--send call populate_tables('_2'); + +-- enable_query_log + +# check data of client connection 1 +--echo # In connection 1 +connection con1; +--reap +# 20 rows exepceted in 5 tables +SELECT count(*) FROM t1_1; +SELECT count(*) FROM t2_1; +SELECT count(*) FROM t3_1; +SELECT count(*) FROM t4_1; +SELECT c1 FROM t1_1; +SELECT c1 FROM t2_1; +SELECT c1 FROM t3_1; +SELECT c1 FROM t4_1; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 
FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; +# check data of client connection 2 +--echo # In connection 2 +connection con2; +--reap +# 20 rows exepceted in 5 tables +SELECT count(*) FROM t1_2; +SELECT count(*) FROM t2_2; +SELECT count(*) FROM t3_2; +SELECT count(*) FROM t4_2; +SELECT c1 FROM t1_2; +SELECT c1 FROM t2_2; +SELECT c1 FROM t3_2; +SELECT c1 FROM t4_2; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; + +--echo # In connection 1 +connection con1; + +set AUTOCOMMIT = 0; +ALTER TABLE t1_temp DROP PRIMARY KEY; +ALTER TABLE t1_temp ADD PRIMARY KEY (c1); +ALTER TABLE t2_temp DROP PRIMARY KEY; +ALTER TABLE t2_temp ADD PRIMARY KEY (c1); +ALTER TABLE t3_temp DROP PRIMARY KEY; +ALTER TABLE t3_temp ADD PRIMARY KEY (c1); +ALTER TABLE t4_temp DROP PRIMARY KEY; +ALTER TABLE t4_temp ADD PRIMARY KEY (c1); +# Check duplicate key constraint + insert ignore +--error ER_DUP_ENTRY +INSERT INTO t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +insert ignore into t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +insert ignore into t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +insert ignore into t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +insert ignore into t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); + +# check rollback due to duplicate value in second record of insert +--error ER_DUP_ENTRY +INSERT INTO t1_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t2_temp VALUES 
(1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t3_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); +--error ER_DUP_ENTRY +INSERT INTO t4_temp VALUES (1,1,'a','a','a','a','a',NOW(),100.55), +(20,1,'a','a','a','a','a',NOW(),100.55); + +set AUTOCOMMIT = 1; + +SELECT c1,c2 FROM t1_temp WHERE c1 in (20,1); +SELECT c1,c2 FROM t2_temp WHERE c1 in (20,1); +SELECT c1,c2 FROM t3_temp WHERE c1 in (20,1); +SELECT c1,c2 FROM t4_temp WHERE c1 in (20,1); + +#replace statement +REPLACE INTO t1_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t2_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t3_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +REPLACE INTO t4_temp VALUES (20,1,'a','a','a','a','a',NOW(),100.55); +# verify row is replaced FROM (20,20) to (20,1) +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t1_temp WHERE c1 = 20; +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t2_temp WHERE c1 = 20; +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t3_temp WHERE c1 = 20; +SELECT c1,c2,c3,c4,c5,c6,c7,c9 FROM t4_temp WHERE c1 = 20; + +# Update ignore. 
statement is gonored as 20 value exits +update ignore t1_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t2_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t3_temp set c1 = 20 WHERE c1 = 140 ; +update ignore t4_temp set c1 = 20 WHERE c1 = 140 ; +# see record 140 is present as last update ignored +SELECT count(*) FROM t1_temp WHERE c1 = 140; +SELECT count(*) FROM t2_temp WHERE c1 = 140; +SELECT count(*) FROM t3_temp WHERE c1 = 140; +SELECT count(*) FROM t4_temp WHERE c1 = 140; + +# Alter table to ADD COLUMN and PRIMARY KEY +ALTER TABLE t1_temp ADD COLUMN c10 int default 99 , +ADD COLUMN c11 varchar(100) default 'test'; +ALTER TABLE t1_temp DROP PRIMARY KEY; +ALTER TABLE t1_temp ADD PRIMARY KEY (c1); +INSERT INTO t1_temp (c1,c2,c3,c4,c5,c6,c7,c8,c9) VALUES (-1,-1,'a','a','a','a','a',NOW(),100.55); +SELECT c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 FROM t1_temp WHERE c1 < 0; +SELECT count(*) FROM t1_temp WHERE c10 = 99 and c11 like 'test'; +# insert on duplicate key update +INSERT INTO t1_temp (c1,c2,c3,c4,c5,c6,c7,c8,c9) VALUES (-1,-1,'a','a','a','a','a',NOW(),100.55) +ON DUPLICATE KEY UPDATE c1=-2,c2=-2; +SELECT c1,c2,c3,c4,c5,c6,c7,c9,c10,c11 FROM t1_temp WHERE c1 < 0; + +# + +#cleanup +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; + +connection con2; +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; + + +connection default; +# +## trying with VALUES innodb_undo_tablespaces, innodb_undo_logs ,innodb_log_files_in_group +## +let $restart_parameters=--innodb_undo_tablespaces=0 --innodb_rollback_segments=20 --innodb_undo_logs=20 --innodb_log_files_in_group=4; +--source include/restart_mysqld.inc + +# Create two client for concurrent execution +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +# +# +connection con1; +--send call populate_tables('_1'); +connection con2; +--send call populate_tables('_2'); +--echo "#connection 1 - verify tables" +connection con1; +--reap +SELECT count(*) FROM t1_1; +SELECT count(*) FROM t2_1; +SELECT count(*) FROM t3_1; 
+SELECT count(*) FROM t4_1; +SELECT c1 FROM t1_1; +SELECT c1 FROM t2_1; +SELECT c1 FROM t3_1; +SELECT c1 FROM t4_1; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; +--echo "#connection 2 - verify tables" +connection con2; +--reap +SELECT count(*) FROM t1_2; +SELECT count(*) FROM t2_2; +SELECT count(*) FROM t3_2; +SELECT count(*) FROM t4_2; +SELECT c1 FROM t1_2; +SELECT c1 FROM t2_2; +SELECT c1 FROM t3_2; +SELECT c1 FROM t4_2; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; + +connection default; +# innodb_undo_logs > non redo rsegment +let $restart_parameters=--innodb_undo_tablespaces=0 --innodb_rollback_segments=30 --innodb_undo_logs=20 --innodb_log_files_in_group=4; +--source include/restart_mysqld.inc + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +connection con1; +--send call populate_tables('_1'); +connection con2; +--send call populate_tables('_2'); +--echo "#connection 1 - verify tables" +connection con1; +--reap +SELECT count(*) FROM t1_1; +SELECT count(*) FROM t2_1; +SELECT count(*) FROM t3_1; +SELECT count(*) FROM t4_1; +SELECT c1 FROM t1_1; +SELECT c1 FROM t2_1; +SELECT c1 FROM t3_1; +SELECT c1 FROM t4_1; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; +DROP TABLE t1_1 ,t2_1 ,t3_1,t4_1; +disconnect con1; +--echo "#connection 2 - verify tables" +connection con2; +--reap +SELECT count(*) FROM t1_2; +SELECT 
count(*) FROM t2_2; +SELECT count(*) FROM t3_2; +SELECT count(*) FROM t4_2; +SELECT c1 FROM t1_2; +SELECT c1 FROM t2_2; +SELECT c1 FROM t3_2; +SELECT c1 FROM t4_2; +SELECT count(*) FROM t1_temp; +SELECT count(*) FROM t2_temp; +SELECT count(*) FROM t3_temp; +SELECT count(*) FROM t4_temp; +SELECT c1 FROM t1_temp; +SELECT c1 FROM t2_temp; +SELECT c1 FROM t3_temp; +SELECT c1 FROM t4_temp; +DROP TABLE t1_2 ,t2_2 ,t3_2,t4_2; +disconnect con2; + +# + +connection default; +DROP PROCEDURE populate_tables; + +# check message in log +let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err; +let SEARCH_FILE= $error_log; +# We get depending on the platform either "./ibdata1" or ".\ibdata1". +let SEARCH_PATTERN=redo rollback segment.*found.*redo rollback segment.*active +--source include/search_pattern_in_file.inc +let SEARCH_PATTERN=non-redo rollback.*active +--source include/search_pattern_in_file.inc + + +SHOW TABLES; + +-- disable_query_log +eval set global innodb_file_per_table=$innodb_file_per_table_orig; +-- enable_query_log + diff --git a/mysql-test/suite/perfschema/r/socket_summary_by_instance_func.result b/mysql-test/suite/perfschema/r/socket_summary_by_instance_func.result index bf515500c2f..dec4dc431f5 100644 --- a/mysql-test/suite/perfschema/r/socket_summary_by_instance_func.result +++ b/mysql-test/suite/perfschema/r/socket_summary_by_instance_func.result @@ -1,3 +1,4 @@ +set global session_track_schema=FALSE; # The logging of commands and result sets is mostly disabled. # There are some messages which help to observe the progress of the test. # In case some check fails @@ -218,3 +219,4 @@ connection default; TRUNCATE TABLE performance_schema.socket_summary_by_instance; # 6. 
Cleanup connection default; +set global session_track_schema=DEFAULT; diff --git a/mysql-test/suite/perfschema/r/threads_innodb,xtradb.rdiff b/mysql-test/suite/perfschema/r/threads_innodb,xtradb.rdiff-disabled similarity index 100% rename from mysql-test/suite/perfschema/r/threads_innodb,xtradb.rdiff rename to mysql-test/suite/perfschema/r/threads_innodb,xtradb.rdiff-disabled diff --git a/mysql-test/suite/perfschema/r/threads_innodb.result b/mysql-test/suite/perfschema/r/threads_innodb.result index fdbd7469c7e..3fb469ad00b 100644 --- a/mysql-test/suite/perfschema/r/threads_innodb.result +++ b/mysql-test/suite/perfschema/r/threads_innodb.result @@ -5,7 +5,10 @@ FROM performance_schema.threads WHERE name LIKE 'thread/innodb/%' GROUP BY name; name type processlist_user processlist_host processlist_db processlist_command processlist_time processlist_state processlist_info parent_thread_id role instrumented -thread/innodb/io_handler_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/io_ibuf_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/io_log_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/io_read_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/io_write_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES thread/innodb/page_cleaner_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES thread/innodb/srv_error_monitor_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES thread/innodb/srv_lock_timeout_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES diff --git a/mysql-test/suite/perfschema/t/socket_summary_by_instance_func.test b/mysql-test/suite/perfschema/t/socket_summary_by_instance_func.test index 99f89bd3225..42bcb3e4d70 100644 --- a/mysql-test/suite/perfschema/t/socket_summary_by_instance_func.test +++ b/mysql-test/suite/perfschema/t/socket_summary_by_instance_func.test @@ 
-38,6 +38,7 @@ # prepared statement. --source include/no_protocol.inc +set global session_track_schema=FALSE; #=================================== # Set IP address defaults with respect to IPV6 support @@ -1710,3 +1711,4 @@ DROP SCHEMA mysqlsupertest; --connection default --enable_query_log +set global session_track_schema=DEFAULT; diff --git a/mysql-test/suite/plugins/r/audit_null_debug.result b/mysql-test/suite/plugins/r/audit_null_debug.result index e88f5465e47..840d4307fb3 100644 --- a/mysql-test/suite/plugins/r/audit_null_debug.result +++ b/mysql-test/suite/plugins/r/audit_null_debug.result @@ -1,13 +1,13 @@ set @old_dbug=@@debug_dbug; -call mtr.add_suppression("Incorrect key file for table.*mysql.plugin.MYI"); +call mtr.add_suppression("Index for table.*mysql.plugin.MYI"); SET debug_dbug='+d,myisam_pretend_crashed_table_on_usage'; install plugin audit_null soname 'adt_null'; -ERROR HY000: Incorrect key file for table './mysql/plugin.MYI'; try to repair it +ERROR HY000: Index for table './mysql/plugin.MYI' is corrupt; try to repair it SET debug_dbug=@old_dbug; install plugin audit_null soname 'adt_null'; SET debug_dbug='+d,myisam_pretend_crashed_table_on_usage'; uninstall plugin audit_null; -ERROR HY000: Incorrect key file for table './mysql/plugin.MYI'; try to repair it +ERROR HY000: Index for table './mysql/plugin.MYI' is corrupt; try to repair it SET debug_dbug=@old_dbug; uninstall plugin audit_null; ERROR 42000: PLUGIN audit_null does not exist diff --git a/mysql-test/suite/plugins/r/show_all_plugins.result b/mysql-test/suite/plugins/r/show_all_plugins.result index 623e8e9314b..fae706f56fa 100644 --- a/mysql-test/suite/plugins/r/show_all_plugins.result +++ b/mysql-test/suite/plugins/r/show_all_plugins.result @@ -26,7 +26,7 @@ three_attempts NOT INSTALLED AUTHENTICATION dialog_examples.so GPL two_questions NOT INSTALLED AUTHENTICATION dialog_examples.so GPL show status like '%libraries%'; Variable_name Value -Opened_plugin_libraries 7 
+Opened_plugin_libraries 8 show plugins soname where library = 'ha_example.so'; Name Status Type Library License EXAMPLE NOT INSTALLED STORAGE ENGINE ha_example.so GPL diff --git a/mysql-test/suite/plugins/t/audit_null_debug.test b/mysql-test/suite/plugins/t/audit_null_debug.test index 8d77b02938b..2c457df4759 100644 --- a/mysql-test/suite/plugins/t/audit_null_debug.test +++ b/mysql-test/suite/plugins/t/audit_null_debug.test @@ -6,7 +6,7 @@ if (!$ADT_NULL_SO) { } set @old_dbug=@@debug_dbug; -call mtr.add_suppression("Incorrect key file for table.*mysql.plugin.MYI"); +call mtr.add_suppression("Index for table.*mysql.plugin.MYI"); # # MySQL BUG#14485479 - INSTALL AUDIT PLUGIN HANGS IF WE TRY TO DISABLE AND ENABLED DURING DDL OPERATION diff --git a/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result b/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result index 2997d4eb426..e7f4e566566 100644 --- a/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result +++ b/mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result @@ -12,7 +12,7 @@ include/rpl_stop_server.inc [server_number=1] slave is unblocked SET DEBUG_SYNC='now SIGNAL signal.get_unix_timestamp'; Check network error happened here -include/wait_for_slave_io_error.inc [errno=1040, 1053, 2002, 2003, 2006, 2013] +include/wait_for_slave_io_error.inc [errno=1053, 2002, 2003, 2006, 2013] set @@global.debug = "-d,'debug_lock.before_get_UNIX_TIMESTAMP'"; include/rpl_start_server.inc [server_number=1] include/wait_for_slave_param.inc [Slave_IO_Running] @@ -24,7 +24,7 @@ include/rpl_stop_server.inc [server_number=1] slave is unblocked SET DEBUG_SYNC='now SIGNAL signal.get_server_id'; Check network error happened here -include/wait_for_slave_io_error.inc [errno=1040, 1053, 2002, 2003, 2006, 2013] +include/wait_for_slave_io_error.inc [errno=1053, 2002, 2003, 2006, 2013] set @@global.debug = "-d,'debug_lock.before_get_SERVER_ID'"; include/rpl_start_server.inc [server_number=1] 
include/wait_for_slave_param.inc [Slave_IO_Running] diff --git a/mysql-test/suite/sys_vars/r/innodb_adaptive_hash_index_parts_basic.result b/mysql-test/suite/sys_vars/r/innodb_adaptive_hash_index_parts_basic.result new file mode 100644 index 00000000000..965e2efedf3 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_adaptive_hash_index_parts_basic.result @@ -0,0 +1,48 @@ +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts) +1 +1 Expected +SET @@GLOBAL.innodb_adaptive_hash_index_parts=1; +ERROR HY000: Variable 'innodb_adaptive_hash_index_parts' is a read only variable +Expected error 'Read only variable' +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts) +1 +1 Expected +SELECT @@GLOBAL.innodb_adaptive_hash_index_parts = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_adaptive_hash_index_parts'; +@@GLOBAL.innodb_adaptive_hash_index_parts = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_adaptive_hash_index_parts'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SELECT @@innodb_adaptive_hash_index_parts = @@GLOBAL.innodb_adaptive_hash_index_parts; +@@innodb_adaptive_hash_index_parts = @@GLOBAL.innodb_adaptive_hash_index_parts +1 +1 Expected +SELECT COUNT(@@innodb_adaptive_hash_index_parts); +COUNT(@@innodb_adaptive_hash_index_parts) +1 +1 Expected +SELECT COUNT(@@local.innodb_adaptive_hash_index_parts); +ERROR HY000: Variable 'innodb_adaptive_hash_index_parts' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_adaptive_hash_index_parts); +ERROR HY000: Variable 'innodb_adaptive_hash_index_parts' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT 
COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts) +1 +1 Expected +SELECT innodb_adaptive_hash_index_parts = @@SESSION.innodb_adaptive_hash_index_parts; +ERROR 42S22: Unknown column 'innodb_adaptive_hash_index_parts' in 'field list' +Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_adaptive_max_sleep_delay_basic.result b/mysql-test/suite/sys_vars/r/innodb_adaptive_max_sleep_delay_basic.result index 54b1c1e78b4..b3ba28a4411 100644 --- a/mysql-test/suite/sys_vars/r/innodb_adaptive_max_sleep_delay_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_adaptive_max_sleep_delay_basic.result @@ -3,6 +3,18 @@ SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; 150000 150000 Expected SET @@GLOBAL.innodb_adaptive_max_sleep_delay=100; +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=0; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +@@GLOBAL.innodb_adaptive_max_sleep_delay +0 +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=100000; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +@@GLOBAL.innodb_adaptive_max_sleep_delay +100000 +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=1000000; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +@@GLOBAL.innodb_adaptive_max_sleep_delay +1000000 SET @@GLOBAL.innodb_adaptive_max_sleep_delay=1000001; Warnings: Warning 1292 Truncated incorrect innodb_adaptive_max_sleep_delay value: '1000001' @@ -10,6 +22,13 @@ SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; @@GLOBAL.innodb_adaptive_max_sleep_delay 1000000 1000000 Expected +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=4294967295; +Warnings: +Warning 1292 Truncated incorrect innodb_adaptive_max_sleep_delay value: '4294967295' +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +@@GLOBAL.innodb_adaptive_max_sleep_delay +1000000 +1000000 Expected SET @@GLOBAL.innodb_adaptive_max_sleep_delay=-1; Warnings: Warning 1292 Truncated incorrect innodb_adaptive_max_sleep_delay value: '-1' @@ -17,6 +36,13 @@ SELECT 
@@GLOBAL.innodb_adaptive_max_sleep_delay; @@GLOBAL.innodb_adaptive_max_sleep_delay 0 0 Expected +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_adaptive_max_sleep_delay value: '-1024' +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +@@GLOBAL.innodb_adaptive_max_sleep_delay +0 +0 Expected SELECT COUNT(@@GLOBAL.innodb_adaptive_max_sleep_delay); COUNT(@@GLOBAL.innodb_adaptive_max_sleep_delay) 1 diff --git a/mysql-test/suite/sys_vars/r/innodb_additional_mem_pool_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_additional_mem_pool_size_basic.result deleted file mode 100644 index fb062d62bc6..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_additional_mem_pool_size_basic.result +++ /dev/null @@ -1,53 +0,0 @@ -'#---------------------BS_STVARS_020_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); -COUNT(@@GLOBAL.innodb_additional_mem_pool_size) -1 -1 Expected -'#---------------------BS_STVARS_020_02----------------------#' -SET @@GLOBAL.innodb_additional_mem_pool_size=1; -ERROR HY000: Variable 'innodb_additional_mem_pool_size' is a read only variable -Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); -COUNT(@@GLOBAL.innodb_additional_mem_pool_size) -1 -1 Expected -'#---------------------BS_STVARS_020_03----------------------#' -SELECT @@GLOBAL.innodb_additional_mem_pool_size = VARIABLE_VALUE -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_additional_mem_pool_size'; -@@GLOBAL.innodb_additional_mem_pool_size = VARIABLE_VALUE -1 -1 Expected -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); -COUNT(@@GLOBAL.innodb_additional_mem_pool_size) -1 -1 Expected -SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_additional_mem_pool_size'; -COUNT(VARIABLE_VALUE) -1 -1 Expected -'#---------------------BS_STVARS_020_04----------------------#' -SELECT 
@@innodb_additional_mem_pool_size = @@GLOBAL.innodb_additional_mem_pool_size; -@@innodb_additional_mem_pool_size = @@GLOBAL.innodb_additional_mem_pool_size -1 -1 Expected -'#---------------------BS_STVARS_020_05----------------------#' -SELECT COUNT(@@innodb_additional_mem_pool_size); -COUNT(@@innodb_additional_mem_pool_size) -1 -1 Expected -SELECT COUNT(@@local.innodb_additional_mem_pool_size); -ERROR HY000: Variable 'innodb_additional_mem_pool_size' is a GLOBAL variable -Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@SESSION.innodb_additional_mem_pool_size); -ERROR HY000: Variable 'innodb_additional_mem_pool_size' is a GLOBAL variable -Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); -COUNT(@@GLOBAL.innodb_additional_mem_pool_size) -1 -1 Expected -SELECT innodb_additional_mem_pool_size = @@SESSION.innodb_additional_mem_pool_size; -ERROR 42S22: Unknown column 'innodb_additional_mem_pool_size' in 'field list' -Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_api_bk_commit_interval_basic.result b/mysql-test/suite/sys_vars/r/innodb_api_bk_commit_interval_basic.result index d2773b7da69..0bc17e10b06 100644 --- a/mysql-test/suite/sys_vars/r/innodb_api_bk_commit_interval_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_api_bk_commit_interval_basic.result @@ -42,6 +42,42 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_API_BK_COMMIT_INTERVAL 100 SET session innodb_api_bk_commit_interval=1; ERROR HY000: Variable 'innodb_api_bk_commit_interval' is a GLOBAL variable and should be set with SET GLOBAL +SET global innodb_api_bk_commit_interval=1; +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +1 +SET global innodb_api_bk_commit_interval=100000; +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +100000 +SET global innodb_api_bk_commit_interval=1073741824; +SELECT @@global.innodb_api_bk_commit_interval; 
+@@global.innodb_api_bk_commit_interval +1073741824 +SET global innodb_api_bk_commit_interval=0; +Warnings: +Warning 1292 Truncated incorrect innodb_api_bk_commit_interval value: '0' +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +1 +SET global innodb_api_bk_commit_interval=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_api_bk_commit_interval value: '-1024' +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +1 +SET global innodb_api_bk_commit_interval=1073741825; +Warnings: +Warning 1292 Truncated incorrect innodb_api_bk_commit_interval value: '1073741825' +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +1073741824 +SET global innodb_api_bk_commit_interval=4294967295; +Warnings: +Warning 1292 Truncated incorrect innodb_api_bk_commit_interval value: '4294967295' +SELECT @@global.innodb_api_bk_commit_interval; +@@global.innodb_api_bk_commit_interval +1073741824 SET global innodb_api_bk_commit_interval=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_api_bk_commit_interval' SET global innodb_api_bk_commit_interval=1e1; diff --git a/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result b/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result index 900f0167261..6bcca056278 100644 --- a/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result @@ -53,6 +53,12 @@ Warning 1292 Truncated incorrect innodb_autoextend_increment value: '1001' SELECT @@global.innodb_autoextend_increment; @@global.innodb_autoextend_increment 1000 +SET @@global.innodb_autoextend_increment = 2000 ; +Warnings: +Warning 1292 Truncated incorrect innodb_autoextend_increment value: '2000' +SELECT @@global.innodb_autoextend_increment; +@@global.innodb_autoextend_increment +1000 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT 
@@global.innodb_autoextend_increment = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_chunk_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_chunk_size_basic.result new file mode 100644 index 00000000000..e9fd5519066 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_chunk_size_basic.result @@ -0,0 +1,48 @@ +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size) +1 +1 Expected +SET @@GLOBAL.innodb_buffer_pool_chunk_size=1; +ERROR HY000: Variable 'innodb_buffer_pool_chunk_size' is a read only variable +Expected error 'Read only variable' +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size) +1 +1 Expected +SELECT @@GLOBAL.innodb_buffer_pool_chunk_size = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_buffer_pool_chunk_size'; +@@GLOBAL.innodb_buffer_pool_chunk_size = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_buffer_pool_chunk_size'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SELECT @@innodb_buffer_pool_chunk_size = @@GLOBAL.innodb_buffer_pool_chunk_size; +@@innodb_buffer_pool_chunk_size = @@GLOBAL.innodb_buffer_pool_chunk_size +1 +1 Expected +SELECT COUNT(@@innodb_buffer_pool_chunk_size); +COUNT(@@innodb_buffer_pool_chunk_size) +1 +1 Expected +SELECT COUNT(@@local.innodb_buffer_pool_chunk_size); +ERROR HY000: Variable 'innodb_buffer_pool_chunk_size' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_buffer_pool_chunk_size); +ERROR HY000: Variable 'innodb_buffer_pool_chunk_size' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT 
COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size) +1 +1 Expected +SELECT innodb_buffer_pool_chunk_size = @@SESSION.innodb_buffer_pool_chunk_size; +ERROR 42S22: Unknown column 'innodb_buffer_pool_chunk_size' in 'field list' +Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_at_shutdown_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_at_shutdown_basic.result index 26bb44b4587..ad329cd336f 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_at_shutdown_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_at_shutdown_basic.result @@ -1,7 +1,7 @@ SET @orig = @@global.innodb_buffer_pool_dump_at_shutdown; SELECT @orig; @orig -0 +1 SET GLOBAL innodb_buffer_pool_dump_at_shutdown = OFF; SELECT @@global.innodb_buffer_pool_dump_at_shutdown; @@global.innodb_buffer_pool_dump_at_shutdown @@ -16,3 +16,4 @@ SET GLOBAL innodb_buffer_pool_dump_at_shutdown = "string"; Got one of the listed errors SET GLOBAL innodb_buffer_pool_dump_at_shutdown = 5; Got one of the listed errors +SET GLOBAL innodb_buffer_pool_dump_at_shutdown = default; diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_pct_basic.result index 51c72cfe791..b2cc55ce71e 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_pct_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_dump_pct_basic.result @@ -1,20 +1,35 @@ -SET @orig = @@global.innodb_buffer_pool_dump_pct; -SELECT @orig; -@orig -100 -SET GLOBAL innodb_buffer_pool_dump_pct=3, GLOBAL innodb_buffer_pool_dump_now = ON; -SET GLOBAL innodb_buffer_pool_dump_pct=0; +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +25 +SET GLOBAL innodb_buffer_pool_dump_pct=20; +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +20 +SET GLOBAL 
innodb_buffer_pool_dump_pct=1; SELECT @@global.innodb_buffer_pool_dump_pct; @@global.innodb_buffer_pool_dump_pct 1 -SHOW WARNINGS; -Level Code Message -Warning 1292 Truncated incorrect innodb_buffer_pool_dump_pct value: '0' -SET GLOBAL innodb_buffer_pool_dump_pct=101; +SET GLOBAL innodb_buffer_pool_dump_pct=100; SELECT @@global.innodb_buffer_pool_dump_pct; @@global.innodb_buffer_pool_dump_pct 100 -SHOW WARNINGS; -Level Code Message +SET GLOBAL innodb_buffer_pool_dump_pct=101; +Warnings: Warning 1292 Truncated incorrect innodb_buffer_pool_dump_pct value: '101' -SET GLOBAL innodb_buffer_pool_dump_pct=@orig; +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +100 +SET GLOBAL innodb_buffer_pool_dump_pct=-1; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_dump_pct value: '-1' +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +1 +SET GLOBAL innodb_buffer_pool_dump_pct=Default; +SELECT @@global.innodb_buffer_pool_dump_pct; +@@global.innodb_buffer_pool_dump_pct +25 +SET GLOBAL innodb_buffer_pool_dump_pct='foo'; +ERROR 42000: Incorrect argument type to variable 'innodb_buffer_pool_dump_pct' +SET innodb_buffer_pool_dump_pct=50; +ERROR HY000: Variable 'innodb_buffer_pool_dump_pct' is a GLOBAL variable and should be set with SET GLOBAL diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_filename_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_filename_basic.result deleted file mode 100644 index 5e50a715307..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_filename_basic.result +++ /dev/null @@ -1,7 +0,0 @@ -SET @orig = @@global.innodb_buffer_pool_filename; -SELECT @orig; -@orig -ib_buffer_pool -SET GLOBAL innodb_buffer_pool_filename = 'innodb_foobar_dump'; -SET GLOBAL innodb_buffer_pool_dump_now = ON; -SET GLOBAL innodb_buffer_pool_filename = @orig; diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_load_at_startup_basic.result 
b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_load_at_startup_basic.result index 1dad72baefd..cd4c924b425 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_load_at_startup_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_load_at_startup_basic.result @@ -1,7 +1,7 @@ SET @orig = @@global.innodb_buffer_pool_load_at_startup; SELECT @orig; @orig -0 +1 SET GLOBAL innodb_buffer_pool_load_at_startup = OFF; ERROR HY000: Variable 'innodb_buffer_pool_load_at_startup' is a read only variable SET GLOBAL innodb_buffer_pool_load_at_startup = ON; diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result index 27e6cae41ef..e6210165436 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result @@ -1,12 +1,12 @@ +SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; '#---------------------BS_STVARS_022_01----------------------#' SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); COUNT(@@GLOBAL.innodb_buffer_pool_size) 1 1 Expected '#---------------------BS_STVARS_022_02----------------------#' -SET @@GLOBAL.innodb_buffer_pool_size=1; -ERROR HY000: Variable 'innodb_buffer_pool_size' is a read only variable -Expected error 'Read only variable' +SET @@GLOBAL.innodb_buffer_pool_size=10485760; +Expected succeeded SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); COUNT(@@GLOBAL.innodb_buffer_pool_size) 1 @@ -51,3 +51,4 @@ COUNT(@@GLOBAL.innodb_buffer_pool_size) SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; ERROR 42S22: Unknown column 'innodb_buffer_pool_size' in 'field list' Expected error 'Readonly variable' +SET @@GLOBAL.innodb_buffer_pool_size = @start_buffer_pool_size; diff --git a/mysql-test/suite/sys_vars/r/innodb_checksum_algorithm_basic.result b/mysql-test/suite/sys_vars/r/innodb_checksum_algorithm_basic.result index 8e05db129dc..9c2e95b3c7c 100644 --- 
a/mysql-test/suite/sys_vars/r/innodb_checksum_algorithm_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_checksum_algorithm_basic.result @@ -1,47 +1,47 @@ SET @orig = @@global.innodb_checksum_algorithm; SELECT @orig; @orig -INNODB +crc32 SET GLOBAL innodb_checksum_algorithm = 'crc32'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -CRC32 +crc32 SET GLOBAL innodb_checksum_algorithm = 'strict_crc32'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_CRC32 +strict_crc32 SET GLOBAL innodb_checksum_algorithm = 'innodb'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -INNODB +innodb SET GLOBAL innodb_checksum_algorithm = 'strict_innodb'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_INNODB +strict_innodb SET GLOBAL innodb_checksum_algorithm = 'none'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -NONE +none SET GLOBAL innodb_checksum_algorithm = 'strict_none'; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_NONE +strict_none SET GLOBAL innodb_checksum_algorithm = ''; ERROR 42000: Variable 'innodb_checksum_algorithm' can't be set to the value of '' SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_NONE +strict_none SET GLOBAL innodb_checksum_algorithm = 'foobar'; ERROR 42000: Variable 'innodb_checksum_algorithm' can't be set to the value of 'foobar' SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_NONE +strict_none SET GLOBAL innodb_checksum_algorithm = 123; ERROR 42000: Variable 'innodb_checksum_algorithm' can't be set to the value of '123' SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -STRICT_NONE +strict_none SET GLOBAL innodb_checksum_algorithm = @orig; SELECT @@global.innodb_checksum_algorithm; @@global.innodb_checksum_algorithm -INNODB +crc32 diff --git 
a/mysql-test/suite/sys_vars/r/innodb_cmp_per_index_enabled_basic.result b/mysql-test/suite/sys_vars/r/innodb_cmp_per_index_enabled_basic.result index 3ee9448bdab..31bc11de717 100644 --- a/mysql-test/suite/sys_vars/r/innodb_cmp_per_index_enabled_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_cmp_per_index_enabled_basic.result @@ -21,7 +21,6 @@ SET GLOBAL innodb_cmp_per_index_enabled=OFF; SELECT @@global.innodb_cmp_per_index_enabled; @@global.innodb_cmp_per_index_enabled 0 -SET GLOBAL innodb_file_format=Barracuda; SET GLOBAL innodb_cmp_per_index_enabled=ON; CREATE TABLE t (a INT) ENGINE=INNODB KEY_BLOCK_SIZE=8; INSERT INTO t VALUES (1); @@ -61,5 +60,4 @@ compress_time 0 uncompress_ops 0 uncompress_time 0 DROP TABLE t; -SET GLOBAL innodb_file_format=default; SET GLOBAL innodb_cmp_per_index_enabled=default; diff --git a/mysql-test/suite/sys_vars/r/innodb_commit_concurrency_basic.result b/mysql-test/suite/sys_vars/r/innodb_commit_concurrency_basic.result index 85a4b008ff0..474818829c5 100644 --- a/mysql-test/suite/sys_vars/r/innodb_commit_concurrency_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_commit_concurrency_basic.result @@ -28,14 +28,34 @@ SELECT @@global.innodb_commit_concurrency; '#--------------------FN_DYNVARS_046_04-------------------------#' SET @@global.innodb_commit_concurrency = 1; ERROR 42000: Variable 'innodb_commit_concurrency' can't be set to the value of '1' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 SET @@global.innodb_commit_concurrency = -1; ERROR 42000: Variable 'innodb_commit_concurrency' can't be set to the value of '-1' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 SET @@global.innodb_commit_concurrency = "T"; ERROR 42000: Incorrect argument type to variable 'innodb_commit_concurrency' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 SET @@global.innodb_commit_concurrency = "Y"; ERROR 42000: Incorrect argument type to 
variable 'innodb_commit_concurrency' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 +SET @@global.innodb_commit_concurrency = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_commit_concurrency' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 SET @@global.innodb_commit_concurrency = 1001; ERROR 42000: Variable 'innodb_commit_concurrency' can't be set to the value of '1001' +SELECT @@global.innodb_commit_concurrency; +@@global.innodb_commit_concurrency +0 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_commit_concurrency = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_compression_failure_threshold_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_compression_failure_threshold_pct_basic.result index 9f85eccdb7a..ae556ceedb2 100644 --- a/mysql-test/suite/sys_vars/r/innodb_compression_failure_threshold_pct_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_compression_failure_threshold_pct_basic.result @@ -45,6 +45,11 @@ ERROR 42000: Incorrect argument type to variable 'innodb_compression_failure_thr SELECT @@global.innodb_compression_failure_threshold_pct; @@global.innodb_compression_failure_threshold_pct 0 +SET @@global.innodb_compression_failure_threshold_pct = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_failure_threshold_pct' +SELECT @@global.innodb_compression_failure_threshold_pct; +@@global.innodb_compression_failure_threshold_pct +0 SET @@global.innodb_compression_failure_threshold_pct = "Y"; ERROR 42000: Incorrect argument type to variable 'innodb_compression_failure_threshold_pct' SELECT @@global.innodb_compression_failure_threshold_pct; @@ -56,6 +61,16 @@ Warning 1292 Truncated incorrect innodb_compression_failure_thres value: '101' SELECT @@global.innodb_compression_failure_threshold_pct; @@global.innodb_compression_failure_threshold_pct 
100 +SET @@global.innodb_compression_failure_threshold_pct = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_failure_threshold_pct' +SELECT @@global.innodb_compression_failure_threshold_pct; +@@global.innodb_compression_failure_threshold_pct +100 +SET @@global.innodb_compression_failure_threshold_pct = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_failure_threshold_pct' +SELECT @@global.innodb_compression_failure_threshold_pct; +@@global.innodb_compression_failure_threshold_pct +100 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_compression_failure_threshold_pct = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_compression_pad_pct_max_basic.result b/mysql-test/suite/sys_vars/r/innodb_compression_pad_pct_max_basic.result index 628993ef873..6735d877c6b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_compression_pad_pct_max_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_compression_pad_pct_max_basic.result @@ -40,12 +40,27 @@ ERROR 42000: Incorrect argument type to variable 'innodb_compression_pad_pct_max SELECT @@global.innodb_compression_pad_pct_max; @@global.innodb_compression_pad_pct_max 0 +SET @@global.innodb_compression_pad_pct_max = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_pad_pct_max' +SELECT @@global.innodb_compression_pad_pct_max; +@@global.innodb_compression_pad_pct_max +0 SET @@global.innodb_compression_pad_pct_max = 76; Warnings: Warning 1292 Truncated incorrect innodb_compression_pad_pct_max value: '76' SELECT @@global.innodb_compression_pad_pct_max; @@global.innodb_compression_pad_pct_max 75 +SET @@global.innodb_compression_pad_pct_max = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_pad_pct_max' +SELECT @@global.innodb_compression_pad_pct_max; +@@global.innodb_compression_pad_pct_max +75 +SET @@global.innodb_compression_pad_pct_max 
= ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_compression_pad_pct_max' +SELECT @@global.innodb_compression_pad_pct_max; +@@global.innodb_compression_pad_pct_max +75 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_compression_pad_pct_max = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_concurrency_tickets_basic.result b/mysql-test/suite/sys_vars/r/innodb_concurrency_tickets_basic.result index 0b790fb3557..dd4488a97b3 100644 --- a/mysql-test/suite/sys_vars/r/innodb_concurrency_tickets_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_concurrency_tickets_basic.result @@ -38,12 +38,31 @@ SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets 4294967295 '#--------------------FN_DYNVARS_046_04-------------------------#' +SET @@global.innodb_concurrency_tickets = 4294967296; +SELECT @@global.innodb_concurrency_tickets IN (4294967296,4294967295); +@@global.innodb_concurrency_tickets IN (4294967296,4294967295) +1 +SET @@global.innodb_concurrency_tickets = 12345678901; +SELECT @@global.innodb_concurrency_tickets IN (12345678901,4294967295); +@@global.innodb_concurrency_tickets IN (12345678901,4294967295) +1 +SET @@global.innodb_concurrency_tickets = 18446744073709551615; +SELECT @@global.innodb_concurrency_tickets IN (18446744073709551615,4294967295); +@@global.innodb_concurrency_tickets IN (18446744073709551615,4294967295) +1 +'#--------------------FN_DYNVARS_046_05-------------------------#' SET @@global.innodb_concurrency_tickets = -1; Warnings: Warning 1292 Truncated incorrect innodb_concurrency_tickets value: '-1' SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets 1 +SET @@global.innodb_concurrency_tickets = -1024; +Warnings: +Warning 1292 Truncated incorrect innodb_concurrency_tickets value: '-1024' +SELECT @@global.innodb_concurrency_tickets; +@@global.innodb_concurrency_tickets +1 SET 
@@global.innodb_concurrency_tickets = "T"; ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' SELECT @@global.innodb_concurrency_tickets; @@ -54,11 +73,22 @@ ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets 1 -SET @@global.innodb_concurrency_tickets = 1001; +SET @@global.innodb_concurrency_tickets = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets -1001 -'#----------------------FN_DYNVARS_046_05------------------------#' +1 +SET @@global.innodb_concurrency_tickets = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' +SELECT @@global.innodb_concurrency_tickets; +@@global.innodb_concurrency_tickets +1 +SET @@global.innodb_concurrency_tickets = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' +SELECT @@global.innodb_concurrency_tickets; +@@global.innodb_concurrency_tickets +1 +'#----------------------FN_DYNVARS_046_06------------------------#' SELECT @@global.innodb_concurrency_tickets = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_concurrency_tickets'; @@ -67,23 +97,23 @@ VARIABLE_VALUE 1 SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets -1001 +1 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_concurrency_tickets'; VARIABLE_VALUE -1001 -'#---------------------FN_DYNVARS_046_06-------------------------#' +1 +'#---------------------FN_DYNVARS_046_07-------------------------#' SET @@global.innodb_concurrency_tickets = OFF; ERROR 42000: Incorrect argument type to variable 'innodb_concurrency_tickets' SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets -1001 +1 SET @@global.innodb_concurrency_tickets = ON; ERROR 42000: 
Incorrect argument type to variable 'innodb_concurrency_tickets' SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets -1001 -'#---------------------FN_DYNVARS_046_07----------------------#' +1 +'#---------------------FN_DYNVARS_046_08----------------------#' SET @@global.innodb_concurrency_tickets = TRUE; SELECT @@global.innodb_concurrency_tickets; @@global.innodb_concurrency_tickets diff --git a/mysql-test/suite/sys_vars/r/innodb_default_row_format_basic.result b/mysql-test/suite/sys_vars/r/innodb_default_row_format_basic.result new file mode 100644 index 00000000000..9710c3ef364 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_default_row_format_basic.result @@ -0,0 +1,48 @@ +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +dynamic +SET GLOBAL innodb_default_row_format = 'redundant'; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +redundant +SET GLOBAL innodb_default_row_format = 'dynamic'; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +dynamic +SET GLOBAL innodb_default_row_format = 'compact'; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +compact +SET GLOBAL innodb_default_row_format = 'compressed'; +ERROR 42000: Variable 'innodb_default_row_format' can't be set to the value of 'compressed' +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +compact +SET GLOBAL innodb_default_row_format = 'foobar'; +ERROR 42000: Variable 'innodb_default_row_format' can't be set to the value of 'foobar' +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +compact +SET GLOBAL innodb_default_row_format = 0; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +redundant +SET GLOBAL innodb_default_row_format = 1; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +compact +SET GLOBAL innodb_default_row_format 
= 2; +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +dynamic +SET GLOBAL innodb_default_row_format = 3; +ERROR 42000: Variable 'innodb_default_row_format' can't be set to the value of '3' +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +dynamic +SET GLOBAL innodb_default_row_format = 123; +ERROR 42000: Variable 'innodb_default_row_format' can't be set to the value of '123' +SELECT @@global.innodb_default_row_format; +@@global.innodb_default_row_format +dynamic +SET GLOBAL innodb_default_row_format = default; diff --git a/mysql-test/suite/sys_vars/r/innodb_disable_resize_buffer_pool_debug_basic.result b/mysql-test/suite/sys_vars/r/innodb_disable_resize_buffer_pool_debug_basic.result new file mode 100644 index 00000000000..bbac9a4cde6 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_disable_resize_buffer_pool_debug_basic.result @@ -0,0 +1,60 @@ +# +# Basic test for innodb_disable_resize_buffer_pool_debug +# +SET @start_global_value = @@global.innodb_disable_resize_buffer_pool_debug; +SET @@global.innodb_disable_resize_buffer_pool_debug = 0; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; +@@global.innodb_disable_resize_buffer_pool_debug +0 +SET @@global.innodb_disable_resize_buffer_pool_debug ='On' ; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; +@@global.innodb_disable_resize_buffer_pool_debug +1 +SET @@global.innodb_disable_resize_buffer_pool_debug ='Off' ; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; +@@global.innodb_disable_resize_buffer_pool_debug +0 +SET @@global.innodb_disable_resize_buffer_pool_debug = 1; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; +@@global.innodb_disable_resize_buffer_pool_debug +1 +SELECT IF(@@GLOBAL.innodb_disable_resize_buffer_pool_debug,'ON','OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_disable_resize_buffer_pool_debug'; 
+IF(@@GLOBAL.innodb_disable_resize_buffer_pool_debug,'ON','OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug); +COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_disable_resize_buffer_pool_debug'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SELECT @@innodb_disable_resize_buffer_pool_debug = @@GLOBAL.innodb_disable_resize_buffer_pool_debug; +@@innodb_disable_resize_buffer_pool_debug = @@GLOBAL.innodb_disable_resize_buffer_pool_debug +1 +1 Expected +SELECT COUNT(@@innodb_disable_resize_buffer_pool_debug); +COUNT(@@innodb_disable_resize_buffer_pool_debug) +1 +1 Expected +SELECT COUNT(@@local.innodb_disable_resize_buffer_pool_debug); +ERROR HY000: Variable 'innodb_disable_resize_buffer_pool_debug' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_disable_resize_buffer_pool_debug); +ERROR HY000: Variable 'innodb_disable_resize_buffer_pool_debug' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug); +COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug) +1 +1 Expected +SELECT innodb_disable_resize_buffer_pool_debug = @@SESSION.innodb_disable_resize_buffer_pool_debug; +ERROR 42S22: Unknown column 'innodb_disable_resize_buffer_pool_debug' in 'field list' +SET @@global.innodb_disable_resize_buffer_pool_debug = @start_global_value; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; +@@global.innodb_disable_resize_buffer_pool_debug +1 diff --git a/mysql-test/suite/sys_vars/r/innodb_fast_shutdown_basic.result b/mysql-test/suite/sys_vars/r/innodb_fast_shutdown_basic.result index fe09a652700..38d5365b3f3 100644 --- a/mysql-test/suite/sys_vars/r/innodb_fast_shutdown_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_fast_shutdown_basic.result @@ -85,6 +85,21 @@ ERROR 42000: 
Incorrect argument type to variable 'innodb_fast_shutdown' SELECT @@global.innodb_fast_shutdown; @@global.innodb_fast_shutdown 0 +SET @@global.innodb_fast_shutdown = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_fast_shutdown' +SELECT @@global.innodb_fast_shutdown; +@@global.innodb_fast_shutdown +0 +SET @@global.innodb_fast_shutdown = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_fast_shutdown' +SELECT @@global.innodb_fast_shutdown; +@@global.innodb_fast_shutdown +0 +SET @@global.innodb_fast_shutdown = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_fast_shutdown' +SELECT @@global.innodb_fast_shutdown; +@@global.innodb_fast_shutdown +0 '#-------------------FN_DYNVARS_042_05----------------------------#' SET @@session.innodb_fast_shutdown = 0; ERROR HY000: Variable 'innodb_fast_shutdown' is a GLOBAL variable and should be set with SET GLOBAL diff --git a/mysql-test/suite/sys_vars/r/innodb_file_format_basic.result b/mysql-test/suite/sys_vars/r/innodb_file_format_basic.result index 58e009ea705..c330bbf5c16 100644 --- a/mysql-test/suite/sys_vars/r/innodb_file_format_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_file_format_basic.result @@ -1,29 +1,31 @@ SET @start_global_value = @@global.innodb_file_format; SELECT @start_global_value; @start_global_value -Antelope +Barracuda Valid values are 'Antelope' and 'Barracuda' select @@global.innodb_file_format in ('Antelope', 'Barracuda'); @@global.innodb_file_format in ('Antelope', 'Barracuda') 1 select @@global.innodb_file_format; @@global.innodb_file_format -Antelope +Barracuda select @@session.innodb_file_format; ERROR HY000: Variable 'innodb_file_format' is a GLOBAL variable show global variables like 'innodb_file_format'; Variable_name Value -innodb_file_format Antelope +innodb_file_format Barracuda show session variables like 'innodb_file_format'; Variable_name Value -innodb_file_format Antelope +innodb_file_format Barracuda select * from 
information_schema.global_variables where variable_name='innodb_file_format'; VARIABLE_NAME VARIABLE_VALUE -INNODB_FILE_FORMAT Antelope +INNODB_FILE_FORMAT Barracuda select * from information_schema.session_variables where variable_name='innodb_file_format'; VARIABLE_NAME VARIABLE_VALUE -INNODB_FILE_FORMAT Antelope +INNODB_FILE_FORMAT Barracuda set global innodb_file_format='Antelope'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_file_format; @@global.innodb_file_format Antelope @@ -34,6 +36,8 @@ select * from information_schema.session_variables where variable_name='innodb_f VARIABLE_NAME VARIABLE_VALUE INNODB_FILE_FORMAT Antelope set @@global.innodb_file_format='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_file_format; @@global.innodb_file_format Barracuda @@ -54,6 +58,8 @@ ERROR 42000: Incorrect argument type to variable 'innodb_file_format' set global innodb_file_format='Salmon'; ERROR 42000: Variable 'innodb_file_format' can't be set to the value of 'Salmon' SET @@global.innodb_file_format = @start_global_value; +Warnings: +Warning 131 Using innodb_file_format is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_file_format; @@global.innodb_file_format -Antelope +Barracuda diff --git a/mysql-test/suite/sys_vars/r/innodb_file_format_max_basic.result b/mysql-test/suite/sys_vars/r/innodb_file_format_max_basic.result index 32b2262c091..5402e16a424 100644 --- a/mysql-test/suite/sys_vars/r/innodb_file_format_max_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_file_format_max_basic.result @@ -1,31 +1,33 @@ SET @start_global_value = @@global.innodb_file_format_max; SELECT @start_global_value; @start_global_value -Antelope +Barracuda Valid values are 'Antelope' and 'Barracuda' SELECT @@global.innodb_file_format_max in ('Antelope', 'Barracuda'); @@global.innodb_file_format_max in ('Antelope', 'Barracuda') 1 SELECT @@global.innodb_file_format_max; @@global.innodb_file_format_max -Antelope +Barracuda SELECT @@session.innodb_file_format_max; ERROR HY000: Variable 'innodb_file_format_max' is a GLOBAL variable SHOW global variables LIKE 'innodb_file_format_max'; Variable_name Value -innodb_file_format_max Antelope +innodb_file_format_max Barracuda SHOW session variables LIKE 'innodb_file_format_max'; Variable_name Value -innodb_file_format_max Antelope +innodb_file_format_max Barracuda SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_file_format_max'; VARIABLE_NAME VARIABLE_VALUE -INNODB_FILE_FORMAT_MAX Antelope +INNODB_FILE_FORMAT_MAX Barracuda SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_file_format_max'; VARIABLE_NAME VARIABLE_VALUE -INNODB_FILE_FORMAT_MAX Antelope +INNODB_FILE_FORMAT_MAX Barracuda SET global innodb_file_format_max='Antelope'; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_file_format_max; @@global.innodb_file_format_max Antelope @@ -38,6 +40,8 @@ WHERE variable_name='innodb_file_format_max'; VARIABLE_NAME VARIABLE_VALUE INNODB_FILE_FORMAT_MAX Antelope SET @@global.innodb_file_format_max='Barracuda'; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_file_format_max; @@global.innodb_file_format_max Barracuda @@ -60,6 +64,8 @@ ERROR 42000: Incorrect argument type to variable 'innodb_file_format_max' SET global innodb_file_format_max='Salmon'; ERROR 42000: Variable 'innodb_file_format_max' can't be set to the value of 'Salmon' SET @@global.innodb_file_format_max = @start_global_value; +Warnings: +Warning 131 Using innodb_file_format_max is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_file_format_max; @@global.innodb_file_format_max -Antelope +Barracuda diff --git a/mysql-test/suite/sys_vars/r/innodb_fill_factor_basic.result b/mysql-test/suite/sys_vars/r/innodb_fill_factor_basic.result new file mode 100644 index 00000000000..7a4cef2906f --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_fill_factor_basic.result @@ -0,0 +1,42 @@ +select @@global.innodb_fill_factor; +@@global.innodb_fill_factor +100 +select @@session.innodb_fill_factor; +ERROR HY000: Variable 'innodb_fill_factor' is a GLOBAL variable +show global variables like 'innodb_fill_factor'; +Variable_name Value +innodb_fill_factor 100 +show session variables like 'innodb_fill_factor'; +Variable_name Value +innodb_fill_factor 100 +select * from information_schema.global_variables where variable_name='innodb_fill_factor'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FILL_FACTOR 100 +select * from information_schema.session_variables 
where variable_name='innodb_fill_factor'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FILL_FACTOR 100 +set global innodb_fill_factor=9; +Warnings: +Warning 1292 Truncated incorrect innodb_fill_factor value: '9' +select @@innodb_fill_factor; +@@innodb_fill_factor +10 +set global innodb_fill_factor=10; +select @@innodb_fill_factor; +@@innodb_fill_factor +10 +set global innodb_fill_factor=75; +select @@innodb_fill_factor; +@@innodb_fill_factor +75 +set global innodb_fill_factor=100; +select @@innodb_fill_factor; +@@innodb_fill_factor +100 +set global innodb_fill_factor=101; +Warnings: +Warning 1292 Truncated incorrect innodb_fill_factor value: '101' +select @@innodb_fill_factor; +@@innodb_fill_factor +100 +set global innodb_fill_factor=100; diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_timeout_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_timeout_basic.result index 60a4081849f..79455e22b53 100644 --- a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_timeout_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_timeout_basic.result @@ -56,6 +56,21 @@ Warning 1292 Truncated incorrect innodb_flush_log_at_timeout value: '2701' SELECT @@global.innodb_flush_log_at_timeout; @@global.innodb_flush_log_at_timeout 2700 +SET @@global.innodb_flush_log_at_timeout = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_timeout' +SELECT @@global.innodb_flush_log_at_timeout; +@@global.innodb_flush_log_at_timeout +2700 +SET @@global.innodb_flush_log_at_timeout = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_timeout' +SELECT @@global.innodb_flush_log_at_timeout; +@@global.innodb_flush_log_at_timeout +2700 +SET @@global.innodb_flush_log_at_timeout = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_timeout' +SELECT @@global.innodb_flush_log_at_timeout; +@@global.innodb_flush_log_at_timeout +2700 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT 
@@global.innodb_flush_log_at_timeout = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result index 268d40c1be3..0d990d746a9 100644 --- a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result @@ -9,6 +9,11 @@ SELECT @@global.innodb_flush_log_at_trx_commit; @@global.innodb_flush_log_at_trx_commit 1 '#---------------------FN_DYNVARS_046_02-------------------------#' +SET innodb_flush_log_at_trx_commit = 1; +ERROR HY000: Variable 'innodb_flush_log_at_trx_commit' is a GLOBAL variable and should be set with SET GLOBAL +SELECT @@innodb_flush_log_at_trx_commit; +@@innodb_flush_log_at_trx_commit +1 SELECT local.innodb_flush_log_at_trx_commit; ERROR 42S02: Unknown table 'local' in field list SET global innodb_flush_log_at_trx_commit = 0; @@ -51,6 +56,27 @@ Warning 1292 Truncated incorrect innodb_flush_log_at_trx_commit value: '1001' SELECT @@global.innodb_flush_log_at_trx_commit; @@global.innodb_flush_log_at_trx_commit 3 +SET @@global.innodb_flush_log_at_trx_commit = 100156787; +Warnings: +Warning 1292 Truncated incorrect innodb_flush_log_at_trx_commit value: '100156787' +SELECT @@global.innodb_flush_log_at_trx_commit; +@@global.innodb_flush_log_at_trx_commit +3 +SET @@global.innodb_flush_log_at_trx_commit = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit' +SELECT @@global.innodb_flush_log_at_trx_commit; +@@global.innodb_flush_log_at_trx_commit +3 +SET @@global.innodb_flush_log_at_trx_commit = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit' +SELECT @@global.innodb_flush_log_at_trx_commit; +@@global.innodb_flush_log_at_trx_commit +3 +SET @@global.innodb_flush_log_at_trx_commit = ' '; +ERROR 42000: Incorrect argument type to variable 
'innodb_flush_log_at_trx_commit' +SELECT @@global.innodb_flush_log_at_trx_commit; +@@global.innodb_flush_log_at_trx_commit +3 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_flush_log_at_trx_commit = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result new file mode 100644 index 00000000000..9e3f7d95eb9 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result @@ -0,0 +1,92 @@ +SET @start_global_value = @@global.innodb_flush_sync; +SELECT @start_global_value; +@start_global_value +1 +Valid values are 'ON' and 'OFF' +select @@global.innodb_flush_sync in (0, 1); +@@global.innodb_flush_sync in (0, 1) +1 +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +1 +select @@session.innodb_flush_sync; +ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable +show global variables like 'innodb_flush_sync'; +Variable_name Value +innodb_flush_sync ON +show session variables like 'innodb_flush_sync'; +Variable_name Value +innodb_flush_sync ON +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +set global innodb_flush_sync='OFF'; +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +0 +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC OFF +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC OFF +set @@global.innodb_flush_sync=1; +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +1 +select * from information_schema.global_variables 
where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +set global innodb_flush_sync=0; +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +0 +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC OFF +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC OFF +set @@global.innodb_flush_sync='ON'; +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +1 +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +set session innodb_flush_sync='OFF'; +ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable and should be set with SET GLOBAL +set @@session.innodb_flush_sync='ON'; +ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_flush_sync=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_sync' +set global innodb_flush_sync=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_flush_sync' +set global innodb_flush_sync=2; +ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of '2' +set global innodb_flush_sync=-3; +ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of '-3' +select @@global.innodb_flush_sync; +@@global.innodb_flush_sync +1 +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +select * from information_schema.session_variables where 
variable_name='innodb_flush_sync'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_FLUSH_SYNC ON +set global innodb_flush_sync='AUTO'; +ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO' +SET @@global.innodb_flush_sync = @start_global_value; +SELECT @@global.innodb_flush_sync; +@@global.innodb_flush_sync +1 diff --git a/mysql-test/suite/sys_vars/r/innodb_flushing_avg_loops_basic.result b/mysql-test/suite/sys_vars/r/innodb_flushing_avg_loops_basic.result index caa41bd64ed..e024581955a 100644 --- a/mysql-test/suite/sys_vars/r/innodb_flushing_avg_loops_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_flushing_avg_loops_basic.result @@ -56,6 +56,16 @@ Warning 1292 Truncated incorrect innodb_flushing_avg_loops value: '1001' SELECT @@global.innodb_flushing_avg_loops; @@global.innodb_flushing_avg_loops 1000 +SET @@global.innodb_flushing_avg_loops = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_flushing_avg_loops' +SELECT @@global.innodb_flushing_avg_loops; +@@global.innodb_flushing_avg_loops +1000 +SET @@global.innodb_flushing_avg_loops = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_flushing_avg_loops' +SELECT @@global.innodb_flushing_avg_loops; +@@global.innodb_flushing_avg_loops +1000 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_flushing_avg_loops = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_ft_result_cache_limit_basic.result b/mysql-test/suite/sys_vars/r/innodb_ft_result_cache_limit_basic.result index 0aefabd48f7..59f6431a65e 100644 --- a/mysql-test/suite/sys_vars/r/innodb_ft_result_cache_limit_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_ft_result_cache_limit_basic.result @@ -25,8 +25,14 @@ set global innodb_ft_result_cache_limit=1000000; select @@innodb_ft_result_cache_limit; @@innodb_ft_result_cache_limit 1000000 -set global innodb_ft_result_cache_limit=4000000000; +set global 
innodb_ft_result_cache_limit=4294967295; select @@innodb_ft_result_cache_limit; @@innodb_ft_result_cache_limit -4000000000 +4294967295 +set global innodb_ft_result_cache_limit=4*1024*1024*1024; +Warnings: +Warning 1292 Truncated incorrect innodb_ft_result_cache_limit value: '4294967296' +select @@innodb_ft_result_cache_limit; +@@innodb_ft_result_cache_limit +4294967295 set global innodb_ft_result_cache_limit=2000000000; diff --git a/mysql-test/suite/sys_vars/r/innodb_ft_server_stopword_table_basic.result b/mysql-test/suite/sys_vars/r/innodb_ft_server_stopword_table_basic.result index 044e8f80951..1851c078e5c 100644 --- a/mysql-test/suite/sys_vars/r/innodb_ft_server_stopword_table_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_ft_server_stopword_table_basic.result @@ -16,6 +16,7 @@ INNODB_FT_SERVER_STOPWORD_TABLE select * from information_schema.session_variables where variable_name='innodb_ft_server_stopword_table'; VARIABLE_NAME VARIABLE_VALUE INNODB_FT_SERVER_STOPWORD_TABLE +call mtr.add_suppression("\\[ERROR\\] InnoDB: user stopword table Salmon does not exist."); set session innodb_ft_server_stopword_table='Salmon'; ERROR HY000: Variable 'innodb_ft_server_stopword_table' is a GLOBAL variable and should be set with SET GLOBAL set @@session.innodb_ft_server_stopword_table='Salmon'; diff --git a/mysql-test/suite/sys_vars/r/innodb_ft_user_stopword_table_basic.result b/mysql-test/suite/sys_vars/r/innodb_ft_user_stopword_table_basic.result index 66298481693..ca54f5f7521 100644 --- a/mysql-test/suite/sys_vars/r/innodb_ft_user_stopword_table_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_ft_user_stopword_table_basic.result @@ -17,6 +17,7 @@ INNODB_FT_USER_STOPWORD_TABLE select * from information_schema.session_variables where variable_name='innodb_ft_user_stopword_table'; VARIABLE_NAME VARIABLE_VALUE INNODB_FT_USER_STOPWORD_TABLE +call mtr.add_suppression("\\[ERROR\\] InnoDB: user stopword table Salmon does not exist."); set session 
innodb_ft_user_stopword_table='Salmon'; ERROR 42000: Variable 'innodb_ft_user_stopword_table' can't be set to the value of 'Salmon' set @@session.innodb_ft_user_stopword_table='Salmon'; @@ -27,4 +28,3 @@ set global innodb_ft_user_stopword_table=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_ft_user_stopword_table' set global innodb_ft_user_stopword_table='Salmon'; ERROR 42000: Variable 'innodb_ft_user_stopword_table' can't be set to the value of 'Salmon' -SET @@session.innodb_ft_user_stopword_table=@start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result b/mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result index 3877988bbee..c6e803ffef8 100644 --- a/mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_large_prefix_basic.result @@ -1,29 +1,31 @@ SET @start_global_value = @@global.innodb_large_prefix; SELECT @start_global_value; @start_global_value -0 +1 Valid values are 'ON' and 'OFF' select @@global.innodb_large_prefix in (0, 1); @@global.innodb_large_prefix in (0, 1) 1 select @@global.innodb_large_prefix; @@global.innodb_large_prefix -0 +1 select @@session.innodb_large_prefix; ERROR HY000: Variable 'innodb_large_prefix' is a GLOBAL variable show global variables like 'innodb_large_prefix'; Variable_name Value -innodb_large_prefix OFF +innodb_large_prefix ON show session variables like 'innodb_large_prefix'; Variable_name Value -innodb_large_prefix OFF +innodb_large_prefix ON select * from information_schema.global_variables where variable_name='innodb_large_prefix'; VARIABLE_NAME VARIABLE_VALUE -INNODB_LARGE_PREFIX OFF +INNODB_LARGE_PREFIX ON select * from information_schema.session_variables where variable_name='innodb_large_prefix'; VARIABLE_NAME VARIABLE_VALUE -INNODB_LARGE_PREFIX OFF +INNODB_LARGE_PREFIX ON set global innodb_large_prefix='OFF'; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future 
releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_large_prefix; @@global.innodb_large_prefix 0 @@ -34,6 +36,8 @@ select * from information_schema.session_variables where variable_name='innodb_l VARIABLE_NAME VARIABLE_VALUE INNODB_LARGE_PREFIX OFF set @@global.innodb_large_prefix=1; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_large_prefix; @@global.innodb_large_prefix 1 @@ -44,6 +48,8 @@ select * from information_schema.session_variables where variable_name='innodb_l VARIABLE_NAME VARIABLE_VALUE INNODB_LARGE_PREFIX ON set global innodb_large_prefix=0; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_large_prefix; @@global.innodb_large_prefix 0 @@ -54,6 +60,8 @@ select * from information_schema.session_variables where variable_name='innodb_l VARIABLE_NAME VARIABLE_VALUE INNODB_LARGE_PREFIX OFF set @@global.innodb_large_prefix='ON'; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html select @@global.innodb_large_prefix; @@global.innodb_large_prefix 1 @@ -87,6 +95,8 @@ INNODB_LARGE_PREFIX ON set global innodb_large_prefix='AUTO'; ERROR 42000: Variable 'innodb_large_prefix' can't be set to the value of 'AUTO' SET @@global.innodb_large_prefix = @start_global_value; +Warnings: +Warning 131 Using innodb_large_prefix is deprecated and the parameter may be removed in future releases. 
See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html SELECT @@global.innodb_large_prefix; @@global.innodb_large_prefix -0 +1 diff --git a/mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result b/mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result index 1dcc2d554ce..74b1d21d475 100644 --- a/mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_lock_wait_timeout_basic.result @@ -55,6 +55,104 @@ COUNT(@@GLOBAL.innodb_lock_wait_timeout) 1 Expected SELECT innodb_lock_wait_timeout = @@SESSION.innodb_lock_wait_timeout; ERROR 42S22: Unknown column 'innodb_lock_wait_timeout' in 'field list' +set @@global.innodb_lock_wait_timeout=100; +set @@global.innodb_lock_wait_timeout=DEFAULT; +select @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +50 +set @@session.innodb_lock_wait_timeout=100; +set @@session.innodb_lock_wait_timeout=DEFAULT; +select @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +50 +SET @@global.innodb_lock_wait_timeout=1; +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1 +SET @@global.innodb_lock_wait_timeout=1024; +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1024 +SET @@global.innodb_lock_wait_timeout=1073741824; +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1073741824 +SET @@session.innodb_lock_wait_timeout=1; +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1 +SET @@session.innodb_lock_wait_timeout=1024; +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1024 +SET @@session.innodb_lock_wait_timeout=1073741824; +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 +SET @@global.innodb_lock_wait_timeout="t"; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@global.innodb_lock_wait_timeout; 
+@@global.innodb_lock_wait_timeout +1073741824 +SET @@global.innodb_lock_wait_timeout=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_lock_wait_timeout value: '-1024' +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1 +SET @@global.innodb_lock_wait_timeout=1073741825; +Warnings: +Warning 1292 Truncated incorrect innodb_lock_wait_timeout value: '1073741825' +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1073741824 +SET @@global.innodb_lock_wait_timeout=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1073741824 +SET @@global.innodb_lock_wait_timeout=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1073741824 +SET @@global.innodb_lock_wait_timeout=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@global.innodb_lock_wait_timeout; +@@global.innodb_lock_wait_timeout +1073741824 +SET @@session.innodb_lock_wait_timeout="T"; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 +SET @@session.innodb_lock_wait_timeout=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_lock_wait_timeout value: '-1024' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1 +SET @@session.innodb_lock_wait_timeout=1073999999; +Warnings: +Warning 1292 Truncated incorrect innodb_lock_wait_timeout value: '1073999999' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 +SET @@session.innodb_lock_wait_timeout=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 
+SET @@session.innodb_lock_wait_timeout=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 +SET @@session.innodb_lock_wait_timeout=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_lock_wait_timeout' +SELECT @@session.innodb_lock_wait_timeout; +@@session.innodb_lock_wait_timeout +1073741824 SET @@global.innodb_lock_wait_timeout = @start_global_value; SELECT @@global.innodb_lock_wait_timeout; @@global.innodb_lock_wait_timeout diff --git a/mysql-test/suite/sys_vars/r/innodb_log_checkpoint_now_basic.result b/mysql-test/suite/sys_vars/r/innodb_log_checkpoint_now_basic.result index d9d067c2cf9..4774c2fe1d7 100644 --- a/mysql-test/suite/sys_vars/r/innodb_log_checkpoint_now_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_log_checkpoint_now_basic.result @@ -1 +1,80 @@ -XtraDB extension +SET @start_global_value = @@global.innodb_log_checkpoint_now; +SELECT @start_global_value; +@start_global_value +0 +select @@global.innodb_log_checkpoint_now in (0, 1); +@@global.innodb_log_checkpoint_now in (0, 1) +1 +select @@global.innodb_log_checkpoint_now; +@@global.innodb_log_checkpoint_now +0 +select @@session.innodb_log_checkpoint_now; +ERROR HY000: Variable 'innodb_log_checkpoint_now' is a GLOBAL variable +show global variables like 'innodb_log_checkpoint_now'; +Variable_name Value +innodb_log_checkpoint_now OFF +show session variables like 'innodb_log_checkpoint_now'; +Variable_name Value +innodb_log_checkpoint_now OFF +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +set global innodb_log_checkpoint_now=1; +select @@global.innodb_log_checkpoint_now; 
+@@global.innodb_log_checkpoint_now +0 +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +set @@global.innodb_log_checkpoint_now=0; +select @@global.innodb_log_checkpoint_now; +@@global.innodb_log_checkpoint_now +0 +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +set global innodb_log_checkpoint_now=ON; +select @@global.innodb_log_checkpoint_now; +@@global.innodb_log_checkpoint_now +0 +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +set global innodb_log_checkpoint_now=OFF; +select @@global.innodb_log_checkpoint_now; +@@global.innodb_log_checkpoint_now +0 +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_CHECKPOINT_NOW OFF +set session innodb_log_checkpoint_now='some'; +ERROR HY000: Variable 'innodb_log_checkpoint_now' is a GLOBAL variable and should be set with SET GLOBAL +set @@session.innodb_log_checkpoint_now='some'; +ERROR HY000: Variable 'innodb_log_checkpoint_now' is a GLOBAL variable and should be set 
with SET GLOBAL +set global innodb_log_checkpoint_now=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_log_checkpoint_now' +set global innodb_log_checkpoint_now='foo'; +ERROR 42000: Variable 'innodb_log_checkpoint_now' can't be set to the value of 'foo' +set global innodb_log_checkpoint_now=-2; +ERROR 42000: Variable 'innodb_log_checkpoint_now' can't be set to the value of '-2' +set global innodb_log_checkpoint_now=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_log_checkpoint_now' +SET @@global.innodb_log_checkpoint_now = @start_global_value; +SELECT @@global.innodb_log_checkpoint_now; +@@global.innodb_log_checkpoint_now +0 diff --git a/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result b/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result new file mode 100644 index 00000000000..6679ca87249 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result @@ -0,0 +1,42 @@ +SET @orig = @@global.innodb_log_checksums; +SELECT @orig; +@orig +1 +SET GLOBAL innodb_log_checksums = 'crc32'; +ERROR 42000: Variable 'innodb_log_checksums' can't be set to the value of 'crc32' +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET GLOBAL innodb_log_checksums = 2; +ERROR 42000: Variable 'innodb_log_checksums' can't be set to the value of '2' +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET GLOBAL innodb_log_checksums = 1e2; +ERROR 42000: Incorrect argument type to variable 'innodb_log_checksums' +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET GLOBAL innodb_log_checksums = 1.0; +ERROR 42000: Incorrect argument type to variable 'innodb_log_checksums' +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET innodb_log_checksums = OFF; +ERROR HY000: Variable 'innodb_log_checksums' is a GLOBAL variable and should be set with SET GLOBAL +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET 
GLOBAL innodb_log_checksums = OFF; +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +0 +SET GLOBAL innodb_log_checksums = default; +SET GLOBAL innodb_log_checksums = ON; +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 +SET GLOBAL innodb_log_checksums = @orig; +SELECT @@global.innodb_log_checksums; +@@global.innodb_log_checksums +1 diff --git a/mysql-test/suite/sys_vars/r/innodb_log_write_ahead_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_log_write_ahead_size_basic.result new file mode 100644 index 00000000000..5c9eb69de50 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_log_write_ahead_size_basic.result @@ -0,0 +1,88 @@ +SET @start_global_value = @@global.innodb_log_write_ahead_size; +SET global innodb_log_write_ahead_size=4096; +Valid values are positive number +SELECT @@global.innodb_log_write_ahead_size >= 512; +@@global.innodb_log_write_ahead_size >= 512 +1 +SELECT @@global.innodb_log_write_ahead_size <= 16*1024; +@@global.innodb_log_write_ahead_size <= 16*1024 +1 +SELECT @@session.innodb_log_write_ahead_size; +ERROR HY000: Variable 'innodb_log_write_ahead_size' is a GLOBAL variable +SHOW global variables LIKE 'innodb_log_write_ahead_size'; +Variable_name Value +innodb_log_write_ahead_size 4096 +SHOW session variables LIKE 'innodb_log_write_ahead_size'; +Variable_name Value +innodb_log_write_ahead_size 4096 +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_WRITE_AHEAD_SIZE 4096 +SELECT * FROM information_schema.session_variables +WHERE variable_name='innodb_log_write_ahead_size'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_WRITE_AHEAD_SIZE 4096 +SET global innodb_log_write_ahead_size=1024; +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +1024 +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +VARIABLE_NAME VARIABLE_VALUE 
+INNODB_LOG_WRITE_AHEAD_SIZE 1024 +SELECT * FROM information_schema.session_variables +WHERE variable_name='innodb_log_write_ahead_size'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_WRITE_AHEAD_SIZE 1024 +SET session innodb_log_write_ahead_size=2048; +ERROR HY000: Variable 'innodb_log_write_ahead_size' is a GLOBAL variable and should be set with SET GLOBAL +SET global innodb_log_write_ahead_size=512; +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +512 +SET global innodb_log_write_ahead_size=2048; +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +2048 +SET global innodb_log_write_ahead_size=4096; +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +4096 +SET global innodb_log_write_ahead_size=0; +Warnings: +Warning 1292 Truncated incorrect innodb_log_write_ahead_size value: '0' +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +512 +SET global innodb_log_write_ahead_size=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_log_write_ahead_size value: '-1024' +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +512 +SET global innodb_log_write_ahead_size=3000; +Warnings: +Warning 1292 Truncated incorrect innodb_log_write_ahead_size value: '3000' +Warning 1210 innodb_log_write_ahead_size should be set 2^n value and larger than 512. 
+Warning 1210 Setting innodb_log_write_ahead_size to 4096 +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +4096 +SET global innodb_log_write_ahead_size=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_log_write_ahead_size' +SET global innodb_log_write_ahead_size=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_log_write_ahead_size' +SET global innodb_log_write_ahead_size="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_log_write_ahead_size' +SET global innodb_log_write_ahead_size=-7; +Warnings: +Warning 1292 Truncated incorrect innodb_log_write_ahead_size value: '-7' +SELECT @@global.innodb_log_write_ahead_size; +@@global.innodb_log_write_ahead_size +512 +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LOG_WRITE_AHEAD_SIZE 512 +SET @@global.innodb_log_write_ahead_size = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result index d705624eb53..13ae9821752 100644 --- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result @@ -2,11 +2,17 @@ SET @global_start_value = @@global.innodb_max_dirty_pages_pct; SELECT @global_start_value; @global_start_value 75 +SET @global_start_max_dirty_lwm_value = @@global.innodb_max_dirty_pages_pct_lwm; +SELECT @global_start_max_dirty_lwm_value; +@global_start_max_dirty_lwm_value +0 +SET @@global.innodb_max_dirty_pages_pct_lwm = 0; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +0.000000 '#--------------------FN_DYNVARS_046_01------------------------#' SET @@global.innodb_max_dirty_pages_pct = 0; -Warnings: -Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '0' -SET @@global.innodb_max_dirty_pages_pct = 
@global_start_value; +SET @@global.innodb_max_dirty_pages_pct = DEFAULT; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 75.000000 @@ -19,18 +25,14 @@ SELECT @@innodb_max_dirty_pages_pct; SELECT local.innodb_max_dirty_pages_pct; ERROR 42S02: Unknown table 'local' in field list SET global innodb_max_dirty_pages_pct = 0; -Warnings: -Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '0' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 '#--------------------FN_DYNVARS_046_03------------------------#' -SET @@global.innodb_max_dirty_pages_pct = 0; -Warnings: -Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '0' +SET @@global.innodb_max_dirty_pages_pct = 0.0; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 SET @@global.innodb_max_dirty_pages_pct = 1; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct @@ -39,30 +41,94 @@ SET @@global.innodb_max_dirty_pages_pct = 99; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 99.000000 -'#--------------------FN_DYNVARS_046_04-------------------------#' +'#--------------------FN_DYNVARS_046_04------------------------#' +SET @@global.innodb_max_dirty_pages_pct_lwm = @global_start_value - 1; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +74.000000 +SET @@global.innodb_max_dirty_pages_pct = @global_start_value - 2; +Warnings: +Warning 1210 innodb_max_dirty_pages_pct cannot be set lower than innodb_max_dirty_pages_pct_lwm. 
+Warning 1210 Lowering innodb_max_dirty_page_pct_lwm to 73.000000 +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +73.000000 +'#--------------------FN_DYNVARS_046_05-------------------------#' SET @@global.innodb_max_dirty_pages_pct = -1; Warnings: Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '-1' +Warning 1210 innodb_max_dirty_pages_pct cannot be set lower than innodb_max_dirty_pages_pct_lwm. +Warning 1210 Lowering innodb_max_dirty_page_pct_lwm to 0.000000 SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 +SET @@global.innodb_max_dirty_pages_pct = -1024; +Warnings: +Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '-1024' +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +0.000000 SET @@global.innodb_max_dirty_pages_pct = "T"; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 SET @@global.innodb_max_dirty_pages_pct = "Y"; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 +SET @@global.innodb_max_dirty_pages_pct = 100; +Warnings: +Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '100' +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +99.999000 SET @@global.innodb_max_dirty_pages_pct = 1001; Warnings: Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '1001' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 99.999000 -'#----------------------FN_DYNVARS_046_05------------------------#' +SET @@global.innodb_max_dirty_pages_pct = 100000; +Warnings: +Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '100000' +SELECT @@global.innodb_max_dirty_pages_pct; 
+@@global.innodb_max_dirty_pages_pct +99.999000 +SET @@global.innodb_max_dirty_pages_pct = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +99.999000 +SET @@global.innodb_max_dirty_pages_pct = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +99.999000 +SET @@global.innodb_max_dirty_pages_pct = 1.1; +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +1.100000 +set global innodb_max_dirty_pages_pct = 0.1; +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +0.100000 +set global innodb_max_dirty_pages_pct = 31.34; +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +31.340000 +set global innodb_max_dirty_pages_pct = 100; +Warnings: +Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '100' +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +99.999000 +set global innodb_max_dirty_pages_pct = 99.999; +SELECT @@global.innodb_max_dirty_pages_pct; +@@global.innodb_max_dirty_pages_pct +99.999000 +'#----------------------FN_DYNVARS_046_06------------------------#' SELECT @@global.innodb_max_dirty_pages_pct = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct'; @@ -76,7 +142,7 @@ SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct'; VARIABLE_VALUE 99.999000 -'#---------------------FN_DYNVARS_046_06-------------------------#' +'#---------------------FN_DYNVARS_046_07-------------------------#' SET @@global.innodb_max_dirty_pages_pct = OFF; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@ -87,18 +153,20 @@ ERROR 42000: 
Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 99.999000 -'#---------------------FN_DYNVARS_046_07----------------------#' +'#---------------------FN_DYNVARS_046_08----------------------#' SET @@global.innodb_max_dirty_pages_pct = TRUE; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 1.000000 SET @@global.innodb_max_dirty_pages_pct = FALSE; -Warnings: -Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct value: '0' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -0.001000 +0.000000 SET @@global.innodb_max_dirty_pages_pct = @global_start_value; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 75.000000 +SET @@global.innodb_max_dirty_pages_pct_lwm = @global_start_max_dirty_lwm_value; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +0.000000 diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_func.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_func.result index eb0de047df1..8b68f182789 100644 --- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_func.result +++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_func.result @@ -1,19 +1,26 @@ SET @innodb_max_dirty_pages_pct = @@global.innodb_max_dirty_pages_pct; '#--------------------FN_DYNVARS_044_02-------------------------#' SET @@global.innodb_max_dirty_pages_pct = 80; +'connect (con1,localhost,root,,,,)' connect con1,localhost,root,,,,; +'connection con1' connection con1; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 80.000000 SET @@global.innodb_max_dirty_pages_pct = 70; +'connect (con2,localhost,root,,,,)' connect con2,localhost,root,,,,; +'connection con2' connection con2; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 70.000000 +'connection default' connection 
default; +'disconnect con2' disconnect con2; +'disconnect con1' disconnect con1; SET @@global.innodb_max_dirty_pages_pct = @innodb_max_dirty_pages_pct; '#--------------------FN_DYNVARS_044_02-------------------------#' diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result index 676ec103664..b6394d03b46 100644 --- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result @@ -1,23 +1,23 @@ SET @pct_lwm_start_value = @@global.innodb_max_dirty_pages_pct_lwm; SELECT @pct_lwm_start_value; @pct_lwm_start_value -0.001 +0 SET @pct_start_value = @@global.innodb_max_dirty_pages_pct; SELECT @pct_start_value; @pct_start_value 75 '#--------------------FN_DYNVARS_046_01------------------------#' SET @@global.innodb_max_dirty_pages_pct_lwm = 0; -SET @@global.innodb_max_dirty_pages_pct_lwm = @pct_lwm_start_value; +SET @@global.innodb_max_dirty_pages_pct_lwm = DEFAULT; SELECT @@global.innodb_max_dirty_pages_pct_lwm; @@global.innodb_max_dirty_pages_pct_lwm -0.001000 +0.000000 '#---------------------FN_DYNVARS_046_02-------------------------#' SET innodb_max_dirty_pages_pct_lwm = 1; ERROR HY000: Variable 'innodb_max_dirty_pages_pct_lwm' is a GLOBAL variable and should be set with SET GLOBAL SELECT @@innodb_max_dirty_pages_pct_lwm; @@innodb_max_dirty_pages_pct_lwm -0.001000 +0.000000 SELECT local.innodb_max_dirty_pages_pct_lwm; ERROR 42S02: Unknown table 'local' in field list SET global innodb_max_dirty_pages_pct_lwm = 0; @@ -57,6 +57,18 @@ Warning 1210 Setting innodb_max_dirty_page_pct_lwm to 75.000000 SELECT @@global.innodb_max_dirty_pages_pct_lwm; @@global.innodb_max_dirty_pages_pct_lwm 75.000000 +SET @@global.innodb_max_dirty_pages_pct_lwm = 0.0; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +0.000000 +SET 
@@global.innodb_max_dirty_pages_pct_lwm = 1.1; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +1.100000 +SET @@global.innodb_max_dirty_pages_pct_lwm = 51.12; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +51.120000 SET @@global.innodb_max_dirty_pages_pct_lwm = 100; Warnings: Warning 1292 Truncated incorrect innodb_max_dirty_pages_pct_lwm value: '100' @@ -65,6 +77,16 @@ Warning 1210 Setting innodb_max_dirty_page_pct_lwm to 75.000000 SELECT @@global.innodb_max_dirty_pages_pct_lwm; @@global.innodb_max_dirty_pages_pct_lwm 75.000000 +SET @@global.innodb_max_dirty_pages_pct_lwm = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct_lwm' +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +75.000000 +SET @@global.innodb_max_dirty_pages_pct_lwm = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct_lwm' +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +@@global.innodb_max_dirty_pages_pct_lwm +75.000000 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_max_dirty_pages_pct_lwm = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -106,4 +128,4 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct_lwm = @pct_lwm_start_value; SELECT @@global.innodb_max_dirty_pages_pct_lwm; @@global.innodb_max_dirty_pages_pct_lwm -0.001000 +0.000000 diff --git a/mysql-test/suite/sys_vars/r/innodb_max_purge_lag_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_purge_lag_basic.result index a01d2f2dd0c..bf526fc1c3d 100644 --- a/mysql-test/suite/sys_vars/r/innodb_max_purge_lag_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_max_purge_lag_basic.result @@ -33,13 +33,32 @@ SET @@global.innodb_max_purge_lag = 4294967295; SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag 4294967295 
-'#--------------------FN_DYNVARS_046_04-------------------------#' +'#--------------------FN_DYNVARS_046_04------------------------#' +SET @@global.innodb_max_purge_lag = 4294967296; +SELECT @@global.innodb_max_purge_lag IN (4294967296,4294967295); +@@global.innodb_max_purge_lag IN (4294967296,4294967295) +1 +SET @@global.innodb_max_purge_lag = 12345678901; +SELECT @@global.innodb_max_purge_lag IN (12345678901,4294967295); +@@global.innodb_max_purge_lag IN (12345678901,4294967295) +1 +SET @@global.innodb_max_purge_lag = 18446744073709551615; +SELECT @@global.innodb_max_purge_lag IN (18446744073709551615,4294967295); +@@global.innodb_max_purge_lag IN (18446744073709551615,4294967295) +1 +'#--------------------FN_DYNVARS_046_05-------------------------#' SET @@global.innodb_max_purge_lag = -1; Warnings: Warning 1292 Truncated incorrect innodb_max_purge_lag value: '-1' SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag 0 +SET @@global.innodb_max_purge_lag = -1024; +Warnings: +Warning 1292 Truncated incorrect innodb_max_purge_lag value: '-1024' +SELECT @@global.innodb_max_purge_lag; +@@global.innodb_max_purge_lag +0 SET @@global.innodb_max_purge_lag = "T"; ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' SELECT @@global.innodb_max_purge_lag; @@ -50,11 +69,22 @@ ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag 0 -SET @@global.innodb_max_purge_lag = 1001; +SET @@global.innodb_max_purge_lag = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag -1001 -'#----------------------FN_DYNVARS_046_05------------------------#' +0 +SET @@global.innodb_max_purge_lag = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' +SELECT @@global.innodb_max_purge_lag; +@@global.innodb_max_purge_lag +0 +SET @@global.innodb_max_purge_lag = " "; 
+ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' +SELECT @@global.innodb_max_purge_lag; +@@global.innodb_max_purge_lag +0 +'#----------------------FN_DYNVARS_046_06------------------------#' SELECT @@global.innodb_max_purge_lag = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_purge_lag'; @@ -63,23 +93,23 @@ VARIABLE_VALUE 1 SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag -1001 +0 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_purge_lag'; VARIABLE_VALUE -1001 -'#---------------------FN_DYNVARS_046_06-------------------------#' +0 +'#---------------------FN_DYNVARS_046_07-------------------------#' SET @@global.innodb_max_purge_lag = OFF; ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag -1001 +0 SET @@global.innodb_max_purge_lag = ON; ERROR 42000: Incorrect argument type to variable 'innodb_max_purge_lag' SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag -1001 -'#---------------------FN_DYNVARS_046_07----------------------#' +0 +'#---------------------FN_DYNVARS_046_08----------------------#' SET @@global.innodb_max_purge_lag = TRUE; SELECT @@global.innodb_max_purge_lag; @@global.innodb_max_purge_lag diff --git a/mysql-test/suite/sys_vars/r/innodb_max_undo_log_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_undo_log_size_basic.result new file mode 100644 index 00000000000..3854060b33b --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_max_undo_log_size_basic.result @@ -0,0 +1,54 @@ +'#---------------------BS_STVARS_035_01----------------------#' +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); +COUNT(@@GLOBAL.innodb_max_undo_log_size) +1 +1 Expected +'#---------------------BS_STVARS_035_02----------------------#' +SET @@GLOBAL.innodb_max_undo_log_size=1073741824; +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); 
+COUNT(@@GLOBAL.innodb_max_undo_log_size) +1 +1 Expected +SET @@GLOBAL.innodb_max_undo_log_size=18446744073709551615; +SELECT @@GLOBAL.innodb_max_undo_log_size; +@@GLOBAL.innodb_max_undo_log_size +18446744073709551615 +18446744073709551615 Expected +SET @@GLOBAL.innodb_max_undo_log_size=1073741824; +'#---------------------BS_STVARS_035_03----------------------#' +SELECT @@GLOBAL.innodb_max_undo_log_size = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_max_undo_log_size'; +@@GLOBAL.innodb_max_undo_log_size = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); +COUNT(@@GLOBAL.innodb_max_undo_log_size) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_max_undo_log_size'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +'#---------------------BS_STVARS_035_04----------------------#' +SELECT @@innodb_max_undo_log_size = @@GLOBAL.innodb_max_undo_log_size; +@@innodb_max_undo_log_size = @@GLOBAL.innodb_max_undo_log_size +1 +1 Expected +'#---------------------BS_STVARS_035_05----------------------#' +SELECT COUNT(@@innodb_max_undo_log_size); +COUNT(@@innodb_max_undo_log_size) +1 +1 Expected +SELECT COUNT(@@local.innodb_max_undo_log_size); +ERROR HY000: Variable 'innodb_max_undo_log_size' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_max_undo_log_size); +ERROR HY000: Variable 'innodb_max_undo_log_size' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); +COUNT(@@GLOBAL.innodb_max_undo_log_size) +1 +1 Expected diff --git a/mysql-test/suite/sys_vars/r/innodb_merge_threshold_set_all_debug_basic.result b/mysql-test/suite/sys_vars/r/innodb_merge_threshold_set_all_debug_basic.result new file mode 100644 index 00000000000..6e325d0be38 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_merge_threshold_set_all_debug_basic.result @@ -0,0 
+1,28 @@ +# +# Basic test for innodb_merge_threshold_set_all_debug +# +SELECT @@global.innodb_merge_threshold_set_all_debug; +@@global.innodb_merge_threshold_set_all_debug +50 +set global innodb_merge_threshold_set_all_debug = 1; +SELECT @@global.innodb_merge_threshold_set_all_debug; +@@global.innodb_merge_threshold_set_all_debug +1 +set global innodb_merge_threshold_set_all_debug = 51; +Warnings: +Warning 1292 Truncated incorrect innodb_merge_threshold_set_all_d value: '51' +SELECT @@global.innodb_merge_threshold_set_all_debug; +@@global.innodb_merge_threshold_set_all_debug +50 +set global innodb_merge_threshold_set_all_debug = 0; +Warnings: +Warning 1292 Truncated incorrect innodb_merge_threshold_set_all_d value: '0' +SELECT @@global.innodb_merge_threshold_set_all_debug; +@@global.innodb_merge_threshold_set_all_debug +1 +set innodb_merge_threshold_set_all_debug = 50; +ERROR HY000: Variable 'innodb_merge_threshold_set_all_debug' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_merge_threshold_set_all_debug = 50; +SELECT @@global.innodb_merge_threshold_set_all_debug; +@@global.innodb_merge_threshold_set_all_debug +50 diff --git a/mysql-test/suite/sys_vars/r/innodb_mirrored_log_groups_basic.result b/mysql-test/suite/sys_vars/r/innodb_mirrored_log_groups_basic.result deleted file mode 100644 index 1645d8163ae..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_mirrored_log_groups_basic.result +++ /dev/null @@ -1,53 +0,0 @@ -'#---------------------BS_STVARS_037_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); -COUNT(@@GLOBAL.innodb_mirrored_log_groups) -1 -1 Expected -'#---------------------BS_STVARS_037_02----------------------#' -SET @@GLOBAL.innodb_mirrored_log_groups=1; -ERROR HY000: Variable 'innodb_mirrored_log_groups' is a read only variable -Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); -COUNT(@@GLOBAL.innodb_mirrored_log_groups) -1 -1 Expected 
-'#---------------------BS_STVARS_037_03----------------------#' -SELECT @@GLOBAL.innodb_mirrored_log_groups = VARIABLE_VALUE -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_mirrored_log_groups'; -@@GLOBAL.innodb_mirrored_log_groups = VARIABLE_VALUE -1 -1 Expected -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); -COUNT(@@GLOBAL.innodb_mirrored_log_groups) -1 -1 Expected -SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_mirrored_log_groups'; -COUNT(VARIABLE_VALUE) -1 -1 Expected -'#---------------------BS_STVARS_037_04----------------------#' -SELECT @@innodb_mirrored_log_groups = @@GLOBAL.innodb_mirrored_log_groups; -@@innodb_mirrored_log_groups = @@GLOBAL.innodb_mirrored_log_groups -1 -1 Expected -'#---------------------BS_STVARS_037_05----------------------#' -SELECT COUNT(@@innodb_mirrored_log_groups); -COUNT(@@innodb_mirrored_log_groups) -1 -1 Expected -SELECT COUNT(@@local.innodb_mirrored_log_groups); -ERROR HY000: Variable 'innodb_mirrored_log_groups' is a GLOBAL variable -Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@SESSION.innodb_mirrored_log_groups); -ERROR HY000: Variable 'innodb_mirrored_log_groups' is a GLOBAL variable -Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); -COUNT(@@GLOBAL.innodb_mirrored_log_groups) -1 -1 Expected -SELECT innodb_mirrored_log_groups = @@SESSION.innodb_mirrored_log_groups; -ERROR 42S22: Unknown column 'innodb_mirrored_log_groups' in 'field list' -Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index 6c7051dc3d0..3764b00688b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -4,7 +4,6 @@ name status metadata_table_handles_opened disabled 
metadata_table_handles_closed disabled metadata_table_reference_count disabled -metadata_mem_pool_size disabled lock_deadlocks disabled lock_timeouts disabled lock_rec_lock_waits disabled @@ -47,7 +46,6 @@ buffer_data_written disabled buffer_flush_batch_scanned disabled buffer_flush_batch_num_scan disabled buffer_flush_batch_scanned_per_call disabled -buffer_flush_batch_rescan disabled buffer_flush_batch_total_pages disabled buffer_flush_batches disabled buffer_flush_batch_pages disabled @@ -55,6 +53,19 @@ buffer_flush_neighbor_total_pages disabled buffer_flush_neighbor disabled buffer_flush_neighbor_pages disabled buffer_flush_n_to_flush_requested disabled +buffer_flush_n_to_flush_by_age disabled +buffer_flush_adaptive_avg_time_slot disabled +buffer_LRU_batch_flush_avg_time_slot disabled +buffer_flush_adaptive_avg_time_thread disabled +buffer_LRU_batch_flush_avg_time_thread disabled +buffer_flush_adaptive_avg_time_est disabled +buffer_LRU_batch_flush_avg_time_est disabled +buffer_flush_avg_time disabled +buffer_flush_adaptive_avg_pass disabled +buffer_LRU_batch_flush_avg_pass disabled +buffer_flush_avg_pass disabled +buffer_LRU_get_free_loops disabled +buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -157,12 +168,13 @@ log_lsn_checkpoint_age disabled log_lsn_buf_pool_oldest disabled log_max_modified_age_async disabled log_max_modified_age_sync disabled -log_pending_log_writes disabled +log_pending_log_flushes disabled log_pending_checkpoint_writes disabled log_num_log_io disabled log_waits disabled log_write_requests disabled log_writes disabled +log_padded disabled compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled @@ -223,10 +235,13 @@ innodb_dblwr_pages_written disabled innodb_page_size disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled 
innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled dml_reads disabled dml_inserts disabled dml_deletes disabled @@ -239,6 +254,8 @@ ddl_background_drop_indexes disabled ddl_background_drop_tables disabled ddl_online_create_index disabled ddl_pending_alter_table disabled +ddl_sort_file_alter_table disabled +ddl_log_file_alter_table disabled icp_attempts disabled icp_no_match disabled icp_out_of_range disabled @@ -280,10 +297,13 @@ lock_row_lock_waits disabled lock_row_lock_time_avg disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled set global innodb_monitor_enable = "%lock*"; ERROR 42000: Variable 'innodb_monitor_enable' can't be set to the value of '%lock*' set global innodb_monitor_enable="%%%%%%%%%%%%%%%%%%%%%%%%%%%"; @@ -408,7 +428,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 2 NULL 2 enabled metadata_table_handles_closed 1 NULL 1 1 NULL 1 enabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_disable = module_metadata; set global innodb_monitor_reset = module_metadata; select name, max_count, min_count, count, @@ -419,7 +438,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 NULL NULL 0 disabled metadata_table_handles_closed 1 NULL 1 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 
disabled set global innodb_monitor_reset_all = module_metadata; select name, max_count, min_count, count, max_count_reset, min_count_reset, count_reset, status @@ -429,7 +447,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened NULL NULL 0 NULL NULL 0 disabled metadata_table_handles_closed NULL NULL 0 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_enable = module_trx; begin; insert into monitor_test values(9); diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result index 6c7051dc3d0..3764b00688b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result @@ -4,7 +4,6 @@ name status metadata_table_handles_opened disabled metadata_table_handles_closed disabled metadata_table_reference_count disabled -metadata_mem_pool_size disabled lock_deadlocks disabled lock_timeouts disabled lock_rec_lock_waits disabled @@ -47,7 +46,6 @@ buffer_data_written disabled buffer_flush_batch_scanned disabled buffer_flush_batch_num_scan disabled buffer_flush_batch_scanned_per_call disabled -buffer_flush_batch_rescan disabled buffer_flush_batch_total_pages disabled buffer_flush_batches disabled buffer_flush_batch_pages disabled @@ -55,6 +53,19 @@ buffer_flush_neighbor_total_pages disabled buffer_flush_neighbor disabled buffer_flush_neighbor_pages disabled buffer_flush_n_to_flush_requested disabled +buffer_flush_n_to_flush_by_age disabled +buffer_flush_adaptive_avg_time_slot disabled +buffer_LRU_batch_flush_avg_time_slot disabled +buffer_flush_adaptive_avg_time_thread disabled +buffer_LRU_batch_flush_avg_time_thread disabled +buffer_flush_adaptive_avg_time_est disabled +buffer_LRU_batch_flush_avg_time_est disabled +buffer_flush_avg_time 
disabled +buffer_flush_adaptive_avg_pass disabled +buffer_LRU_batch_flush_avg_pass disabled +buffer_flush_avg_pass disabled +buffer_LRU_get_free_loops disabled +buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -157,12 +168,13 @@ log_lsn_checkpoint_age disabled log_lsn_buf_pool_oldest disabled log_max_modified_age_async disabled log_max_modified_age_sync disabled -log_pending_log_writes disabled +log_pending_log_flushes disabled log_pending_checkpoint_writes disabled log_num_log_io disabled log_waits disabled log_write_requests disabled log_writes disabled +log_padded disabled compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled @@ -223,10 +235,13 @@ innodb_dblwr_pages_written disabled innodb_page_size disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled dml_reads disabled dml_inserts disabled dml_deletes disabled @@ -239,6 +254,8 @@ ddl_background_drop_indexes disabled ddl_background_drop_tables disabled ddl_online_create_index disabled ddl_pending_alter_table disabled +ddl_sort_file_alter_table disabled +ddl_log_file_alter_table disabled icp_attempts disabled icp_no_match disabled icp_out_of_range disabled @@ -280,10 +297,13 @@ lock_row_lock_waits disabled lock_row_lock_time_avg disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled set global 
innodb_monitor_enable = "%lock*"; ERROR 42000: Variable 'innodb_monitor_enable' can't be set to the value of '%lock*' set global innodb_monitor_enable="%%%%%%%%%%%%%%%%%%%%%%%%%%%"; @@ -408,7 +428,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 2 NULL 2 enabled metadata_table_handles_closed 1 NULL 1 1 NULL 1 enabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_disable = module_metadata; set global innodb_monitor_reset = module_metadata; select name, max_count, min_count, count, @@ -419,7 +438,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 NULL NULL 0 disabled metadata_table_handles_closed 1 NULL 1 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_reset_all = module_metadata; select name, max_count, min_count, count, max_count_reset, min_count_reset, count_reset, status @@ -429,7 +447,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened NULL NULL 0 NULL NULL 0 disabled metadata_table_handles_closed NULL NULL 0 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_enable = module_trx; begin; insert into monitor_test values(9); diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result index 6c7051dc3d0..3764b00688b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result @@ -4,7 +4,6 @@ name status metadata_table_handles_opened disabled 
metadata_table_handles_closed disabled metadata_table_reference_count disabled -metadata_mem_pool_size disabled lock_deadlocks disabled lock_timeouts disabled lock_rec_lock_waits disabled @@ -47,7 +46,6 @@ buffer_data_written disabled buffer_flush_batch_scanned disabled buffer_flush_batch_num_scan disabled buffer_flush_batch_scanned_per_call disabled -buffer_flush_batch_rescan disabled buffer_flush_batch_total_pages disabled buffer_flush_batches disabled buffer_flush_batch_pages disabled @@ -55,6 +53,19 @@ buffer_flush_neighbor_total_pages disabled buffer_flush_neighbor disabled buffer_flush_neighbor_pages disabled buffer_flush_n_to_flush_requested disabled +buffer_flush_n_to_flush_by_age disabled +buffer_flush_adaptive_avg_time_slot disabled +buffer_LRU_batch_flush_avg_time_slot disabled +buffer_flush_adaptive_avg_time_thread disabled +buffer_LRU_batch_flush_avg_time_thread disabled +buffer_flush_adaptive_avg_time_est disabled +buffer_LRU_batch_flush_avg_time_est disabled +buffer_flush_avg_time disabled +buffer_flush_adaptive_avg_pass disabled +buffer_LRU_batch_flush_avg_pass disabled +buffer_flush_avg_pass disabled +buffer_LRU_get_free_loops disabled +buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -157,12 +168,13 @@ log_lsn_checkpoint_age disabled log_lsn_buf_pool_oldest disabled log_max_modified_age_async disabled log_max_modified_age_sync disabled -log_pending_log_writes disabled +log_pending_log_flushes disabled log_pending_checkpoint_writes disabled log_num_log_io disabled log_waits disabled log_write_requests disabled log_writes disabled +log_padded disabled compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled @@ -223,10 +235,13 @@ innodb_dblwr_pages_written disabled innodb_page_size disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled 
innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled dml_reads disabled dml_inserts disabled dml_deletes disabled @@ -239,6 +254,8 @@ ddl_background_drop_indexes disabled ddl_background_drop_tables disabled ddl_online_create_index disabled ddl_pending_alter_table disabled +ddl_sort_file_alter_table disabled +ddl_log_file_alter_table disabled icp_attempts disabled icp_no_match disabled icp_out_of_range disabled @@ -280,10 +297,13 @@ lock_row_lock_waits disabled lock_row_lock_time_avg disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled set global innodb_monitor_enable = "%lock*"; ERROR 42000: Variable 'innodb_monitor_enable' can't be set to the value of '%lock*' set global innodb_monitor_enable="%%%%%%%%%%%%%%%%%%%%%%%%%%%"; @@ -408,7 +428,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 2 NULL 2 enabled metadata_table_handles_closed 1 NULL 1 1 NULL 1 enabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_disable = module_metadata; set global innodb_monitor_reset = module_metadata; select name, max_count, min_count, count, @@ -419,7 +438,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 NULL NULL 0 disabled metadata_table_handles_closed 1 NULL 1 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 
disabled set global innodb_monitor_reset_all = module_metadata; select name, max_count, min_count, count, max_count_reset, min_count_reset, count_reset, status @@ -429,7 +447,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened NULL NULL 0 NULL NULL 0 disabled metadata_table_handles_closed NULL NULL 0 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_enable = module_trx; begin; insert into monitor_test values(9); diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result index 6c7051dc3d0..3764b00688b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result @@ -4,7 +4,6 @@ name status metadata_table_handles_opened disabled metadata_table_handles_closed disabled metadata_table_reference_count disabled -metadata_mem_pool_size disabled lock_deadlocks disabled lock_timeouts disabled lock_rec_lock_waits disabled @@ -47,7 +46,6 @@ buffer_data_written disabled buffer_flush_batch_scanned disabled buffer_flush_batch_num_scan disabled buffer_flush_batch_scanned_per_call disabled -buffer_flush_batch_rescan disabled buffer_flush_batch_total_pages disabled buffer_flush_batches disabled buffer_flush_batch_pages disabled @@ -55,6 +53,19 @@ buffer_flush_neighbor_total_pages disabled buffer_flush_neighbor disabled buffer_flush_neighbor_pages disabled buffer_flush_n_to_flush_requested disabled +buffer_flush_n_to_flush_by_age disabled +buffer_flush_adaptive_avg_time_slot disabled +buffer_LRU_batch_flush_avg_time_slot disabled +buffer_flush_adaptive_avg_time_thread disabled +buffer_LRU_batch_flush_avg_time_thread disabled +buffer_flush_adaptive_avg_time_est disabled +buffer_LRU_batch_flush_avg_time_est disabled +buffer_flush_avg_time disabled 
+buffer_flush_adaptive_avg_pass disabled +buffer_LRU_batch_flush_avg_pass disabled +buffer_flush_avg_pass disabled +buffer_LRU_get_free_loops disabled +buffer_LRU_get_free_waits disabled buffer_flush_avg_page_rate disabled buffer_flush_lsn_avg_rate disabled buffer_flush_pct_for_dirty disabled @@ -157,12 +168,13 @@ log_lsn_checkpoint_age disabled log_lsn_buf_pool_oldest disabled log_max_modified_age_async disabled log_max_modified_age_sync disabled -log_pending_log_writes disabled +log_pending_log_flushes disabled log_pending_checkpoint_writes disabled log_num_log_io disabled log_waits disabled log_write_requests disabled log_writes disabled +log_padded disabled compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled @@ -223,10 +235,13 @@ innodb_dblwr_pages_written disabled innodb_page_size disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled dml_reads disabled dml_inserts disabled dml_deletes disabled @@ -239,6 +254,8 @@ ddl_background_drop_indexes disabled ddl_background_drop_tables disabled ddl_online_create_index disabled ddl_pending_alter_table disabled +ddl_sort_file_alter_table disabled +ddl_log_file_alter_table disabled icp_attempts disabled icp_no_match disabled icp_out_of_range disabled @@ -280,10 +297,13 @@ lock_row_lock_waits disabled lock_row_lock_time_avg disabled innodb_rwlock_s_spin_waits disabled innodb_rwlock_x_spin_waits disabled +innodb_rwlock_sx_spin_waits disabled innodb_rwlock_s_spin_rounds disabled innodb_rwlock_x_spin_rounds disabled +innodb_rwlock_sx_spin_rounds disabled innodb_rwlock_s_os_waits disabled innodb_rwlock_x_os_waits disabled +innodb_rwlock_sx_os_waits disabled set global innodb_monitor_enable = 
"%lock*"; ERROR 42000: Variable 'innodb_monitor_enable' can't be set to the value of '%lock*' set global innodb_monitor_enable="%%%%%%%%%%%%%%%%%%%%%%%%%%%"; @@ -408,7 +428,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 2 NULL 2 enabled metadata_table_handles_closed 1 NULL 1 1 NULL 1 enabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_disable = module_metadata; set global innodb_monitor_reset = module_metadata; select name, max_count, min_count, count, @@ -419,7 +438,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened 2 NULL 2 NULL NULL 0 disabled metadata_table_handles_closed 1 NULL 1 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_reset_all = module_metadata; select name, max_count, min_count, count, max_count_reset, min_count_reset, count_reset, status @@ -429,7 +447,6 @@ name max_count min_count count max_count_reset min_count_reset count_reset statu metadata_table_handles_opened NULL NULL 0 NULL NULL 0 disabled metadata_table_handles_closed NULL NULL 0 NULL NULL 0 disabled metadata_table_reference_count NULL NULL 0 NULL NULL 0 disabled -metadata_mem_pool_size NULL NULL 0 NULL NULL 0 disabled set global innodb_monitor_enable = module_trx; begin; insert into monitor_test values(9); diff --git a/mysql-test/suite/sys_vars/r/innodb_old_blocks_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_old_blocks_pct_basic.result index bbcc2dabb22..6309ffc8cb0 100644 --- a/mysql-test/suite/sys_vars/r/innodb_old_blocks_pct_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_old_blocks_pct_basic.result @@ -35,12 +35,20 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_OLD_BLOCKS_PCT 10 set session innodb_old_blocks_pct=1; 
ERROR HY000: Variable 'innodb_old_blocks_pct' is a GLOBAL variable and should be set with SET GLOBAL +set @@global.innodb_old_blocks_pct=DEFAULT; +select @@global.innodb_old_blocks_pct; +@@global.innodb_old_blocks_pct +37 set global innodb_old_blocks_pct=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_old_blocks_pct' set global innodb_old_blocks_pct=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_old_blocks_pct' set global innodb_old_blocks_pct="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_old_blocks_pct' +set global innodb_old_blocks_pct=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_old_blocks_pct' +set global innodb_old_blocks_pct=''; +ERROR 42000: Incorrect argument type to variable 'innodb_old_blocks_pct' set global innodb_old_blocks_pct=4; Warnings: Warning 1292 Truncated incorrect innodb_old_blocks_pct value: '4' diff --git a/mysql-test/suite/sys_vars/r/innodb_page_cleaners_basic.result b/mysql-test/suite/sys_vars/r/innodb_page_cleaners_basic.result new file mode 100644 index 00000000000..5a89af5ca88 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_page_cleaners_basic.result @@ -0,0 +1,41 @@ +SELECT COUNT(@@GLOBAL.innodb_page_cleaners); +COUNT(@@GLOBAL.innodb_page_cleaners) +1 +1 Expected +SELECT COUNT(@@innodb_page_cleaners); +COUNT(@@innodb_page_cleaners) +1 +1 Expected +SET @@GLOBAL.innodb_page_cleaners=1; +ERROR HY000: Variable 'innodb_page_cleaners' is a read only variable +Expected error 'Read-only variable' +SELECT innodb_page_cleaners = @@SESSION.innodb_page_cleaners; +ERROR 42S22: Unknown column 'innodb_page_cleaners' in 'field list' +Expected error 'Read-only variable' +SELECT @@GLOBAL.innodb_page_cleaners = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_page_cleaners'; +@@GLOBAL.innodb_page_cleaners = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE 
VARIABLE_NAME='innodb_page_cleaners'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SELECT @@innodb_page_cleaners = @@GLOBAL.innodb_page_cleaners; +@@innodb_page_cleaners = @@GLOBAL.innodb_page_cleaners +1 +1 Expected +SELECT COUNT(@@local.innodb_page_cleaners); +ERROR HY000: Variable 'innodb_page_cleaners' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_page_cleaners); +ERROR HY000: Variable 'innodb_page_cleaners' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT VARIABLE_NAME, VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME = 'innodb_page_cleaners'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_PAGE_CLEANERS 1 diff --git a/mysql-test/suite/sys_vars/r/innodb_page_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_page_size_basic.result index d9d067c2cf9..f1a90f0d561 100644 --- a/mysql-test/suite/sys_vars/r/innodb_page_size_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_page_size_basic.result @@ -1 +1,8 @@ -XtraDB extension +SET @orig = @@global.innodb_page_size; +SELECT @orig; +@orig +{valid_page_size} +SET GLOBAL innodb_page_size = 4k; +ERROR HY000: Variable 'innodb_page_size' is a read only variable +SET GLOBAL innodb_page_size = 8k; +ERROR HY000: Variable 'innodb_page_size' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result index 8f81df74d5b..6279cd143cf 100644 --- a/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result @@ -29,10 +29,12 @@ SET @@global.innodb_purge_batch_size = 5000; SELECT @@global.innodb_purge_batch_size; @@global.innodb_purge_batch_size 5000 -SET @@global.innodb_purge_batch_size = 1000; +SET @@global.innodb_purge_batch_size = 4294967295; +Warnings: +Warning 1292 Truncated incorrect innodb_purge_batch_size value: '4294967295' SELECT 
@@global.innodb_purge_batch_size; @@global.innodb_purge_batch_size -1000 +5000 '#--------------------FN_DYNVARS_046_04-------------------------#' SET @@global.innodb_purge_batch_size = 0; Warnings: @@ -50,9 +52,24 @@ ERROR 42000: Incorrect argument type to variable 'innodb_purge_batch_size' SELECT @@global.innodb_purge_batch_size; @@global.innodb_purge_batch_size 1 -SET @@global.innodb_purge_batch_size = 5001; +SET @@global.innodb_purge_batch_size = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_batch_size' +SELECT @@global.innodb_purge_batch_size; +@@global.innodb_purge_batch_size +1 +SET @@global.innodb_purge_batch_size = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_batch_size' +SELECT @@global.innodb_purge_batch_size; +@@global.innodb_purge_batch_size +1 +SET @@global.innodb_purge_batch_size = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_batch_size' +SELECT @@global.innodb_purge_batch_size; +@@global.innodb_purge_batch_size +1 +SET @@global.innodb_purge_batch_size = 4294967297; Warnings: -Warning 1292 Truncated incorrect innodb_purge_batch_size value: '5001' +Warning 1292 Truncated incorrect innodb_purge_batch_size value: '4294967297' SELECT @@global.innodb_purge_batch_size; @@global.innodb_purge_batch_size 5000 diff --git a/mysql-test/suite/sys_vars/r/innodb_purge_rseg_truncate_frequency_basic.result b/mysql-test/suite/sys_vars/r/innodb_purge_rseg_truncate_frequency_basic.result new file mode 100644 index 00000000000..79eb0743dfa --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_purge_rseg_truncate_frequency_basic.result @@ -0,0 +1,113 @@ +SET @global_start_value = @@global.innodb_purge_rseg_truncate_frequency; +SELECT @global_start_value; +@global_start_value +128 +'#--------------------FN_DYNVARS_046_01------------------------#' +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SET @@global.innodb_purge_rseg_truncate_frequency = DEFAULT; +SELECT 
@@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +128 +'#---------------------FN_DYNVARS_046_02-------------------------#' +SET innodb_purge_rseg_truncate_frequency = 1; +ERROR HY000: Variable 'innodb_purge_rseg_truncate_frequency' is a GLOBAL variable and should be set with SET GLOBAL +SELECT @@innodb_purge_rseg_truncate_frequency; +@@innodb_purge_rseg_truncate_frequency +128 +SELECT local.innodb_purge_rseg_truncate_frequency; +ERROR 42S02: Unknown table 'local' in field list +SET global innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +'#--------------------FN_DYNVARS_046_03------------------------#' +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = 128; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +128 +'#--------------------FN_DYNVARS_046_05-------------------------#' +SET @@global.innodb_purge_rseg_truncate_frequency = -1; +Warnings: +Warning 1292 Truncated incorrect innodb_purge_rseg_truncate_frequ value: '-1' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = -1024; +Warnings: +Warning 1292 Truncated incorrect innodb_purge_rseg_truncate_frequ value: '-1024' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = "T"; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT 
@@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = "Y"; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = 1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +'#----------------------FN_DYNVARS_046_06------------------------#' +SELECT @@global.innodb_purge_rseg_truncate_frequency = +VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_purge_rseg_truncate_frequency'; +@@global.innodb_purge_rseg_truncate_frequency = +VARIABLE_VALUE +1 +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_purge_rseg_truncate_frequency'; +VARIABLE_VALUE +1 +'#---------------------FN_DYNVARS_046_07-------------------------#' +SET @@global.innodb_purge_rseg_truncate_frequency = OFF; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET 
@@global.innodb_purge_rseg_truncate_frequency = ON; +ERROR 42000: Incorrect argument type to variable 'innodb_purge_rseg_truncate_frequency' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +'#---------------------FN_DYNVARS_046_08----------------------#' +SET @@global.innodb_purge_rseg_truncate_frequency = TRUE; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = FALSE; +Warnings: +Warning 1292 Truncated incorrect innodb_purge_rseg_truncate_frequ value: '0' +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +1 +SET @@global.innodb_purge_rseg_truncate_frequency = @global_start_value; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +@@global.innodb_purge_rseg_truncate_frequency +128 diff --git a/mysql-test/suite/sys_vars/r/innodb_purge_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_purge_threads_basic.result index e3358a14ea2..2cb697acb6d 100644 --- a/mysql-test/suite/sys_vars/r/innodb_purge_threads_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_purge_threads_basic.result @@ -1,53 +1,41 @@ -'#---------------------BS_STVARS_035_01----------------------#' SELECT COUNT(@@GLOBAL.innodb_purge_threads); COUNT(@@GLOBAL.innodb_purge_threads) 1 1 Expected -'#---------------------BS_STVARS_035_02----------------------#' +SELECT COUNT(@@innodb_purge_threads); +COUNT(@@innodb_purge_threads) +1 +1 Expected SET @@GLOBAL.innodb_purge_threads=1; ERROR HY000: Variable 'innodb_purge_threads' is a read only variable -Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_purge_threads); -COUNT(@@GLOBAL.innodb_purge_threads) -1 -1 Expected -'#---------------------BS_STVARS_035_03----------------------#' +Expected error 'Read-only variable' +SELECT innodb_purge_threads = @@SESSION.innodb_purge_threads; +ERROR 42S22: Unknown column 
'innodb_purge_threads' in 'field list' +Expected error 'Read-only variable' SELECT @@GLOBAL.innodb_purge_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_purge_threads'; @@GLOBAL.innodb_purge_threads = VARIABLE_VALUE 1 1 Expected -SELECT COUNT(@@GLOBAL.innodb_purge_threads); -COUNT(@@GLOBAL.innodb_purge_threads) -1 -1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_purge_threads'; COUNT(VARIABLE_VALUE) 1 1 Expected -'#---------------------BS_STVARS_035_04----------------------#' SELECT @@innodb_purge_threads = @@GLOBAL.innodb_purge_threads; @@innodb_purge_threads = @@GLOBAL.innodb_purge_threads 1 1 Expected -'#---------------------BS_STVARS_035_05----------------------#' -SELECT COUNT(@@innodb_purge_threads); -COUNT(@@innodb_purge_threads) -1 -1 Expected SELECT COUNT(@@local.innodb_purge_threads); ERROR HY000: Variable 'innodb_purge_threads' is a GLOBAL variable Expected error 'Variable is a GLOBAL variable' SELECT COUNT(@@SESSION.innodb_purge_threads); ERROR HY000: Variable 'innodb_purge_threads' is a GLOBAL variable Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_purge_threads); -COUNT(@@GLOBAL.innodb_purge_threads) -1 -1 Expected -SELECT innodb_purge_threads = @@SESSION.innodb_purge_threads; -ERROR 42S22: Unknown column 'innodb_purge_threads' in 'field list' -Expected error 'Readonly variable' +SELECT VARIABLE_NAME, VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME = 'innodb_purge_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_PURGE_THREADS 4 diff --git a/mysql-test/suite/sys_vars/r/innodb_read_ahead_threshold_basic.result b/mysql-test/suite/sys_vars/r/innodb_read_ahead_threshold_basic.result index 65a1a8e319f..8ca5862ac09 100644 --- a/mysql-test/suite/sys_vars/r/innodb_read_ahead_threshold_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_read_ahead_threshold_basic.result @@ -35,12 +35,26 @@ 
VARIABLE_NAME VARIABLE_VALUE INNODB_READ_AHEAD_THRESHOLD 10 set session innodb_read_ahead_threshold=1; ERROR HY000: Variable 'innodb_read_ahead_threshold' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_read_ahead_threshold=DEFAULT; +select @@global.innodb_read_ahead_threshold; +@@global.innodb_read_ahead_threshold +56 set global innodb_read_ahead_threshold=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_read_ahead_threshold' set global innodb_read_ahead_threshold=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_read_ahead_threshold' set global innodb_read_ahead_threshold="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_read_ahead_threshold' +set global innodb_read_ahead_threshold=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_read_ahead_threshold' +select @@global.innodb_read_ahead_threshold; +@@global.innodb_read_ahead_threshold +56 +set global innodb_read_ahead_threshold=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_read_ahead_threshold' +select @@global.innodb_read_ahead_threshold; +@@global.innodb_read_ahead_threshold +56 set global innodb_read_ahead_threshold=-7; Warnings: Warning 1292 Truncated incorrect innodb_read_ahead_threshold value: '-7' diff --git a/mysql-test/suite/sys_vars/r/innodb_replication_delay_basic.result b/mysql-test/suite/sys_vars/r/innodb_replication_delay_basic.result index fa00baa218e..5e0fb425f6b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_replication_delay_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_replication_delay_basic.result @@ -35,21 +35,65 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_REPLICATION_DELAY 10 set session innodb_replication_delay=1; ERROR HY000: Variable 'innodb_replication_delay' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_replication_delay=DEFAULT; +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +0 +set global innodb_replication_delay=0; +select 
@@global.innodb_replication_delay; +@@global.innodb_replication_delay +0 +set global innodb_replication_delay=65535; +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +65535 +set global innodb_replication_delay=4294967295; +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +4294967295 set global innodb_replication_delay=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_replication_delay' set global innodb_replication_delay=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_replication_delay' set global innodb_replication_delay="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_replication_delay' +set global innodb_replication_delay=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_replication_delay' +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +4294967295 +set global innodb_replication_delay=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_replication_delay' +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +4294967295 set global innodb_replication_delay=-7; Warnings: Warning 1292 Truncated incorrect innodb_replication_delay value: '-7' select @@global.innodb_replication_delay; @@global.innodb_replication_delay 0 +set global innodb_replication_delay=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_replication_delay value: '-1024' +select @@global.innodb_replication_delay; +@@global.innodb_replication_delay +0 select * from information_schema.global_variables where variable_name='innodb_replication_delay'; VARIABLE_NAME VARIABLE_VALUE INNODB_REPLICATION_DELAY 0 +SET @@global.innodb_replication_delay = 4294967296; +SELECT @@global.innodb_replication_delay IN (4294967296,4294967295); +@@global.innodb_replication_delay IN (4294967296,4294967295) +1 +SET @@global.innodb_replication_delay = 12345678901; +SELECT @@global.innodb_replication_delay IN (12345678901,4294967295); 
+@@global.innodb_replication_delay IN (12345678901,4294967295) +1 +SET @@global.innodb_replication_delay = 18446744073709551615; +SELECT @@global.innodb_replication_delay IN (18446744073709551615,4294967295); +@@global.innodb_replication_delay IN (18446744073709551615,4294967295) +1 SET @@global.innodb_replication_delay = @start_global_value; SELECT @@global.innodb_replication_delay; @@global.innodb_replication_delay diff --git a/mysql-test/suite/sys_vars/r/innodb_spin_wait_delay_basic.result b/mysql-test/suite/sys_vars/r/innodb_spin_wait_delay_basic.result index 05672cbb966..621ef56f61f 100644 --- a/mysql-test/suite/sys_vars/r/innodb_spin_wait_delay_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_spin_wait_delay_basic.result @@ -35,21 +35,65 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_SPIN_WAIT_DELAY 10 set session innodb_spin_wait_delay=1; ERROR HY000: Variable 'innodb_spin_wait_delay' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_spin_wait_delay=DEFAULT; +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +6 +set global innodb_spin_wait_delay=0; +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +0 +set global innodb_spin_wait_delay=65535; +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +65535 +set global innodb_spin_wait_delay=4294967295; +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +4294967295 set global innodb_spin_wait_delay=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_spin_wait_delay' set global innodb_spin_wait_delay=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_spin_wait_delay' set global innodb_spin_wait_delay="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_spin_wait_delay' +set global innodb_spin_wait_delay=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_spin_wait_delay' +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +4294967295 +set 
global innodb_spin_wait_delay=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_spin_wait_delay' +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +4294967295 set global innodb_spin_wait_delay=-7; Warnings: Warning 1292 Truncated incorrect innodb_spin_wait_delay value: '-7' select @@global.innodb_spin_wait_delay; @@global.innodb_spin_wait_delay 0 +set global innodb_spin_wait_delay=-1024; +Warnings: +Warning 1292 Truncated incorrect innodb_spin_wait_delay value: '-1024' +select @@global.innodb_spin_wait_delay; +@@global.innodb_spin_wait_delay +0 select * from information_schema.global_variables where variable_name='innodb_spin_wait_delay'; VARIABLE_NAME VARIABLE_VALUE INNODB_SPIN_WAIT_DELAY 0 +SET @@global.innodb_spin_wait_delay = 4294967296; +SELECT @@global.innodb_spin_wait_delay IN (4294967296,4294967295); +@@global.innodb_spin_wait_delay IN (4294967296,4294967295) +1 +SET @@global.innodb_spin_wait_delay = 12345678901; +SELECT @@global.innodb_spin_wait_delay IN (12345678901,4294967295); +@@global.innodb_spin_wait_delay IN (12345678901,4294967295) +1 +SET @@global.innodb_spin_wait_delay = 18446744073709551615; +SELECT @@global.innodb_spin_wait_delay IN (18446744073709551615,4294967295); +@@global.innodb_spin_wait_delay IN (18446744073709551615,4294967295) +1 SET @@global.innodb_spin_wait_delay = @start_global_value; SELECT @@global.innodb_spin_wait_delay; @@global.innodb_spin_wait_delay diff --git a/mysql-test/suite/sys_vars/r/innodb_stats_persistent_basic.result b/mysql-test/suite/sys_vars/r/innodb_stats_persistent_basic.result index 1cbdd16afdf..94de032a0fd 100644 --- a/mysql-test/suite/sys_vars/r/innodb_stats_persistent_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_stats_persistent_basic.result @@ -21,4 +21,4 @@ SET GLOBAL innodb_stats_persistent=123; ERROR 42000: Variable 'innodb_stats_persistent' can't be set to the value of '123' SET GLOBAL innodb_stats_persistent='foo'; ERROR 42000: Variable 
'innodb_stats_persistent' can't be set to the value of 'foo' -SET GLOBAL innodb_stats_persistent=off; +SET GLOBAL innodb_stats_persistent=OFF; diff --git a/mysql-test/suite/sys_vars/r/innodb_stats_persistent_sample_pages_basic.result b/mysql-test/suite/sys_vars/r/innodb_stats_persistent_sample_pages_basic.result index d2e848621dd..ec211b693df 100644 --- a/mysql-test/suite/sys_vars/r/innodb_stats_persistent_sample_pages_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_stats_persistent_sample_pages_basic.result @@ -29,7 +29,7 @@ SET global innodb_stats_persistent_sample_pages=10; SELECT @@global.innodb_stats_persistent_sample_pages; @@global.innodb_stats_persistent_sample_pages 10 -SELECT * FROM information_schema.global_variables +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_persistent_sample_pages'; VARIABLE_NAME VARIABLE_VALUE INNODB_STATS_PERSISTENT_SAMPLE_PAGES 10 @@ -39,12 +39,36 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_STATS_PERSISTENT_SAMPLE_PAGES 10 SET session innodb_stats_persistent_sample_pages=1; ERROR HY000: Variable 'innodb_stats_persistent_sample_pages' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_stats_persistent_sample_pages=DEFAULT; +select @@global.innodb_stats_persistent_sample_pages; +@@global.innodb_stats_persistent_sample_pages +20 +SET global innodb_stats_persistent_sample_pages=0; +Warnings: +Warning 1292 Truncated incorrect innodb_stats_persistent_sample_p value: '0' +SELECT @@global.innodb_stats_persistent_sample_pages; +@@global.innodb_stats_persistent_sample_pages +1 +SET global innodb_stats_persistent_sample_pages=10; +SELECT @@global.innodb_stats_persistent_sample_pages; +@@global.innodb_stats_persistent_sample_pages +10 SET global innodb_stats_persistent_sample_pages=1.1; ERROR 42000: Incorrect argument type to variable 'innodb_stats_persistent_sample_pages' SET global innodb_stats_persistent_sample_pages=1e1; ERROR 42000: Incorrect argument type to variable 
'innodb_stats_persistent_sample_pages' SET global innodb_stats_persistent_sample_pages="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_stats_persistent_sample_pages' +SET global innodb_stats_persistent_sample_pages=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_stats_persistent_sample_pages' +SELECT @@global.innodb_stats_persistent_sample_pages; +@@global.innodb_stats_persistent_sample_pages +10 +SET global innodb_stats_persistent_sample_pages=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_stats_persistent_sample_pages' +SELECT @@global.innodb_stats_persistent_sample_pages; +@@global.innodb_stats_persistent_sample_pages +10 SET global innodb_stats_persistent_sample_pages=-7; Warnings: Warning 1292 Truncated incorrect innodb_stats_persistent_sample_p value: '-7' diff --git a/mysql-test/suite/sys_vars/r/innodb_stats_sample_pages_basic.result b/mysql-test/suite/sys_vars/r/innodb_stats_sample_pages_basic.result index e490773b63a..8618d602922 100644 --- a/mysql-test/suite/sys_vars/r/innodb_stats_sample_pages_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_stats_sample_pages_basic.result @@ -37,12 +37,28 @@ VARIABLE_NAME VARIABLE_VALUE INNODB_STATS_SAMPLE_PAGES 10 set session innodb_stats_sample_pages=1; ERROR HY000: Variable 'innodb_stats_sample_pages' is a GLOBAL variable and should be set with SET GLOBAL -set global innodb_stats_sample_pages=1.1; +set global innodb_stats_sample_pages=DEFAULT; +Warnings: +Warning 131 Using innodb_stats_sample_pages is deprecated and the variable may be removed in future releases. Please use innodb_stats_transient_sample_pages instead. 
+select @@global.innodb_stats_sample_pages; +@@global.innodb_stats_sample_pages +8 +set global innodb_stats_sample_pages = 1.1; ERROR 42000: Incorrect argument type to variable 'innodb_stats_sample_pages' -set global innodb_stats_sample_pages=1e1; +set global innodb_stats_sample_pages = 1e1; ERROR 42000: Incorrect argument type to variable 'innodb_stats_sample_pages' -set global innodb_stats_sample_pages="foo"; +set global innodb_stats_sample_pages = "foo"; ERROR 42000: Incorrect argument type to variable 'innodb_stats_sample_pages' +set global innodb_stats_sample_pages=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_stats_sample_pages' +select @@global.innodb_stats_sample_pages; +@@global.innodb_stats_sample_pages +8 +set global innodb_stats_sample_pages=" "; +ERROR 42000: Incorrect argument type to variable 'innodb_stats_sample_pages' +select @@global.innodb_stats_sample_pages; +@@global.innodb_stats_sample_pages +8 set global innodb_stats_sample_pages=-7; Warnings: Warning 1292 Truncated incorrect innodb_stats_sample_pages value: '-7' diff --git a/mysql-test/suite/sys_vars/r/innodb_stats_transient_sample_pages_basic.result b/mysql-test/suite/sys_vars/r/innodb_stats_transient_sample_pages_basic.result index 4c60dd5a697..1ea5ac3d3bc 100644 --- a/mysql-test/suite/sys_vars/r/innodb_stats_transient_sample_pages_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_stats_transient_sample_pages_basic.result @@ -25,6 +25,10 @@ SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_transient_sample_pages'; VARIABLE_NAME VARIABLE_VALUE INNODB_STATS_TRANSIENT_SAMPLE_PAGES 8 +set global innodb_stats_transient_sample_pages=DEFAULT; +select @@global.innodb_stats_transient_sample_pages; +@@global.innodb_stats_transient_sample_pages +8 SET global innodb_stats_transient_sample_pages=10; SELECT @@global.innodb_stats_transient_sample_pages; @@global.innodb_stats_transient_sample_pages @@ -45,6 +49,8 @@ SET global 
innodb_stats_transient_sample_pages=1e1; ERROR 42000: Incorrect argument type to variable 'innodb_stats_transient_sample_pages' SET global innodb_stats_transient_sample_pages="foo"; ERROR 42000: Incorrect argument type to variable 'innodb_stats_transient_sample_pages' +SET global innodb_stats_transient_sample_pages=' '; +ERROR 42000: Incorrect argument type to variable 'innodb_stats_transient_sample_pages' SET global innodb_stats_transient_sample_pages=-7; Warnings: Warning 1292 Truncated incorrect innodb_stats_transient_sample_pa value: '-7' diff --git a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result index 9c5e62d2684..8bddb6a1694 100644 --- a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result @@ -1,32 +1,32 @@ SET @start_global_value = @@global.innodb_strict_mode; SELECT @start_global_value; @start_global_value -0 +1 Valid values are 'ON' and 'OFF' select @@global.innodb_strict_mode in (0, 1); @@global.innodb_strict_mode in (0, 1) 1 select @@global.innodb_strict_mode; @@global.innodb_strict_mode -0 +1 select @@session.innodb_strict_mode in (0, 1); @@session.innodb_strict_mode in (0, 1) 1 select @@session.innodb_strict_mode; @@session.innodb_strict_mode -0 +1 show global variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode OFF +innodb_strict_mode ON show session variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode OFF +innodb_strict_mode ON select * from information_schema.global_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE OFF +INNODB_STRICT_MODE ON select * from information_schema.session_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE OFF +INNODB_STRICT_MODE ON set global innodb_strict_mode='OFF'; set session innodb_strict_mode='OFF'; select 
@@global.innodb_strict_mode; @@ -118,4 +118,4 @@ INNODB_STRICT_MODE ON SET @@global.innodb_strict_mode = @start_global_value; SELECT @@global.innodb_strict_mode; @@global.innodb_strict_mode -0 +1 diff --git a/mysql-test/suite/sys_vars/r/innodb_support_xa_basic.result b/mysql-test/suite/sys_vars/r/innodb_support_xa_basic.result index 754b09310bf..8384ee3d361 100644 --- a/mysql-test/suite/sys_vars/r/innodb_support_xa_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_support_xa_basic.result @@ -8,17 +8,27 @@ SELECT @global_start_value; 1 '#--------------------FN_DYNVARS_046_01------------------------#' SET @@session.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SET @@session.innodb_support_xa = DEFAULT; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa 1 SET @@global.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SET @@global.innodb_support_xa = DEFAULT; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa 1 '#---------------------FN_DYNVARS_046_02-------------------------#' SET innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@innodb_support_xa; @@innodb_support_xa 1 @@ -29,27 +39,39 @@ ERROR 42S02: Unknown table 'local' in field list SELECT global.innodb_support_xa; ERROR 42S02: Unknown table 'global' in field list SET session innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. 
Only innodb_support_xa=ON is allowed. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 SET global innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 '#--------------------FN_DYNVARS_046_03------------------------#' SET @@session.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 SET @@session.innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa 1 SET @@global.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SET @@global.innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa 1 @@ -67,9 +89,11 @@ ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'TR SET @@session.innodb_support_xa = N; ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'N' SET @@session.innodb_support_xa = OF; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. 
SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 SET @@session.innodb_support_xa = FF; ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'FF' SET @@global.innodb_support_xa = -1; @@ -88,18 +112,26 @@ ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'TR SET @@global.innodb_support_xa = N; ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'N' SET @@global.innodb_support_xa = OF; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SET @@global.innodb_support_xa = FF; ERROR 42000: Variable 'innodb_support_xa' can't be set to the value of 'FF' '#-------------------FN_DYNVARS_046_05----------------------------#' SET @@global.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SET @@session.innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa AS res_is_0; res_is_0 -0 +1 SET @@global.innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. 
SELECT @@session.innodb_support_xa AS res_is_1; res_is_1 1 @@ -112,11 +144,11 @@ VARIABLE_VALUE 1 SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_support_xa'; VARIABLE_VALUE -OFF +ON '#----------------------FN_DYNVARS_046_07------------------------#' SELECT IF(@@session.innodb_support_xa, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_VARIABLES @@ -133,43 +165,63 @@ VARIABLE_VALUE ON '#---------------------FN_DYNVARS_046_08-------------------------#' SET @@session.innodb_support_xa = OFF; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 SET @@session.innodb_support_xa = ON; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa 1 SET @@global.innodb_support_xa = OFF; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SET @@global.innodb_support_xa = ON; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa 1 '#---------------------FN_DYNVARS_046_09----------------------#' SET @@session.innodb_support_xa = TRUE; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa 1 SET @@session.innodb_support_xa = FALSE; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. 
Only innodb_support_xa=ON is allowed. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 SET @@global.innodb_support_xa = TRUE; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa 1 SET @@global.innodb_support_xa = FALSE; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SET @@session.innodb_support_xa = @session_start_value; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@session.innodb_support_xa; @@session.innodb_support_xa 1 SET @@global.innodb_support_xa = @global_start_value; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SELECT @@global.innodb_support_xa; @@global.innodb_support_xa 1 diff --git a/mysql-test/suite/sys_vars/r/innodb_support_xa_func.result b/mysql-test/suite/sys_vars/r/innodb_support_xa_func.result index 7291b60ea62..d86cf896016 100644 --- a/mysql-test/suite/sys_vars/r/innodb_support_xa_func.result +++ b/mysql-test/suite/sys_vars/r/innodb_support_xa_func.result @@ -1,21 +1,27 @@ '#--------------------FN_DYNVARS_046_01-------------------------#' SET @@global.innodb_support_xa = OFF; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. 
connect con1,localhost,root,,,,; connection con1; SELECT @@global.innodb_support_xa; @@global.innodb_support_xa -0 +1 SELECT @@session.innodb_support_xa; @@session.innodb_support_xa -0 +1 disconnect con1; '#--------------------FN_DYNVARS_046_01-------------------------#' connection default; SET @@global.innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. drop table if exists t1, t2; create table t1 (a int) engine=innodb; '---check when innodb_support_xa is 1---' SET @@innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. xa start 'test1'; INSERT t1 values (10); xa end 'test1'; @@ -25,6 +31,8 @@ SELECT * from t1; a '---check when innodb_support_xa is 0---' SET @@innodb_support_xa = 0; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. Only innodb_support_xa=ON is allowed. xa start 'test1'; INSERT t1 values (10); xa end 'test1'; @@ -34,7 +42,11 @@ SELECT * from t1; a '------general xa testing--------' SET @@global.innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. SET @@innodb_support_xa = 1; +Warnings: +Warning 131 Using innodb_support_xa is deprecated and the parameter may be removed in future releases. 
xa start 'testa','testb'; INSERT t1 values (30); COMMIT; diff --git a/mysql-test/suite/sys_vars/r/innodb_sync_debug_basic.result b/mysql-test/suite/sys_vars/r/innodb_sync_debug_basic.result new file mode 100644 index 00000000000..72420c8595b --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_sync_debug_basic.result @@ -0,0 +1,11 @@ +# +# Basic test for innodb_sync_debug +# +SELECT @@global.innodb_sync_debug; +@@global.innodb_sync_debug +0 +set global innodb_sync_debug = 1; +ERROR HY000: Variable 'innodb_sync_debug' is a read only variable +SELECT @@global.innodb_sync_debug; +@@global.innodb_sync_debug +0 diff --git a/mysql-test/suite/sys_vars/r/innodb_sync_spin_loops_basic.result b/mysql-test/suite/sys_vars/r/innodb_sync_spin_loops_basic.result index ba45d4f2ed0..3377b690e49 100644 --- a/mysql-test/suite/sys_vars/r/innodb_sync_spin_loops_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_sync_spin_loops_basic.result @@ -25,15 +25,28 @@ SET @@global.innodb_sync_spin_loops = 0; SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops 0 -SET @@global.innodb_sync_spin_loops = 1; +SET @@global.innodb_sync_spin_loops = 65535; SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1 -SET @@global.innodb_sync_spin_loops = 1000; +65535 +SET @@global.innodb_sync_spin_loops = 4294967295; SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1000 +4294967295 '#--------------------FN_DYNVARS_046_04-------------------------#' +SET @@global.innodb_sync_spin_loops = 4294967296; +SELECT @@global.innodb_sync_spin_loops IN (4294967296,4294967295); +@@global.innodb_sync_spin_loops IN (4294967296,4294967295) +1 +SET @@global.innodb_sync_spin_loops = 12345678901; +SELECT @@global.innodb_sync_spin_loops IN (12345678901,4294967295); +@@global.innodb_sync_spin_loops IN (12345678901,4294967295) +1 +SET @@global.innodb_sync_spin_loops = 18446744073709551615; +SELECT @@global.innodb_sync_spin_loops IN 
(18446744073709551615,4294967295); +@@global.innodb_sync_spin_loops IN (18446744073709551615,4294967295) +1 +'#--------------------FN_DYNVARS_046_05-------------------------#' SET @@global.innodb_sync_spin_loops = -1; Warnings: Warning 1292 Truncated incorrect innodb_sync_spin_loops value: '-1' @@ -50,11 +63,28 @@ ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops 0 -SET @@global.innodb_sync_spin_loops = 1001; +SET @@global.innodb_sync_spin_loops = 65535.01; +ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1001 -'#----------------------FN_DYNVARS_046_05------------------------#' +0 +SET @@global.innodb_sync_spin_loops = -1024; +Warnings: +Warning 1292 Truncated incorrect innodb_sync_spin_loops value: '-1024' +SELECT @@global.innodb_sync_spin_loops; +@@global.innodb_sync_spin_loops +0 +SET @@global.innodb_sync_spin_loops = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' +SELECT @@global.innodb_sync_spin_loops; +@@global.innodb_sync_spin_loops +0 +SET @@global.innodb_sync_spin_loops = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' +SELECT @@global.innodb_sync_spin_loops; +@@global.innodb_sync_spin_loops +0 +'#----------------------FN_DYNVARS_046_06------------------------#' SELECT @@global.innodb_sync_spin_loops = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_sync_spin_loops'; @@ -63,23 +93,23 @@ VARIABLE_VALUE 1 SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1001 +0 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_sync_spin_loops'; VARIABLE_VALUE -1001 -'#---------------------FN_DYNVARS_046_06-------------------------#' +0 +'#---------------------FN_DYNVARS_046_07-------------------------#' SET 
@@global.innodb_sync_spin_loops = OFF; ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1001 +0 SET @@global.innodb_sync_spin_loops = ON; ERROR 42000: Incorrect argument type to variable 'innodb_sync_spin_loops' SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops -1001 -'#---------------------FN_DYNVARS_046_07----------------------#' +0 +'#---------------------FN_DYNVARS_046_08----------------------#' SET @@global.innodb_sync_spin_loops = TRUE; SELECT @@global.innodb_sync_spin_loops; @@global.innodb_sync_spin_loops diff --git a/mysql-test/suite/sys_vars/r/innodb_table_locks_func.result b/mysql-test/suite/sys_vars/r/innodb_table_locks_func.result index 8e7806ad7e0..0f9e1e8ccf0 100644 --- a/mysql-test/suite/sys_vars/r/innodb_table_locks_func.result +++ b/mysql-test/suite/sys_vars/r/innodb_table_locks_func.result @@ -4,7 +4,9 @@ SELECT @start_value; @start_value 1 SET @@global.innodb_table_locks = OFF; +'connect (con1,localhost,root,,,,)' connect con1,localhost,root,,,,; +'connection con1' connection con1; SELECT @@global.innodb_table_locks; @@global.innodb_table_locks @@ -15,7 +17,9 @@ SELECT @@session.innodb_table_locks; disconnect con1; '#--------------------FN_DYNVARS_048_02-------------------------#' '----check when innodb_table_locks = ON and autocommit = OFF---' +'connect (con2,localhost,root,,,,)' connect con2,localhost,root,,,,; +'connection default' connection default; DROP TABLE IF EXISTS t1; CREATE TABLE t1 (a INT) ENGINE=INNODB; @@ -26,12 +30,15 @@ INSERT INTO t1 VALUES(1); SELECT * FROM t1 FOR UPDATE; a 1 +'CONNECTION con2' connection con2; SET @@innodb_table_locks = ON; SET @@autocommit = OFF; LOCK TABLES t1 WRITE; +'CONNECTION default' connection default; COMMIT; +'CONNECTION con2' connection con2; UNLOCK tables; DROP TABLE t1; diff --git a/mysql-test/suite/sys_vars/r/innodb_temp_data_file_path_basic.result 
b/mysql-test/suite/sys_vars/r/innodb_temp_data_file_path_basic.result new file mode 100644 index 00000000000..2357a07e3ab --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_temp_data_file_path_basic.result @@ -0,0 +1,53 @@ +'#---------------------BS_STVARS_024_01----------------------#' +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); +COUNT(@@GLOBAL.innodb_temp_data_file_path) +1 +1 Expected +'#---------------------BS_STVARS_024_02----------------------#' +SET @@GLOBAL.innodb_temp_data_file_path=1; +ERROR HY000: Variable 'innodb_temp_data_file_path' is a read only variable +Expected error 'Read only variable' +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); +COUNT(@@GLOBAL.innodb_temp_data_file_path) +1 +1 Expected +'#---------------------BS_STVARS_024_03----------------------#' +SELECT @@GLOBAL.innodb_temp_data_file_path = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_temp_data_file_path'; +@@GLOBAL.innodb_temp_data_file_path = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); +COUNT(@@GLOBAL.innodb_temp_data_file_path) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_temp_data_file_path'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +'#---------------------BS_STVARS_024_04----------------------#' +SELECT @@innodb_temp_data_file_path = @@GLOBAL.innodb_temp_data_file_path; +@@innodb_temp_data_file_path = @@GLOBAL.innodb_temp_data_file_path +1 +1 Expected +'#---------------------BS_STVARS_024_05----------------------#' +SELECT COUNT(@@innodb_temp_data_file_path); +COUNT(@@innodb_temp_data_file_path) +1 +1 Expected +SELECT COUNT(@@local.innodb_temp_data_file_path); +ERROR HY000: Variable 'innodb_temp_data_file_path' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_temp_data_file_path); +ERROR HY000: Variable 'innodb_temp_data_file_path' is a GLOBAL variable +Expected error 
'Variable is a GLOBAL variable' +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); +COUNT(@@GLOBAL.innodb_temp_data_file_path) +1 +1 Expected +SELECT innodb_temp_data_file_path = @@SESSION.innodb_temp_data_file_path; +ERROR 42S22: Unknown column 'innodb_temp_data_file_path' in 'field list' +Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result b/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result index c7af96bb22a..ca3c253604a 100644 --- a/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result @@ -50,12 +50,27 @@ ERROR 42000: Incorrect argument type to variable 'innodb_thread_concurrency' SELECT @@global.innodb_thread_concurrency; @@global.innodb_thread_concurrency 0 +SET @@global.innodb_thread_concurrency = ' '; +ERROR 42000: Incorrect argument type to variable 'innodb_thread_concurrency' +SELECT @@global.innodb_thread_concurrency; +@@global.innodb_thread_concurrency +0 +SET @@global.innodb_thread_concurrency = " "; +ERROR 42000: Incorrect argument type to variable 'innodb_thread_concurrency' +SELECT @@global.innodb_thread_concurrency; +@@global.innodb_thread_concurrency +0 SET @@global.innodb_thread_concurrency = 1001; Warnings: Warning 1292 Truncated incorrect innodb_thread_concurrency value: '1001' SELECT @@global.innodb_thread_concurrency; @@global.innodb_thread_concurrency 1000 +SET @@global.innodb_thread_concurrency = 255.01; +ERROR 42000: Incorrect argument type to variable 'innodb_thread_concurrency' +SELECT @@global.innodb_thread_concurrency; +@@global.innodb_thread_concurrency +1000 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_thread_concurrency = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES diff --git a/mysql-test/suite/sys_vars/r/innodb_undo_directory_basic.result b/mysql-test/suite/sys_vars/r/innodb_undo_directory_basic.result index 
e7d7cef67c7..fbafe653d29 100644 --- a/mysql-test/suite/sys_vars/r/innodb_undo_directory_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_undo_directory_basic.result @@ -1,7 +1,7 @@ -SELECT @@GLOBAL.innodb_undo_directory; -@@GLOBAL.innodb_undo_directory -. -. Expected +SELECT COUNT(@@GLOBAL.innodb_undo_directory); +COUNT(@@GLOBAL.innodb_undo_directory) +1 +1 Expected SET @@GLOBAL.innodb_undo_directory="/tmp"; ERROR HY000: Variable 'innodb_undo_directory' is a read only variable Expected error 'Read only variable' @@ -9,12 +9,12 @@ SELECT COUNT(@@GLOBAL.innodb_undo_directory); COUNT(@@GLOBAL.innodb_undo_directory) 1 1 Expected -SELECT VARIABLE_VALUE +SELECT @@GLOBAL.innodb_undo_directory = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_undo_directory'; -VARIABLE_VALUE -. -. Expected +@@GLOBAL.innodb_undo_directory = VARIABLE_VALUE +1 +1 Expected SELECT COUNT(@@GLOBAL.innodb_undo_directory); COUNT(@@GLOBAL.innodb_undo_directory) 1 diff --git a/mysql-test/suite/sys_vars/r/innodb_undo_log_truncate_basic.result b/mysql-test/suite/sys_vars/r/innodb_undo_log_truncate_basic.result new file mode 100644 index 00000000000..eb42f4965ea --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_undo_log_truncate_basic.result @@ -0,0 +1,69 @@ +SET @start_global_value = @@global.innodb_undo_log_truncate; +SELECT @start_global_value; +@start_global_value +0 +'#---------------------BS_STVARS_028_01----------------------#' +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +COUNT(@@GLOBAL.innodb_undo_log_truncate) +1 +1 Expected +'#---------------------BS_STVARS_028_02----------------------#' +SET @@global.innodb_undo_log_truncate = 0; +SELECT @@global.innodb_undo_log_truncate; +@@global.innodb_undo_log_truncate +0 +SET @@global.innodb_undo_log_truncate ='On' ; +SELECT @@global.innodb_undo_log_truncate; +@@global.innodb_undo_log_truncate +1 +SET @@global.innodb_undo_log_truncate ='Off' ; +SELECT @@global.innodb_undo_log_truncate; 
+@@global.innodb_undo_log_truncate +0 +SET @@global.innodb_undo_log_truncate = 1; +SELECT @@global.innodb_undo_log_truncate; +@@global.innodb_undo_log_truncate +1 +'#---------------------BS_STVARS_028_03----------------------#' +SELECT IF(@@GLOBAL.innodb_undo_log_truncate,'ON','OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_undo_log_truncate'; +IF(@@GLOBAL.innodb_undo_log_truncate,'ON','OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +COUNT(@@GLOBAL.innodb_undo_log_truncate) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_undo_log_truncate'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +'#---------------------BS_STVARS_028_04----------------------#' +SELECT @@innodb_undo_log_truncate = @@GLOBAL.innodb_undo_log_truncate; +@@innodb_undo_log_truncate = @@GLOBAL.innodb_undo_log_truncate +1 +1 Expected +'#---------------------BS_STVARS_028_05----------------------#' +SELECT COUNT(@@innodb_undo_log_truncate); +COUNT(@@innodb_undo_log_truncate) +1 +1 Expected +SELECT COUNT(@@local.innodb_undo_log_truncate); +ERROR HY000: Variable 'innodb_undo_log_truncate' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_undo_log_truncate); +ERROR HY000: Variable 'innodb_undo_log_truncate' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +COUNT(@@GLOBAL.innodb_undo_log_truncate) +1 +1 Expected +SELECT innodb_undo_log_truncate = @@SESSION.innodb_undo_log_truncate; +ERROR 42S22: Unknown column 'innodb_undo_log_truncate' in 'field list' +SET @@global.innodb_undo_log_truncate = @start_global_value; +SELECT @@global.innodb_undo_log_truncate; +@@global.innodb_undo_log_truncate +0 diff --git a/mysql-test/suite/sys_vars/r/innodb_undo_tablespaces_basic.result b/mysql-test/suite/sys_vars/r/innodb_undo_tablespaces_basic.result index 
6130484ad86..c7e0b21a12b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_undo_tablespaces_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_undo_tablespaces_basic.result @@ -1,7 +1,6 @@ -SELECT @@GLOBAL.innodb_undo_tablespaces; -@@GLOBAL.innodb_undo_tablespaces -0 -0 Expected +SELECT @@GLOBAL.innodb_undo_tablespaces >= 0; +@@GLOBAL.innodb_undo_tablespaces >= 0 +1 SET @@GLOBAL.innodb_undo_tablespaces=128; ERROR HY000: Variable 'innodb_undo_tablespaces' is a read only variable Expected error 'Read only variable' @@ -9,10 +8,7 @@ SELECT COUNT(@@GLOBAL.innodb_undo_tablespaces); COUNT(@@GLOBAL.innodb_undo_tablespaces) 1 1 Expected -SELECT VARIABLE_VALUE -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_undo_tablespaces'; -VARIABLE_VALUE +DIFFERENCE 0 0 Expected SELECT @@innodb_undo_tablespaces = @@GLOBAL.innodb_undo_tablespaces; diff --git a/mysql-test/suite/sys_vars/r/innodb_use_sys_malloc_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_sys_malloc_basic.result deleted file mode 100644 index 70ecef72042..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_use_sys_malloc_basic.result +++ /dev/null @@ -1,22 +0,0 @@ -Valid values are 'ON' and 'OFF' -select @@global.innodb_use_sys_malloc; -@@global.innodb_use_sys_malloc -1 -select @@session.innodb_use_sys_malloc; -ERROR HY000: Variable 'innodb_use_sys_malloc' is a GLOBAL variable -show global variables like 'innodb_use_sys_malloc'; -Variable_name Value -innodb_use_sys_malloc ON -show session variables like 'innodb_use_sys_malloc'; -Variable_name Value -innodb_use_sys_malloc ON -select * from information_schema.global_variables where variable_name='innodb_use_sys_malloc'; -VARIABLE_NAME VARIABLE_VALUE -INNODB_USE_SYS_MALLOC ON -select * from information_schema.session_variables where variable_name='innodb_use_sys_malloc'; -VARIABLE_NAME VARIABLE_VALUE -INNODB_USE_SYS_MALLOC ON -set global innodb_use_sys_malloc=1; -ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable -set 
session innodb_use_sys_malloc=1; -ERROR HY000: Variable 'innodb_use_sys_malloc' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit,xtradb.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit,xtradb.rdiff-disabled similarity index 100% rename from mysql-test/suite/sys_vars/r/sysvars_innodb,32bit,xtradb.rdiff rename to mysql-test/suite/sys_vars/r/sysvars_innodb,32bit,xtradb.rdiff-disabled diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff index ccdaa3ec644..b2556feb1ad 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff @@ -1,6 +1,15 @@ ---- suite/sys_vars/r/sysvars_innodb.result -+++ suite/sys_vars/r/sysvars_innodb.result +--- r/sysvars_innodb.result ++++ r/sysvars_innodb,32bit.result~ @@ -53,7 +53,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 8 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Number of InnoDB Adapative Hash Index Partitions. (default = 8). + NUMERIC_MIN_VALUE 1 + NUMERIC_MAX_VALUE 512 +@@ -67,7 +67,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 150000 VARIABLE_SCOPE GLOBAL @@ -9,19 +18,6 @@ VARIABLE_COMMENT The upper limit of the sleep delay in usec. Value of 0 disables it. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 1000000 -@@ -67,10 +67,10 @@ - GLOBAL_VALUE_ORIGIN COMPILE-TIME - DEFAULT_VALUE 8388608 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT -+VARIABLE_TYPE INT - VARIABLE_COMMENT DEPRECATED. This option may be removed in future releases, together with the option innodb_use_sys_malloc and with the InnoDB's internal memory allocator. Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures. 
- NUMERIC_MIN_VALUE 524288 --NUMERIC_MAX_VALUE 9223372036854775807 -+NUMERIC_MAX_VALUE 2147483647 - NUMERIC_BLOCK_SIZE 1024 - ENUM_VALUE_LIST NULL - READ_ONLY YES @@ -81,7 +81,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 5 @@ -55,28 +51,41 @@ VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT +VARIABLE_TYPE INT - VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility) 1 => New style AUTOINC locking 2 => No AUTOINC locking (unsafe for SBR) + VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -263,7 +263,7 @@ +@@ -249,10 +249,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME - DEFAULT_VALUE 100 + DEFAULT_VALUE 134217728 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 100 + VARIABLE_COMMENT Size of a single memory chunk within each buffer pool instance for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means disable resizing buffer pool. 
+ NUMERIC_MIN_VALUE 1048576 +-NUMERIC_MAX_VALUE 9223372036854775807 ++NUMERIC_MAX_VALUE 2147483647 + NUMERIC_BLOCK_SIZE 1048576 + ENUM_VALUE_LIST NULL + READ_ONLY YES +@@ -291,7 +291,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 25 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -305,7 +305,7 @@ +@@ -333,7 +333,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT -+VARIABLE_TYPE INT +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Number of buffer pool instances, set to higher value on high-end machines to increase scalability NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -375,7 +375,7 @@ +@@ -403,7 +403,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -85,7 +94,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -487,7 +487,7 @@ +@@ -515,7 +515,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -94,7 +103,7 @@ VARIABLE_COMMENT Helps in performance tuning in heavily concurrent environments. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 1000 -@@ -515,7 +515,7 @@ +@@ -543,7 +543,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -103,7 +112,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. 
A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -543,7 +543,7 @@ +@@ -571,7 +571,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -112,7 +121,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -557,10 +557,10 @@ +@@ -599,10 +599,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 5000 VARIABLE_SCOPE GLOBAL @@ -125,7 +134,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -753,7 +753,7 @@ +@@ -837,7 +837,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 120 VARIABLE_SCOPE GLOBAL @@ -134,7 +143,7 @@ VARIABLE_COMMENT Number of pages reserved in doublewrite buffer for batch flushing NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 127 -@@ -837,7 +837,7 @@ +@@ -921,7 +921,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -143,7 +152,7 @@ VARIABLE_COMMENT Speeds up the shutdown process of the InnoDB storage engine. Possible values are 0, 1 (faster) or 2 (fastest - crash-like). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -851,7 +851,7 @@ +@@ -935,7 +935,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -152,7 +161,16 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -921,7 +921,7 @@ +@@ -1005,7 +1005,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 100 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT ++VARIABLE_TYPE INT + VARIABLE_COMMENT Percentage of B-tree page filled during bulk insert + NUMERIC_MIN_VALUE 10 + NUMERIC_MAX_VALUE 100 +@@ -1019,7 +1019,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -161,7 +179,7 @@ VARIABLE_COMMENT Make the first page of the given tablespace dirty. 
NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 4294967295 -@@ -935,7 +935,7 @@ +@@ -1033,7 +1033,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -170,7 +188,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -963,7 +963,7 @@ +@@ -1061,7 +1061,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -179,7 +197,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -991,7 +991,7 @@ +@@ -1089,7 +1089,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -188,7 +206,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -1033,7 +1033,7 @@ +@@ -1145,7 +1145,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -197,7 +215,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -1047,7 +1047,7 @@ +@@ -1159,7 +1159,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -205,8 +223,8 @@ +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Kills the server during crash recovery. 
NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 10 -@@ -1075,7 +1075,7 @@ + NUMERIC_MAX_VALUE 100 +@@ -1187,7 +1187,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -215,7 +233,7 @@ VARIABLE_COMMENT InnoDB Fulltext search cache size in bytes NUMERIC_MIN_VALUE 1600000 NUMERIC_MAX_VALUE 80000000 -@@ -1117,7 +1117,7 @@ +@@ -1229,7 +1229,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -224,7 +242,7 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -1131,7 +1131,7 @@ +@@ -1243,7 +1243,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL @@ -233,7 +251,7 @@ VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 16 -@@ -1145,7 +1145,7 @@ +@@ -1257,7 +1257,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -242,7 +260,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -1159,7 +1159,7 @@ +@@ -1271,7 +1271,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -251,7 +269,7 @@ VARIABLE_COMMENT InnoDB Fulltext search query result cache limit in bytes NUMERIC_MIN_VALUE 1000000 NUMERIC_MAX_VALUE 4294967295 -@@ -1187,7 +1187,7 @@ +@@ -1299,7 +1299,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -260,7 +278,7 @@ VARIABLE_COMMENT InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -1201,7 +1201,7 @@ +@@ -1313,7 +1313,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -269,7 +287,7 @@ VARIABLE_COMMENT Total memory allocated for InnoDB Fulltext Search cache NUMERIC_MIN_VALUE 32000000 NUMERIC_MAX_VALUE 1600000000 -@@ -1229,7 +1229,7 @@ +@@ -1341,7 +1341,7 @@ 
GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 100 VARIABLE_SCOPE GLOBAL @@ -278,7 +296,7 @@ VARIABLE_COMMENT Up to what percentage of dirty pages should be flushed when innodb finds it has spare resources to do so. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -1271,10 +1271,10 @@ +@@ -1383,10 +1383,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -291,7 +309,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1283,12 +1283,12 @@ +@@ -1395,12 +1395,12 @@ SESSION_VALUE NULL GLOBAL_VALUE 2000 GLOBAL_VALUE_ORIGIN COMPILE-TIME @@ -307,7 +325,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1341,7 +1341,7 @@ +@@ -1453,7 +1453,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 50 VARIABLE_SCOPE SESSION @@ -316,7 +334,7 @@ VARIABLE_COMMENT Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1073741824 -@@ -1355,10 +1355,10 @@ +@@ -1467,10 +1467,10 @@ GLOBAL_VALUE_ORIGIN CONFIG DEFAULT_VALUE 16777216 VARIABLE_SCOPE GLOBAL @@ -329,7 +347,7 @@ NUMERIC_BLOCK_SIZE 1024 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1397,7 +1397,7 @@ +@@ -1523,7 +1523,7 @@ GLOBAL_VALUE_ORIGIN CONFIG DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -338,7 +356,16 @@ VARIABLE_COMMENT Number of log files in the log group. InnoDB writes to the files in a circular fashion. 
NUMERIC_MIN_VALUE 2 NUMERIC_MAX_VALUE 100 -@@ -1439,10 +1439,10 @@ +@@ -1565,7 +1565,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 8192 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Redo log write ahead unit size to avoid read-on-write, it should match the OS cache block IO size + NUMERIC_MIN_VALUE 512 + NUMERIC_MAX_VALUE 16384 +@@ -1579,10 +1579,10 @@ GLOBAL_VALUE_ORIGIN CONFIG DEFAULT_VALUE 1024 VARIABLE_SCOPE GLOBAL @@ -351,7 +378,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1481,10 +1481,10 @@ +@@ -1635,10 +1635,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -364,7 +391,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1495,7 +1495,7 @@ +@@ -1649,7 +1649,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -373,16 +400,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1509,7 +1509,7 @@ - GLOBAL_VALUE_ORIGIN COMPILE-TIME - DEFAULT_VALUE 0 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT -+VARIABLE_TYPE INT - VARIABLE_COMMENT Number of identical copies of log groups we keep for the database. Currently this should be set to 1. - NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 10 -@@ -1579,7 +1579,7 @@ +@@ -1747,7 +1747,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 8 VARIABLE_SCOPE GLOBAL @@ -391,7 +409,7 @@ VARIABLE_COMMENT Number of multi-threaded flush threads NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 64 -@@ -1635,10 +1635,10 @@ +@@ -1803,10 +1803,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -404,7 +422,16 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1663,7 +1663,7 @@ +@@ -1831,7 +1831,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 4 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Page cleaner threads can be from 1 to 64. 
Default is 4. + NUMERIC_MIN_VALUE 1 + NUMERIC_MAX_VALUE 64 +@@ -1859,7 +1859,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 16 VARIABLE_SCOPE GLOBAL @@ -413,7 +440,7 @@ VARIABLE_COMMENT Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1024 -@@ -1677,7 +1677,7 @@ +@@ -1873,7 +1873,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -422,7 +449,7 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1719,7 +1719,7 @@ +@@ -1915,7 +1915,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 300 VARIABLE_SCOPE GLOBAL @@ -431,16 +458,25 @@ VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1761,7 +1761,7 @@ +@@ -1929,7 +1929,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME - DEFAULT_VALUE 1 + DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Purge threads can be from 1 to 32. Default is 1. + VARIABLE_COMMENT Dictates rate at which UNDO records are purged. Value N means purge rollback segment(s) on every Nth iteration of purge invocation + NUMERIC_MIN_VALUE 1 + NUMERIC_MAX_VALUE 128 +@@ -1971,7 +1971,7 @@ + GLOBAL_VALUE_ORIGIN COMPILE-TIME + DEFAULT_VALUE 4 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Purge threads can be from 1 to 32. Default is 4. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 32 -@@ -1789,7 +1789,7 @@ +@@ -1999,7 +1999,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -449,7 +485,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. 
NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1803,7 +1803,7 @@ +@@ -2013,7 +2013,7 @@ GLOBAL_VALUE_ORIGIN CONFIG DEFAULT_VALUE 4 VARIABLE_SCOPE GLOBAL @@ -458,7 +494,7 @@ VARIABLE_COMMENT Number of background read I/O threads in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 64 -@@ -1831,10 +1831,10 @@ +@@ -2041,10 +2041,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -471,7 +507,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1859,7 +1859,7 @@ +@@ -2069,7 +2069,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -480,7 +516,7 @@ VARIABLE_COMMENT Number of undo logs to use (deprecated). NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1873,7 +1873,7 @@ +@@ -2083,7 +2083,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -489,7 +525,7 @@ VARIABLE_COMMENT An InnoDB page number. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 4294967295 -@@ -1929,7 +1929,7 @@ +@@ -2139,7 +2139,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -498,7 +534,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1943,10 +1943,10 @@ +@@ -2153,10 +2153,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 6 VARIABLE_SCOPE GLOBAL @@ -511,7 +547,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -2139,7 +2139,7 @@ +@@ -2349,7 +2349,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -520,7 +556,7 @@ VARIABLE_COMMENT Size of the mutex/lock wait array. 
NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1024 -@@ -2153,10 +2153,10 @@ +@@ -2377,10 +2377,10 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -533,7 +569,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -2181,7 +2181,7 @@ +@@ -2419,7 +2419,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -542,7 +578,7 @@ VARIABLE_COMMENT Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 1000 -@@ -2195,7 +2195,7 @@ +@@ -2433,7 +2433,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 10000 VARIABLE_SCOPE GLOBAL @@ -551,7 +587,7 @@ VARIABLE_COMMENT Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 1000000 -@@ -2265,7 +2265,7 @@ +@@ -2503,7 +2503,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -560,7 +596,7 @@ VARIABLE_COMMENT Number of undo logs to use. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -2279,7 +2279,7 @@ +@@ -2531,7 +2531,7 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -568,8 +604,8 @@ +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Number of undo tablespaces to use. 
NUMERIC_MIN_VALUE 0 - NUMERIC_MAX_VALUE 126 -@@ -2377,7 +2377,7 @@ + NUMERIC_MAX_VALUE 95 +@@ -2615,7 +2615,7 @@ GLOBAL_VALUE_ORIGIN CONFIG DEFAULT_VALUE 4 VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,xtradb.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,xtradb.rdiff-disabled similarity index 100% rename from mysql-test/suite/sys_vars/r/sysvars_innodb,xtradb.rdiff rename to mysql-test/suite/sys_vars/r/sysvars_innodb,xtradb.rdiff-disabled diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 8ef0d449042..f586b7b294a 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -47,6 +47,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_ADAPTIVE_HASH_INDEX_PARTS +SESSION_VALUE NULL +GLOBAL_VALUE 8 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 8 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Number of InnoDB Adapative Hash Index Partitions. (default = 8). +NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 512 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_ADAPTIVE_MAX_SLEEP_DELAY SESSION_VALUE NULL GLOBAL_VALUE 150000 @@ -61,20 +75,6 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED -VARIABLE_NAME INNODB_ADDITIONAL_MEM_POOL_SIZE -SESSION_VALUE NULL -GLOBAL_VALUE 8388608 -GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE 8388608 -VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT -VARIABLE_COMMENT DEPRECATED. This option may be removed in future releases, together with the option innodb_use_sys_malloc and with the InnoDB's internal memory allocator. Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures. 
-NUMERIC_MIN_VALUE 524288 -NUMERIC_MAX_VALUE 9223372036854775807 -NUMERIC_BLOCK_SIZE 1024 -ENUM_VALUE_LIST NULL -READ_ONLY YES -COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_API_BK_COMMIT_INTERVAL SESSION_VALUE NULL GLOBAL_VALUE 5 @@ -166,13 +166,27 @@ GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT -VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility) 1 => New style AUTOINC locking 2 => No AUTOINC locking (unsafe for SBR) +VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BACKGROUND_DROP_LIST_EMPTY +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Wait for the background drop list to become empty +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_BACKGROUND_SCRUB_DATA_CHECK_INTERVAL SESSION_VALUE NULL GLOBAL_VALUE 3600 @@ -229,11 +243,25 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BUFFER_POOL_CHUNK_SIZE +SESSION_VALUE NULL +GLOBAL_VALUE 8388608 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 134217728 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Size of a single memory chunk within each buffer pool instance for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means disable resizing buffer pool. 
+NUMERIC_MIN_VALUE 1048576 +NUMERIC_MAX_VALUE 9223372036854775807 +NUMERIC_BLOCK_SIZE 1048576 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUFFER_POOL_DUMP_AT_SHUTDOWN SESSION_VALUE NULL -GLOBAL_VALUE OFF +GLOBAL_VALUE ON GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE OFF +DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN VARIABLE_COMMENT Dump the buffer pool into a file named @@innodb_buffer_pool_filename @@ -259,12 +287,12 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUFFER_POOL_DUMP_PCT SESSION_VALUE NULL -GLOBAL_VALUE 100 +GLOBAL_VALUE 25 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE 100 +DEFAULT_VALUE 25 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 100 +VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 NUMERIC_BLOCK_SIZE 0 @@ -301,15 +329,15 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUFFER_POOL_INSTANCES SESSION_VALUE NULL -GLOBAL_VALUE 8 +GLOBAL_VALUE 1 GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT +VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Number of buffer pool instances, set to higher value on high-end machines to increase scalability NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -NUMERIC_BLOCK_SIZE 1 +NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED @@ -329,9 +357,9 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUFFER_POOL_LOAD_AT_STARTUP SESSION_VALUE NULL -GLOBAL_VALUE OFF +GLOBAL_VALUE ON GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE OFF +DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN VARIABLE_COMMENT Load the buffer pool from a file named @@innodb_buffer_pool_filename @@ -367,7 +395,7 @@ NUMERIC_MIN_VALUE 5242880 NUMERIC_MAX_VALUE 9223372036854775807 NUMERIC_BLOCK_SIZE 
1048576 ENUM_VALUE_LIST NULL -READ_ONLY YES +READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUF_DUMP_STATUS_FREQUENCY SESSION_VALUE NULL @@ -455,16 +483,16 @@ READ_ONLY YES COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_CHECKSUM_ALGORITHM SESSION_VALUE NULL -GLOBAL_VALUE INNODB +GLOBAL_VALUE crc32 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE INNODB +DEFAULT_VALUE crc32 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE ENUM VARIABLE_COMMENT The algorithm InnoDB uses for page checksumming. Possible values are CRC32 (hardware accelerated if the CPU supports it) write crc32, allow any of the other checksums to match when reading; STRICT_CRC32 write crc32, do not allow other algorithms to match when reading; INNODB write a software calculated checksum, allow any other checksums to match when reading; STRICT_INNODB write a software calculated checksum, do not allow other algorithms to match when reading; NONE write a constant magic number, do not do any checksum verification when reading (same as innodb_checksums=OFF); STRICT_NONE write a constant magic number, do not allow values other than that magic number when reading; Files updated when this option is set to crc32 or strict_crc32 will not be readable by MySQL versions older than 5.6.3 NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST CRC32,STRICT_CRC32,INNODB,STRICT_INNODB,NONE,STRICT_NONE +ENUM_VALUE_LIST crc32,strict_crc32,innodb,strict_innodb,none,strict_none READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_CMP_PER_INDEX_ENABLED @@ -551,6 +579,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_COMPRESS_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE none +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE none +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE ENUM +VARIABLE_COMMENT Compress all tables, without specifying the COMPRESS table attribute +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL 
+NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST none,zlib,lz4,lz4hc +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_CONCURRENCY_TICKETS SESSION_VALUE NULL GLOBAL_VALUE 5000 @@ -621,6 +663,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_DEFAULT_ROW_FORMAT +SESSION_VALUE NULL +GLOBAL_VALUE dynamic +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE dynamic +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE ENUM +VARIABLE_COMMENT The default ROW FORMAT for all innodb tables created without explicit ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC. The ROW_FORMAT value COMPRESSED is not allowed +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST redundant,compact,dynamic +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_DEFRAGMENT SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -705,6 +761,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_DICT_STATS_DISABLED_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Disable dict_stats thread +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_DISABLE_BACKGROUND_MERGE SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -719,6 +789,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT NONE +VARIABLE_NAME INNODB_DISABLE_RESIZE_BUFFER_POOL_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE ON +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE ON +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Disable resizing buffer pool to make assertion code not expensive. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_DISABLE_SORT_FILE_CACHE SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -861,9 +945,9 @@ READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_FILE_FORMAT SESSION_VALUE NULL -GLOBAL_VALUE Antelope +GLOBAL_VALUE Barracuda GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE Antelope +DEFAULT_VALUE Barracuda VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR VARIABLE_COMMENT File format to use for new tables in .ibd files. @@ -889,7 +973,7 @@ READ_ONLY YES COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_FILE_FORMAT_MAX SESSION_VALUE NULL -GLOBAL_VALUE Antelope +GLOBAL_VALUE Barracuda GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE Antelope VARIABLE_SCOPE GLOBAL @@ -915,6 +999,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT NONE +VARIABLE_NAME INNODB_FILL_FACTOR +SESSION_VALUE NULL +GLOBAL_VALUE 100 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 100 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT +VARIABLE_COMMENT Percentage of B-tree page filled during bulk insert +NUMERIC_MIN_VALUE 10 +NUMERIC_MAX_VALUE 100 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_FIL_MAKE_PAGE_DIRTY_DEBUG SESSION_VALUE NULL GLOBAL_VALUE 0 @@ -999,6 +1097,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_FLUSH_SYNC +SESSION_VALUE NULL +GLOBAL_VALUE ON +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE ON +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Allow IO bursts at the checkpoints ignoring io_capacity setting. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_FORCE_LOAD_CORRUPTED SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -1050,7 +1162,7 @@ VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Kills the server during crash recovery. NUMERIC_MIN_VALUE 0 -NUMERIC_MAX_VALUE 10 +NUMERIC_MAX_VALUE 100 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES @@ -1068,7 +1180,7 @@ NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO -COMMAND_LINE_ARGUMENT NONE +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_FT_CACHE_SIZE SESSION_VALUE NULL GLOBAL_VALUE 8000000 @@ -1295,9 +1407,9 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LARGE_PREFIX SESSION_VALUE NULL -GLOBAL_VALUE OFF +GLOBAL_VALUE ON GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE OFF +DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN VARIABLE_COMMENT Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes. @@ -1377,6 +1489,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_LOG_CHECKSUMS +SESSION_VALUE NULL +GLOBAL_VALUE ON +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE ON +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Whether to compute and require checksums for InnoDB redo log blocks +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LOG_COMPRESSED_PAGES SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -1413,7 +1539,7 @@ DEFAULT_VALUE 50331648 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT VARIABLE_COMMENT Size of each log file in a log group. 
-NUMERIC_MIN_VALUE 1048576 +NUMERIC_MIN_VALUE 4194304 NUMERIC_MAX_VALUE 9223372036854775807 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL @@ -1433,6 +1559,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_LOG_WRITE_AHEAD_SIZE +SESSION_VALUE NULL +GLOBAL_VALUE 8192 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 8192 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Redo log write ahead unit size to avoid read-on-write, it should match the OS cache block IO size +NUMERIC_MIN_VALUE 512 +NUMERIC_MAX_VALUE 16384 +NUMERIC_BLOCK_SIZE 512 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LRU_SCAN_DEPTH SESSION_VALUE NULL GLOBAL_VALUE 100 @@ -1447,6 +1587,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_MASTER_THREAD_DISABLED_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Disable master thread +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_MAX_DIRTY_PAGES_PCT SESSION_VALUE NULL GLOBAL_VALUE 75.000000 @@ -1455,7 +1609,7 @@ DEFAULT_VALUE 75.000000 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE DOUBLE VARIABLE_COMMENT Percentage of dirty pages allowed in bufferpool. -NUMERIC_MIN_VALUE 0.001 +NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 99.999 NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL @@ -1463,9 +1617,9 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_MAX_DIRTY_PAGES_PCT_LWM SESSION_VALUE NULL -GLOBAL_VALUE 0.001000 +GLOBAL_VALUE 0.000000 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE 0.001000 +DEFAULT_VALUE 0.000000 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE DOUBLE VARIABLE_COMMENT Percentage of dirty pages at which flushing kicks in. 
@@ -1503,19 +1657,33 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED -VARIABLE_NAME INNODB_MIRRORED_LOG_GROUPS +VARIABLE_NAME INNODB_MAX_UNDO_LOG_SIZE SESSION_VALUE NULL -GLOBAL_VALUE 1 +GLOBAL_VALUE 1073741824 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE 0 +DEFAULT_VALUE 1073741824 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT -VARIABLE_COMMENT Number of identical copies of log groups we keep for the database. Currently this should be set to 1. -NUMERIC_MIN_VALUE 0 -NUMERIC_MAX_VALUE 10 +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Maximum size of UNDO tablespace in MB (If UNDO tablespace grows beyond this size it will be truncated in due course). +NUMERIC_MIN_VALUE 10485760 +NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL -READ_ONLY YES +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_MERGE_THRESHOLD_SET_ALL_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE 50 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 50 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT Override current MERGE_THRESHOLD setting for all indexes at dictionary cache by the specified value dynamically, at the time. +NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 50 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_MONITOR_DISABLE SESSION_VALUE NULL @@ -1631,7 +1799,7 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_OPEN_FILES SESSION_VALUE NULL -GLOBAL_VALUE 2000 +GLOBAL_VALUE 300 GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -1657,6 +1825,34 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT NONE +VARIABLE_NAME INNODB_PAGE_CLEANERS +SESSION_VALUE NULL +GLOBAL_VALUE 1 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 4 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Page cleaner threads can be from 1 to 64. Default is 4. 
+NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 64 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_PAGE_CLEANER_DISABLED_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Disable page cleaner +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_PAGE_HASH_LOCKS SESSION_VALUE NULL GLOBAL_VALUE 16 @@ -1727,6 +1923,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_PURGE_RSEG_TRUNCATE_FREQUENCY +SESSION_VALUE NULL +GLOBAL_VALUE 128 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 128 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Dictates rate at which UNDO records are purged. Value N means purge rollback segment(s) on every Nth iteration of purge invocation +NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 128 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_PURGE_RUN_NOW SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -1757,12 +1967,12 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_PURGE_THREADS SESSION_VALUE NULL -GLOBAL_VALUE 1 +GLOBAL_VALUE 4 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE 1 +DEFAULT_VALUE 4 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Purge threads can be from 1 to 32. Default is 1. +VARIABLE_COMMENT Purge threads can be from 1 to 32. Default is 4. 
NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 32 NUMERIC_BLOCK_SIZE 0 @@ -2106,10 +2316,10 @@ ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_STRICT_MODE -SESSION_VALUE OFF -GLOBAL_VALUE OFF +SESSION_VALUE ON +GLOBAL_VALUE ON GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE OFF +DEFAULT_VALUE ON VARIABLE_SCOPE SESSION VARIABLE_TYPE BOOLEAN VARIABLE_COMMENT Use strict mode when evaluating create options. @@ -2147,6 +2357,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_SYNC_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable the sync debug checks +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_SYNC_SPIN_LOOPS SESSION_VALUE NULL GLOBAL_VALUE 30 @@ -2175,6 +2399,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_TEMP_DATA_FILE_PATH +SESSION_VALUE NULL +GLOBAL_VALUE ibtmp1:12M:autoextend +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE VARCHAR +VARIABLE_COMMENT Path to files and their sizes making temp-tablespace. +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_THREAD_CONCURRENCY SESSION_VALUE NULL GLOBAL_VALUE 0 @@ -2247,9 +2485,9 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT NULL VARIABLE_NAME INNODB_UNDO_DIRECTORY SESSION_VALUE NULL -GLOBAL_VALUE . +GLOBAL_VALUE PATH GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE . +DEFAULT_VALUE VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR VARIABLE_COMMENT Directory where undo tablespace files live, this path can be absolute. 
@@ -2273,6 +2511,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_UNDO_LOG_TRUNCATE +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable or Disable Truncate of UNDO tablespace. +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_UNDO_TABLESPACES SESSION_VALUE NULL GLOBAL_VALUE 0 @@ -2282,7 +2534,7 @@ VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Number of undo tablespaces to use. NUMERIC_MIN_VALUE 0 -NUMERIC_MAX_VALUE 126 +NUMERIC_MAX_VALUE 95 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES @@ -2329,20 +2581,6 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST OFF,ON READ_ONLY YES COMMAND_LINE_ARGUMENT NONE -VARIABLE_NAME INNODB_USE_SYS_MALLOC -SESSION_VALUE NULL -GLOBAL_VALUE ON -GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE ON -VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT DEPRECATED. This option may be removed in future releases, together with the InnoDB's internal memory allocator. 
Use OS memory allocator instead of InnoDB's internal memory allocator -NUMERIC_MIN_VALUE NULL -NUMERIC_MAX_VALUE NULL -NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST OFF,ON -READ_ONLY YES -COMMAND_LINE_ARGUMENT NONE VARIABLE_NAME INNODB_USE_TRIM SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -2359,7 +2597,7 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_VERSION SESSION_VALUE NULL -GLOBAL_VALUE 5.6.32 +GLOBAL_VALUE 5.7.14 GLOBAL_VALUE_ORIGIN COMPILE-TIME DEFAULT_VALUE NULL VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 3c88437551b..57c3a16ac27 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -4755,6 +4755,34 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME THREAD_POOL_PRIORITY +SESSION_VALUE auto +GLOBAL_VALUE auto +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE auto +VARIABLE_SCOPE SESSION +VARIABLE_TYPE ENUM +VARIABLE_COMMENT Threadpool priority. High priority connections usually start executing earlier than low priority.If priority set to 'auto', the the actual priority(low or high) is determined based on whether or not connection is inside transaction. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST high,low,auto +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME THREAD_POOL_PRIO_KICKUP_TIMER +SESSION_VALUE NULL +GLOBAL_VALUE 1000 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 1000 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT The number of milliseconds before a dequeued low-priority statement is moved to the high-priority queue +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME THREAD_POOL_SIZE SESSION_VALUE NULL GLOBAL_VALUE 4 diff --git a/mysql-test/suite/sys_vars/t/innodb_adaptive_hash_index_parts_basic.test b/mysql-test/suite/sys_vars/t/innodb_adaptive_hash_index_parts_basic.test new file mode 100644 index 00000000000..3f4a9283339 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_adaptive_hash_index_parts_basic.test @@ -0,0 +1,75 @@ +--source include/have_innodb.inc + +#################################################################### +# Displaying default value # +#################################################################### +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +--echo 1 Expected + + +#################################################################### +# Check if Value can set # +#################################################################### + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_adaptive_hash_index_parts=1; +--echo Expected error 'Read only variable' + +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +--echo 1 Expected + + + + +################################################################# +# Check if the value in GLOBAL Table matches value in variable # +################################################################# + +--disable_warnings +SELECT @@GLOBAL.innodb_adaptive_hash_index_parts = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
+WHERE VARIABLE_NAME='innodb_adaptive_hash_index_parts'; +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +--echo 1 Expected + +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_adaptive_hash_index_parts'; +--echo 1 Expected +--enable_warnings + + + +################################################################################ +# Check if accessing variable with and without GLOBAL point to same variable # +################################################################################ +SELECT @@innodb_adaptive_hash_index_parts = @@GLOBAL.innodb_adaptive_hash_index_parts; +--echo 1 Expected + + + +################################################################################ +# Check if innodb_adaptive_hash_index_parts can be accessed with and without @@ sign # +################################################################################ + +SELECT COUNT(@@innodb_adaptive_hash_index_parts); +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_adaptive_hash_index_parts); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_adaptive_hash_index_parts); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT COUNT(@@GLOBAL.innodb_adaptive_hash_index_parts); +--echo 1 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_adaptive_hash_index_parts = @@SESSION.innodb_adaptive_hash_index_parts; +--echo Expected error 'Readonly variable' + + diff --git a/mysql-test/suite/sys_vars/t/innodb_adaptive_max_sleep_delay_basic.test b/mysql-test/suite/sys_vars/t/innodb_adaptive_max_sleep_delay_basic.test index a2508b073eb..49349d86713 100644 --- a/mysql-test/suite/sys_vars/t/innodb_adaptive_max_sleep_delay_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_adaptive_max_sleep_delay_basic.test @@ -32,22 +32,40 @@ SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; # Check if value 
can be set SET @@GLOBAL.innodb_adaptive_max_sleep_delay=100; +# Check for valid values +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=0; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=100000; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=1000000; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; + # Check for out of bounds SET @@GLOBAL.innodb_adaptive_max_sleep_delay=1000001; SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; --echo 1000000 Expected +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=4294967295; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +--echo 1000000 Expected + SET @@GLOBAL.innodb_adaptive_max_sleep_delay=-1; SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; --echo 0 Expected +SET @@GLOBAL.innodb_adaptive_max_sleep_delay=-1024; +SELECT @@GLOBAL.innodb_adaptive_max_sleep_delay; +--echo 0 Expected + SELECT COUNT(@@GLOBAL.innodb_adaptive_max_sleep_delay); --echo 1 Expected # Check if the value in GLOBAL table matches value in variable +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_adaptive_max_sleep_delay'; +--enable_warnings --echo 100 Expected # Check if accessing variable with and without GLOBAL point to same diff --git a/mysql-test/suite/sys_vars/t/innodb_api_bk_commit_interval_basic.test b/mysql-test/suite/sys_vars/t/innodb_api_bk_commit_interval_basic.test index b3a7aebce4e..7c9ae6395be 100644 --- a/mysql-test/suite/sys_vars/t/innodb_api_bk_commit_interval_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_api_bk_commit_interval_basic.test @@ -19,22 +19,47 @@ SELECT @@global.innodb_api_bk_commit_interval; SELECT @@session.innodb_api_bk_commit_interval; SHOW global variables LIKE 'innodb_api_bk_commit_interval'; SHOW session variables LIKE 'innodb_api_bk_commit_interval'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE 
variable_name='innodb_api_bk_commit_interval'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_api_bk_commit_interval'; +--enable_warnings # # show that it's writable # SET global innodb_api_bk_commit_interval=100; SELECT @@global.innodb_api_bk_commit_interval; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_api_bk_commit_interval'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_api_bk_commit_interval'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_api_bk_commit_interval=1; +# +# Valid values +# +SET global innodb_api_bk_commit_interval=1; +SELECT @@global.innodb_api_bk_commit_interval; +SET global innodb_api_bk_commit_interval=100000; +SELECT @@global.innodb_api_bk_commit_interval; +SET global innodb_api_bk_commit_interval=1073741824; +SELECT @@global.innodb_api_bk_commit_interval; + +# +# Invalid values +# +SET global innodb_api_bk_commit_interval=0; +SELECT @@global.innodb_api_bk_commit_interval; +SET global innodb_api_bk_commit_interval=-1024; +SELECT @@global.innodb_api_bk_commit_interval; +SET global innodb_api_bk_commit_interval=1073741825; +SELECT @@global.innodb_api_bk_commit_interval; +SET global innodb_api_bk_commit_interval=4294967295; +SELECT @@global.innodb_api_bk_commit_interval; # # incorrect types @@ -47,8 +72,10 @@ SET global innodb_api_bk_commit_interval=1e1; SET global innodb_api_bk_commit_interval="foo"; SET global innodb_api_bk_commit_interval=-7; SELECT @@global.innodb_api_bk_commit_interval; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_api_bk_commit_interval'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_api_disable_rowlock_basic.test b/mysql-test/suite/sys_vars/t/innodb_api_disable_rowlock_basic.test index c9c04a27229..42e9903df5e 100644 --- a/mysql-test/suite/sys_vars/t/innodb_api_disable_rowlock_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_api_disable_rowlock_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_api_disable_rowlock); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_api_disable_rowlock, 'ON', 'OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_disable_rowlock'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_api_disable_rowlock); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_disable_rowlock'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_api_enable_binlog_basic.test b/mysql-test/suite/sys_vars/t/innodb_api_enable_binlog_basic.test index 637541ef621..e88d8ecac5c 100644 --- a/mysql-test/suite/sys_vars/t/innodb_api_enable_binlog_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_api_enable_binlog_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_api_enable_binlog); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_api_enable_binlog, 'ON', 'OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_enable_binlog'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_api_enable_binlog); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_enable_binlog'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_api_enable_mdl_basic.test b/mysql-test/suite/sys_vars/t/innodb_api_enable_mdl_basic.test index 0e440a72cce..d3086878d6a 100644 --- a/mysql-test/suite/sys_vars/t/innodb_api_enable_mdl_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_api_enable_mdl_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_api_enable_mdl); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_api_enable_mdl, 'ON', 'OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_enable_mdl'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_api_enable_mdl); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_api_enable_mdl'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_api_trx_level_basic.test b/mysql-test/suite/sys_vars/t/innodb_api_trx_level_basic.test index 49c34b647fd..c77f8471d7b 100644 --- a/mysql-test/suite/sys_vars/t/innodb_api_trx_level_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_api_trx_level_basic.test @@ -19,20 +19,24 @@ SELECT @@global.innodb_api_trx_level; SELECT @@session.innodb_api_trx_level; SHOW global variables LIKE 'innodb_api_trx_level'; SHOW session variables LIKE 'innodb_api_trx_level'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_api_trx_level'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_api_trx_level'; +--enable_warnings # # show that it's writable # SET global innodb_api_trx_level=100; SELECT @@global.innodb_api_trx_level; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_api_trx_level'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_api_trx_level'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_api_trx_level=1; @@ -47,8 +51,10 @@ SET global innodb_api_trx_level=1e1; SET global innodb_api_trx_level="foo"; SET global innodb_api_trx_level=-7; SELECT 
@@global.innodb_api_trx_level; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_api_trx_level'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_autoextend_increment_basic.test b/mysql-test/suite/sys_vars/t/innodb_autoextend_increment_basic.test index cbe62a105ff..864dd732ec7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_autoextend_increment_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_autoextend_increment_basic.test @@ -94,18 +94,25 @@ SET @@global.innodb_autoextend_increment = 1001; SELECT @@global.innodb_autoextend_increment; +SET @@global.innodb_autoextend_increment = 2000 ; +SELECT @@global.innodb_autoextend_increment; + --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_autoextend_increment = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_autoextend_increment '; +--enable_warnings SELECT @@global.innodb_autoextend_increment ; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_autoextend_increment '; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' diff --git a/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_basic.test b/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_basic.test index e07234a9152..81e63ddf858 100644 --- a/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_basic.test @@ -75,13 +75,17 @@ SELECT @@global.innodb_autoinc_lock_mode; # Check if the value in GLOBAL Table matches value in variable # ######################################################################### 
+--disable_warnings SELECT @@global.innodb_autoinc_lock_mode = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_autoinc_lock_mode'; +--enable_warnings SELECT @@global.innodb_autoinc_lock_mode; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_autoinc_lock_mode'; +--enable_warnings ############################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_func-master.opt b/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_func-master.opt index f0b6727d6d8..ab9fcb75678 100644 --- a/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_func-master.opt +++ b/mysql-test/suite/sys_vars/t/innodb_autoinc_lock_mode_func-master.opt @@ -1,2 +1,2 @@ ---loose-innodb-autoinc-lock-mode=1 +--innodb-autoinc-lock-mode=1 diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_chunk_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_chunk_size_basic.test new file mode 100644 index 00000000000..561786ee6e5 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_chunk_size_basic.test @@ -0,0 +1,75 @@ +--source include/have_innodb.inc + +#################################################################### +# Displaying default value # +#################################################################### +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +--echo 1 Expected + + +#################################################################### +# Check if Value can set # +#################################################################### + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_buffer_pool_chunk_size=1; +--echo Expected error 'Read only variable' + +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +--echo 1 Expected + + + + +################################################################# +# Check if the value in GLOBAL Table matches value in variable # 
+################################################################# + +--disable_warnings +SELECT @@GLOBAL.innodb_buffer_pool_chunk_size = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_buffer_pool_chunk_size'; +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +--echo 1 Expected + +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_buffer_pool_chunk_size'; +--echo 1 Expected +--enable_warnings + + + +################################################################################ +# Check if accessing variable with and without GLOBAL point to same variable # +################################################################################ +SELECT @@innodb_buffer_pool_chunk_size = @@GLOBAL.innodb_buffer_pool_chunk_size; +--echo 1 Expected + + + +################################################################################ +# Check if innodb_buffer_pool_chunk_size can be accessed with and without @@ sign # +################################################################################ + +SELECT COUNT(@@innodb_buffer_pool_chunk_size); +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_buffer_pool_chunk_size); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_buffer_pool_chunk_size); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT COUNT(@@GLOBAL.innodb_buffer_pool_chunk_size); +--echo 1 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_buffer_pool_chunk_size = @@SESSION.innodb_buffer_pool_chunk_size; +--echo Expected error 'Readonly variable' + + diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_at_shutdown_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_at_shutdown_basic.test index b69e856be5a..feb7bf05638 100644 --- 
a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_at_shutdown_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_at_shutdown_basic.test @@ -40,3 +40,5 @@ SET GLOBAL innodb_buffer_pool_dump_at_shutdown = 5; -- source include/restart_mysqld.inc -- file_exists $file + +SET GLOBAL innodb_buffer_pool_dump_at_shutdown = default; diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_pct_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_pct_basic.test index d2f5cb4a0de..ae45be7f2a3 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_pct_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_dump_pct_basic.test @@ -1,34 +1,57 @@ -# -# Basic test for innodb_buffer_pool_dump_pct -# +############################################ +# Variable Name: innodb_buffer_pool_dump_pct +# Scope: GLOBAL +# Access Type: Dynamic +# Data Type: Integer +# Default Value: 100 +# Range: 1-100 +############################################ -- source include/have_innodb.inc # Check the default value -SET @orig = @@global.innodb_buffer_pool_dump_pct; -SELECT @orig; - -# Do the dump -SET GLOBAL innodb_buffer_pool_dump_pct=3, GLOBAL innodb_buffer_pool_dump_now = ON; - -# Wait for the dump to complete -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 33) = 'Buffer pool(s) dump completed at ' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_dump_status'; --- source include/wait_condition.inc - -# Confirm that the dump file has been created --- let $file = `SELECT CONCAT(@@datadir, @@global.innodb_buffer_pool_filename)` --- file_exists $file - ---disable_warnings -SET GLOBAL innodb_buffer_pool_dump_pct=0; SELECT @@global.innodb_buffer_pool_dump_pct; -SHOW WARNINGS; + +# Set the valid value +SET GLOBAL innodb_buffer_pool_dump_pct=20; + +# Check the value is 20 +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set the lower Boundary value +SET GLOBAL innodb_buffer_pool_dump_pct=1; + +# 
Check the value is 1 +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set the upper boundary value +SET GLOBAL innodb_buffer_pool_dump_pct=100; + +# Check the value is 100 +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set the beyond upper boundary value SET GLOBAL innodb_buffer_pool_dump_pct=101; -SELECT @@global.innodb_buffer_pool_dump_pct; -SHOW WARNINGS; ---enable_warnings -SET GLOBAL innodb_buffer_pool_dump_pct=@orig; +# Check the value is 100 +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set the beyond lower boundary value +SET GLOBAL innodb_buffer_pool_dump_pct=-1; + +# Check the value is 1 +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set the Default value +SET GLOBAL innodb_buffer_pool_dump_pct=Default; + +# Check the default value +SELECT @@global.innodb_buffer_pool_dump_pct; + +# Set with some invalid value +--error ER_WRONG_TYPE_FOR_VAR +SET GLOBAL innodb_buffer_pool_dump_pct='foo'; + +# Set without using Global +--error ER_GLOBAL_VARIABLE +SET innodb_buffer_pool_dump_pct=50; diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_filename_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_filename_basic.test deleted file mode 100644 index c50d2d66dff..00000000000 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_filename_basic.test +++ /dev/null @@ -1,32 +0,0 @@ -# -# Basic test for innodb_buffer_pool_filename -# - --- source include/have_innodb.inc - -# Check the default value and save for later restoration -SET @orig = @@global.innodb_buffer_pool_filename; -SELECT @orig; - -let $old_val=query_get_value(SHOW STATUS LIKE 'innodb_buffer_pool_dump_status', Value, 1); -sleep 1; # to ensure that the previous and the next dumps are at least a second apart - -# Try with a non-default filename - -SET GLOBAL innodb_buffer_pool_filename = 'innodb_foobar_dump'; - -SET GLOBAL innodb_buffer_pool_dump_now = ON; --- let $file = `SELECT CONCAT(@@datadir, @@global.innodb_buffer_pool_filename)` - -# Wait for the dump to complete -let 
$wait_condition = - SELECT variable_value LIKE 'Buffer pool(s) dump completed at %' - AND variable_value <> '$old_val' - FROM information_schema.global_status - WHERE variable_name = 'innodb_buffer_pool_dump_status'; --- source include/wait_condition.inc - --- file_exists $file - -# Restore the env -SET GLOBAL innodb_buffer_pool_filename = @orig; diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_instances_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_instances_basic.test index 0960f1fb38b..8785272e10d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_instances_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_instances_basic.test @@ -53,17 +53,21 @@ SELECT COUNT(@@GLOBAL.innodb_buffer_pool_instances); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_buffer_pool_instances = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_instances'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_buffer_pool_instances); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_instances'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_load_now_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_load_now_basic.test index a0409901865..701d6a8a96a 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_load_now_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_load_now_basic.test @@ -20,13 +20,11 @@ let $old_status= `SELECT variable_value FROM information_schema.global_status # let $wait_condition = # SELECT TRIM(SUBSTR('$old_status', -8)) != DATE_FORMAT(CURTIME(), '%k:%i:%s'); # -- source include/wait_condition.inc - if (`SELECT variable_value LIKE '%dump completed at%' FROM 
information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_dump_status'`) { -- sleep 2 } - # Do the dump SET GLOBAL innodb_buffer_pool_dump_now = ON; @@ -36,7 +34,9 @@ let $wait_condition = AND SUBSTR(variable_value, 1, 33) = 'Buffer pool(s) dump completed at ' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_dump_status'; +--disable_warnings -- source include/wait_condition.inc +--enable_warnings # Confirm the file is really created -- let $file = `SELECT CONCAT(@@datadir, @@global.innodb_buffer_pool_filename)` @@ -50,10 +50,15 @@ let $wait_condition = SELECT SUBSTR(variable_value, 1, 33) = 'Buffer pool(s) load completed at ' FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_load_status'; +--disable_warnings -- source include/wait_condition.inc +--enable_warnings # Show the status, interesting if the above timed out +--disable_warnings -- replace_regex /[0-9]{6}[[:space:]]+[0-9]{1,2}:[0-9]{2}:[0-9]{2}/TIMESTAMP_NOW/ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_load_status'; +--enable_warnings + diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt new file mode 100644 index 00000000000..aa536bf0070 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-chunk-size=2M diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test index 190b2d19bc4..c5b4c118da2 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test @@ -24,6 +24,21 @@ --source include/have_innodb.inc +--disable_query_log +if (`select (version() like '%debug%') > 0`) +{ + set @old_innodb_disable_resize = 
@@innodb_disable_resize_buffer_pool_debug; + set global innodb_disable_resize_buffer_pool_debug = OFF; +} +--enable_query_log + +let $wait_condition = + SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' + FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; + +SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; + --echo '#---------------------BS_STVARS_022_01----------------------#' #################################################################### # Displaying default value # @@ -37,9 +52,9 @@ SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); # Check if Value can set # #################################################################### ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -SET @@GLOBAL.innodb_buffer_pool_size=1; ---echo Expected error 'Read only variable' +SET @@GLOBAL.innodb_buffer_pool_size=10485760; +--echo Expected succeeded +--source include/wait_condition.inc SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); --echo 1 Expected @@ -52,17 +67,21 @@ SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'; +--enable_warnings --echo 1 Expected @@ -100,3 +119,12 @@ SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; --echo Expected error 'Readonly variable' +SET @@GLOBAL.innodb_buffer_pool_size = @start_buffer_pool_size; +--source include/wait_condition.inc + +--disable_query_log +if (`select (version() like '%debug%') > 0`) +{ + set global 
innodb_disable_resize_buffer_pool_debug = @old_innodb_disable_resize; +} +--enable_query_log diff --git a/mysql-test/suite/sys_vars/t/innodb_change_buffer_max_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_change_buffer_max_size_basic.test index 5e081b1a0a4..2094ef3dc0b 100644 --- a/mysql-test/suite/sys_vars/t/innodb_change_buffer_max_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_change_buffer_max_size_basic.test @@ -18,16 +18,20 @@ select @@global.innodb_change_buffer_max_size; select @@session.innodb_change_buffer_max_size; show global variables like 'innodb_change_buffer_max_size'; show session variables like 'innodb_change_buffer_max_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size'; select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size'; +--enable_warnings # # show that it's writable # set global innodb_change_buffer_max_size=10; select @@global.innodb_change_buffer_max_size; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size'; select * from information_schema.session_variables where variable_name='innodb_change_buffer_max_size'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_change_buffer_max_size=1; @@ -43,10 +47,14 @@ set global innodb_change_buffer_max_size="foo"; set global innodb_change_buffer_max_size=-7; select @@global.innodb_change_buffer_max_size; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size'; +--enable_warnings set global innodb_change_buffer_max_size=56; select @@global.innodb_change_buffer_max_size; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffer_max_size'; +--enable_warnings # # min/max/DEFAULT values diff --git a/mysql-test/suite/sys_vars/t/innodb_change_buffering_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_change_buffering_basic.test index abdfddb4c4b..aba3b1e3479 100644 --- a/mysql-test/suite/sys_vars/t/innodb_change_buffering_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_change_buffering_basic.test @@ -18,20 +18,26 @@ select @@global.innodb_change_buffering; select @@session.innodb_change_buffering; show global variables like 'innodb_change_buffering'; show session variables like 'innodb_change_buffering'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering'; select * from information_schema.session_variables where variable_name='innodb_change_buffering'; +--enable_warnings # # show that it's writable # set global innodb_change_buffering='none'; select @@global.innodb_change_buffering; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering'; select * from information_schema.session_variables where variable_name='innodb_change_buffering'; +--enable_warnings set @@global.innodb_change_buffering='inserts'; select @@global.innodb_change_buffering; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering'; select * from information_schema.session_variables where variable_name='innodb_change_buffering'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_change_buffering='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_change_buffering_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_change_buffering_debug_basic.test index 893d1cb42e3..a6fc09f767e 100644 --- a/mysql-test/suite/sys_vars/t/innodb_change_buffering_debug_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_change_buffering_debug_basic.test @@ -13,20 +13,26 @@ select @@global.innodb_change_buffering_debug; select @@session.innodb_change_buffering_debug; show global variables like 'innodb_change_buffering_debug'; show session variables like 
'innodb_change_buffering_debug'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug'; select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug'; +--enable_warnings # # show that it's writable # set global innodb_change_buffering_debug=1; select @@global.innodb_change_buffering_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug'; select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug'; +--enable_warnings set @@global.innodb_change_buffering_debug=0; select @@global.innodb_change_buffering_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_change_buffering_debug'; select * from information_schema.session_variables where variable_name='innodb_change_buffering_debug'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_change_buffering_debug='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test b/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test index e7098b7e3b3..bb0f3417f87 100644 --- a/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/not_encrypted.inc # Check the default value SET @orig = @@global.innodb_checksum_algorithm; diff --git a/mysql-test/suite/sys_vars/t/innodb_checksums_basic.test b/mysql-test/suite/sys_vars/t/innodb_checksums_basic.test index c4c39d7d380..5db0a18e8fd 100644 --- a/mysql-test/suite/sys_vars/t/innodb_checksums_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_checksums_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_checksums); # Check if the value in GLOBAL Table matches value in variable # 
################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_checksums, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_checksums'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_checksums); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_checksums'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_cmp_per_index_enabled_basic.test b/mysql-test/suite/sys_vars/t/innodb_cmp_per_index_enabled_basic.test index d729acea02c..432c04857ec 100644 --- a/mysql-test/suite/sys_vars/t/innodb_cmp_per_index_enabled_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_cmp_per_index_enabled_basic.test @@ -26,8 +26,6 @@ SELECT @@global.innodb_cmp_per_index_enabled; SET GLOBAL innodb_cmp_per_index_enabled=OFF; SELECT @@global.innodb_cmp_per_index_enabled; -SET GLOBAL innodb_file_format=Barracuda; - -- vertical_results # Check that enabling after being disabled resets the stats @@ -65,5 +63,4 @@ DROP TABLE t; # -SET GLOBAL innodb_file_format=default; SET GLOBAL innodb_cmp_per_index_enabled=default; diff --git a/mysql-test/suite/sys_vars/t/innodb_commit_concurrency_basic.test b/mysql-test/suite/sys_vars/t/innodb_commit_concurrency_basic.test index 42d172934d1..4ed706b372b 100644 --- a/mysql-test/suite/sys_vars/t/innodb_commit_concurrency_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_commit_concurrency_basic.test @@ -85,26 +85,38 @@ SELECT @@global.innodb_commit_concurrency; # --Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_commit_concurrency = 1; +SELECT @@global.innodb_commit_concurrency; --Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_commit_concurrency = -1; +SELECT @@global.innodb_commit_concurrency; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_commit_concurrency = "T"; +SELECT @@global.innodb_commit_concurrency; --Error 
ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_commit_concurrency = "Y"; +SELECT @@global.innodb_commit_concurrency; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_commit_concurrency = 1.1; +SELECT @@global.innodb_commit_concurrency; --Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_commit_concurrency = 1001; +SELECT @@global.innodb_commit_concurrency; + + --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_commit_concurrency = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_commit_concurrency'; SELECT @@global.innodb_commit_concurrency; SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_commit_concurrency'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_failure_threshold_pct_basic.test b/mysql-test/suite/sys_vars/t/innodb_compression_failure_threshold_pct_basic.test index 1cdfaa6b31d..315fe2df3c7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_compression_failure_threshold_pct_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_compression_failure_threshold_pct_basic.test @@ -87,6 +87,9 @@ SELECT @@global.innodb_compression_failure_threshold_pct; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_compression_failure_threshold_pct = "T"; SELECT @@global.innodb_compression_failure_threshold_pct; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_failure_threshold_pct = 1.1; +SELECT @@global.innodb_compression_failure_threshold_pct; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_compression_failure_threshold_pct = "Y"; @@ -94,19 
+97,28 @@ SELECT @@global.innodb_compression_failure_threshold_pct; SET @@global.innodb_compression_failure_threshold_pct = 101; SELECT @@global.innodb_compression_failure_threshold_pct; - +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_failure_threshold_pct = " "; +SELECT @@global.innodb_compression_failure_threshold_pct; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_failure_threshold_pct = ' '; +SELECT @@global.innodb_compression_failure_threshold_pct; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_compression_failure_threshold_pct = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_compression_failure_threshold_pct'; +--enable_warnings SELECT @@global.innodb_compression_failure_threshold_pct; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_compression_failure_threshold_pct'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_level_basic.test b/mysql-test/suite/sys_vars/t/innodb_compression_level_basic.test index a90abdde2f1..d19d2971fc9 100644 --- a/mysql-test/suite/sys_vars/t/innodb_compression_level_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_compression_level_basic.test @@ -18,16 +18,20 @@ select @@global.innodb_compression_level; select @@session.innodb_compression_level; show global variables like 'innodb_compression_level'; show session variables like 'innodb_compression_level'; +--disable_warnings select * from information_schema.global_variables where 
variable_name='innodb_compression_level'; select * from information_schema.session_variables where variable_name='innodb_compression_level'; +--enable_warnings # # show that it's writable # set global innodb_compression_level=2; select @@global.innodb_compression_level; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_compression_level'; select * from information_schema.session_variables where variable_name='innodb_compression_level'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_compression_level=4; @@ -43,10 +47,14 @@ set global innodb_compression_level="foo"; set global innodb_compression_level=10; select @@global.innodb_compression_level; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_compression_level'; +--enable_warnings set global innodb_compression_level=-7; select @@global.innodb_compression_level; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_compression_level'; +--enable_warnings # # min/max values diff --git a/mysql-test/suite/sys_vars/t/innodb_compression_pad_pct_max_basic.test b/mysql-test/suite/sys_vars/t/innodb_compression_pad_pct_max_basic.test index 3ca566956ef..1491f705ab2 100644 --- a/mysql-test/suite/sys_vars/t/innodb_compression_pad_pct_max_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_compression_pad_pct_max_basic.test @@ -84,22 +84,34 @@ SELECT @@global.innodb_compression_pad_pct_max; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_compression_pad_pct_max = "T"; SELECT @@global.innodb_compression_pad_pct_max; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_pad_pct_max = 1.1; +SELECT @@global.innodb_compression_pad_pct_max; SET @@global.innodb_compression_pad_pct_max = 76; SELECT @@global.innodb_compression_pad_pct_max; - +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_pad_pct_max = " "; +SELECT @@global.innodb_compression_pad_pct_max; 
+--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_compression_pad_pct_max = ' '; +SELECT @@global.innodb_compression_pad_pct_max; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_compression_pad_pct_max = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_compression_pad_pct_max'; +--enable_warnings SELECT @@global.innodb_compression_pad_pct_max; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_compression_pad_pct_max'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_concurrency_tickets_basic.test b/mysql-test/suite/sys_vars/t/innodb_concurrency_tickets_basic.test index f73e25179ba..d753b8bc344 100644 --- a/mysql-test/suite/sys_vars/t/innodb_concurrency_tickets_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_concurrency_tickets_basic.test @@ -59,7 +59,6 @@ SELECT @@innodb_concurrency_tickets; --Error ER_UNKNOWN_TABLE SELECT local.innodb_concurrency_tickets; - SET global innodb_concurrency_tickets = 0; SELECT @@global.innodb_concurrency_tickets; @@ -69,7 +68,6 @@ SELECT @@global.innodb_concurrency_tickets; # change the value of innodb_concurrency_tickets to a valid value # ########################################################################## - SET @@global.innodb_concurrency_tickets = 1; SELECT @@global.innodb_concurrency_tickets; @@ -79,15 +77,38 @@ SELECT @@global.innodb_concurrency_tickets; SET @@global.innodb_concurrency_tickets = 4294967295; SELECT @@global.innodb_concurrency_tickets; - --echo 
'#--------------------FN_DYNVARS_046_04-------------------------#' ########################################################################### +# Check the value of innodb_concurrency_tickets for out of bounds # +########################################################################### + +# With a 64 bit mysqld:18446744073709551615,with a 32 bit mysqld: 4294967295 +--disable_warnings +SET @@global.innodb_concurrency_tickets = 4294967296; +--enable_warnings +SELECT @@global.innodb_concurrency_tickets IN (4294967296,4294967295); + +--disable_warnings +SET @@global.innodb_concurrency_tickets = 12345678901; +--enable_warnings +SELECT @@global.innodb_concurrency_tickets IN (12345678901,4294967295); + +--disable_warnings +SET @@global.innodb_concurrency_tickets = 18446744073709551615; +--enable_warnings +SELECT @@global.innodb_concurrency_tickets IN (18446744073709551615,4294967295); + +--echo '#--------------------FN_DYNVARS_046_05-------------------------#' +########################################################################### # Change the value of innodb_concurrency_tickets to invalid value # ########################################################################### SET @@global.innodb_concurrency_tickets = -1; SELECT @@global.innodb_concurrency_tickets; +SET @@global.innodb_concurrency_tickets = -1024; +SELECT @@global.innodb_concurrency_tickets; + --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_concurrency_tickets = "T"; SELECT @@global.innodb_concurrency_tickets; @@ -96,22 +117,35 @@ SELECT @@global.innodb_concurrency_tickets; SET @@global.innodb_concurrency_tickets = "Y"; SELECT @@global.innodb_concurrency_tickets; -SET @@global.innodb_concurrency_tickets = 1001; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_concurrency_tickets = 1.1; SELECT @@global.innodb_concurrency_tickets; ---echo '#----------------------FN_DYNVARS_046_05------------------------#' +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_concurrency_tickets = " "; +SELECT 
@@global.innodb_concurrency_tickets; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_concurrency_tickets = ' '; +SELECT @@global.innodb_concurrency_tickets; + +--echo '#----------------------FN_DYNVARS_046_06------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_concurrency_tickets = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_concurrency_tickets'; +--enable_warnings SELECT @@global.innodb_concurrency_tickets; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_concurrency_tickets'; +--enable_warnings ---echo '#---------------------FN_DYNVARS_046_06-------------------------#' +--echo '#---------------------FN_DYNVARS_046_07-------------------------#' ################################################################### # Check if ON and OFF values can be used on variable # ################################################################### @@ -124,7 +158,7 @@ SELECT @@global.innodb_concurrency_tickets; SET @@global.innodb_concurrency_tickets = ON; SELECT @@global.innodb_concurrency_tickets; ---echo '#---------------------FN_DYNVARS_046_07----------------------#' +--echo '#---------------------FN_DYNVARS_046_08----------------------#' ################################################################### # Check if TRUE and FALSE values can be used on variable # ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_data_file_path_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_file_path_basic.test index 1d88c47b1bb..c936744297f 100644 --- a/mysql-test/suite/sys_vars/t/innodb_data_file_path_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_data_file_path_basic.test @@ -52,17 
+52,21 @@ SELECT COUNT(@@GLOBAL.innodb_data_file_path); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_data_file_path = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_data_file_path'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_data_file_path); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_data_file_path'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test index acf3741d5fa..b6b7999900a 100644 --- a/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_data_home_dir_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_data_home_dir); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_data_home_dir = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_data_home_dir'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_data_home_dir); --echo 0 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_data_home_dir'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_default_row_format_basic.test b/mysql-test/suite/sys_vars/t/innodb_default_row_format_basic.test new file mode 100644 index 00000000000..f9aabf49ba4 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_default_row_format_basic.test @@ -0,0 +1,41 @@ +--source include/have_innodb.inc + +# Check the default value +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 
'redundant'; +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 'dynamic'; +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 'compact'; +SELECT @@global.innodb_default_row_format; + +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_default_row_format = 'compressed'; +SELECT @@global.innodb_default_row_format; + +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_default_row_format = 'foobar'; +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 0; +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 1; +SELECT @@global.innodb_default_row_format; + +SET GLOBAL innodb_default_row_format = 2; +SELECT @@global.innodb_default_row_format; + +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_default_row_format = 3; +SELECT @@global.innodb_default_row_format; + +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_default_row_format = 123; +SELECT @@global.innodb_default_row_format; + + +SET GLOBAL innodb_default_row_format = default; diff --git a/mysql-test/suite/sys_vars/t/innodb_disable_resize_buffer_pool_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_disable_resize_buffer_pool_debug_basic.test new file mode 100644 index 00000000000..e381e746c06 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_disable_resize_buffer_pool_debug_basic.test @@ -0,0 +1,72 @@ +--echo # +--echo # Basic test for innodb_disable_resize_buffer_pool_debug +--echo # + +--source include/have_innodb.inc + +# The config variable is a debug variable +-- source include/have_debug.inc + +SET @start_global_value = @@global.innodb_disable_resize_buffer_pool_debug; + +# Check if Value can set + +SET @@global.innodb_disable_resize_buffer_pool_debug = 0; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; + +SET @@global.innodb_disable_resize_buffer_pool_debug ='On' ; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; + +SET 
@@global.innodb_disable_resize_buffer_pool_debug ='Off' ; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; + +SET @@global.innodb_disable_resize_buffer_pool_debug = 1; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; + +# Check if the value in GLOBAL Table matches value in variable + +--disable_warnings +SELECT IF(@@GLOBAL.innodb_disable_resize_buffer_pool_debug,'ON','OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_disable_resize_buffer_pool_debug'; +--enable_warnings +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug); +--echo 1 Expected + +--disable_warnings +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_disable_resize_buffer_pool_debug'; +--enable_warnings +--echo 1 Expected + +# Check if accessing variable with and without GLOBAL point to same variable + +SELECT @@innodb_disable_resize_buffer_pool_debug = @@GLOBAL.innodb_disable_resize_buffer_pool_debug; +--echo 1 Expected + +# Check if innodb_disable_resize_buffer_pool_debug can be accessed with and without @@ sign + +SELECT COUNT(@@innodb_disable_resize_buffer_pool_debug); +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_disable_resize_buffer_pool_debug); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_disable_resize_buffer_pool_debug); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT COUNT(@@GLOBAL.innodb_disable_resize_buffer_pool_debug); +--echo 1 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_disable_resize_buffer_pool_debug = @@SESSION.innodb_disable_resize_buffer_pool_debug; + +# Cleanup + +SET @@global.innodb_disable_resize_buffer_pool_debug = @start_global_value; +SELECT @@global.innodb_disable_resize_buffer_pool_debug; diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test index 72dd22cbeb8..1ae10d0f7cf 100644 --- a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_doublewrite); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_batch_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_doublewrite_batch_size_basic.test index ccdab532737..5e9104b5335 100644 --- a/mysql-test/suite/sys_vars/t/innodb_doublewrite_batch_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_batch_size_basic.test @@ -10,8 +10,10 @@ select @@global.innodb_doublewrite_batch_size; select @@session.innodb_doublewrite_batch_size; show global variables like 'innodb_doublewrite_batch_size'; show session variables like 'innodb_doublewrite_batch_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_doublewrite_batch_size'; select * from information_schema.session_variables where variable_name='innodb_doublewrite_batch_size'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_fast_shutdown_basic.test b/mysql-test/suite/sys_vars/t/innodb_fast_shutdown_basic.test index e1b62046313..9fe9f490aa4 100644 --- a/mysql-test/suite/sys_vars/t/innodb_fast_shutdown_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_fast_shutdown_basic.test @@ -116,7 +116,15 @@ SELECT @@global.innodb_fast_shutdown; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_fast_shutdown = "0"; SELECT @@global.innodb_fast_shutdown; - +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_fast_shutdown = 1.1; +SELECT @@global.innodb_fast_shutdown; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_fast_shutdown = ' '; +SELECT @@global.innodb_fast_shutdown; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_fast_shutdown = " "; +SELECT @@global.innodb_fast_shutdown; --echo '#-------------------FN_DYNVARS_042_05----------------------------#' ########################################################################### @@ -137,9 +145,11 @@ SET @@local.innodb_fast_shutdown = 0; # Check if the value in SESSION Table contains variable value # ######################################################################### +--disable_warnings SELECT count(VARIABLE_VALUE) AS res_is_0 FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME='innodb_fast_shutdown'; +--enable_warnings --echo '#----------------------FN_DYNVARS_042_07------------------------#' @@ -147,9 +157,11 @@ WHERE VARIABLE_NAME='innodb_fast_shutdown'; # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_fast_shutdown = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_fast_shutdown'; +--enable_warnings --echo '#---------------------FN_DYNVARS_042_08-------------------------#' diff --git a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test index 3b97a85db01..e9dd0c71936 100644 --- a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test +++ b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test @@ -82,7 +82,7 @@ let $counter= 80; 
let $mysql_errno= 0; while (!$mysql_errno) { - --error 0,1040,1053,2002,2003,2006,2013 + --error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013 show status; dec $counter; diff --git a/mysql-test/suite/sys_vars/t/innodb_file_format_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_format_basic.test index bfc092f2f05..739260c07e5 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_format_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_file_format_basic.test @@ -18,20 +18,26 @@ select @@global.innodb_file_format; select @@session.innodb_file_format; show global variables like 'innodb_file_format'; show session variables like 'innodb_file_format'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_file_format'; select * from information_schema.session_variables where variable_name='innodb_file_format'; +--enable_warnings # # show that it's writable # set global innodb_file_format='Antelope'; select @@global.innodb_file_format; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_file_format'; select * from information_schema.session_variables where variable_name='innodb_file_format'; +--enable_warnings set @@global.innodb_file_format='Barracuda'; select @@global.innodb_file_format; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_file_format'; select * from information_schema.session_variables where variable_name='innodb_file_format'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_file_format='Salmon'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_file_format_check_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_format_check_basic.test index f9f61b9380c..56afba48e29 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_format_check_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_file_format_check_basic.test @@ -19,10 +19,12 @@ SELECT 
@@global.innodb_file_format_check; SELECT @@session.innodb_file_format_check; SHOW global variables LIKE 'innodb_file_format_check'; SHOW session variables LIKE 'innodb_file_format_check'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_file_format_check'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_file_format_check'; +--enable_warnings # # show that it's read only @@ -53,17 +55,21 @@ SET @@session.innodb_stats_on_metadata='ON'; # Check if the value in GLOBAL Table matches value in variable # +--disable_warnings SELECT IF(@@GLOBAL.innodb_file_format_check, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_file_format_check'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_file_format_check); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_file_format_check'; +--enable_warnings --echo 1 Expected # diff --git a/mysql-test/suite/sys_vars/t/innodb_file_format_max_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_format_max_basic.test index 18076cfef7f..494f3817cb8 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_format_max_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_file_format_max_basic.test @@ -3,7 +3,6 @@ # --source include/not_embedded.inc --source include/have_innodb.inc ---source suite/innodb/include/restart_and_reinit.inc SET @start_global_value = @@global.innodb_file_format_max; SELECT @start_global_value; @@ -18,26 +17,32 @@ SELECT @@global.innodb_file_format_max; SELECT @@session.innodb_file_format_max; SHOW global variables LIKE 'innodb_file_format_max'; SHOW session variables LIKE 'innodb_file_format_max'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_file_format_max'; SELECT * FROM information_schema.session_variables WHERE 
variable_name='innodb_file_format_max'; +--enable_warnings # # show that it's writable # SET global innodb_file_format_max='Antelope'; SELECT @@global.innodb_file_format_max; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_file_format_max'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_file_format_max'; +--enable_warnings SET @@global.innodb_file_format_max='Barracuda'; SELECT @@global.innodb_file_format_max; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_file_format_max'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_file_format_max'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_file_format_max='Salmon'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test index 32cdd0beac4..c701c2ee171 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test @@ -61,30 +61,38 @@ SELECT COUNT(@@GLOBAL.innodb_write_io_threads); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_read_io_threads'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_read_io_threads); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_read_io_threads'; +--enable_warnings --echo 1 Expected +--disable_warnings SELECT @@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_write_io_threads'; +--enable_warnings --echo 1 Expected SELECT 
COUNT(@@GLOBAL.innodb_write_io_threads); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_write_io_threads'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic-master.opt new file mode 100644 index 00000000000..9d2c4f807e0 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic-master.opt @@ -0,0 +1 @@ +--innodb_file_per_table=On diff --git a/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test index 1478d6df2e9..2fd9783e16d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_file_per_table_basic.test @@ -58,17 +58,21 @@ SELECT @@global.innodb_file_per_table; # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_file_per_table,'ON','OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_file_per_table'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_file_per_table); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_file_per_table'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_fill_factor_basic.test b/mysql-test/suite/sys_vars/t/innodb_fill_factor_basic.test new file mode 100644 index 00000000000..8e4caae0088 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_fill_factor_basic.test @@ -0,0 +1,41 @@ + +# +# 2014-03-26 - Added +# + +--source include/have_innodb.inc + +# +# show the global and session values; +# +select @@global.innodb_fill_factor; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select 
@@session.innodb_fill_factor; +show global variables like 'innodb_fill_factor'; +show session variables like 'innodb_fill_factor'; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_fill_factor'; +select * from information_schema.session_variables where variable_name='innodb_fill_factor'; +--enable_warnings + +# +# test default, min, max value +# +let $innodb_fill_factor_orig=`select @@innodb_fill_factor`; + +set global innodb_fill_factor=9; +select @@innodb_fill_factor; + +set global innodb_fill_factor=10; +select @@innodb_fill_factor; + +set global innodb_fill_factor=75; +select @@innodb_fill_factor; + +set global innodb_fill_factor=100; +select @@innodb_fill_factor; + +set global innodb_fill_factor=101; +select @@innodb_fill_factor; + +eval set global innodb_fill_factor=$innodb_fill_factor_orig; diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_log_at_timeout_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_log_at_timeout_basic.test index 0ab079adaa8..09a790fc3b6 100644 --- a/mysql-test/suite/sys_vars/t/innodb_flush_log_at_timeout_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flush_log_at_timeout_basic.test @@ -101,18 +101,31 @@ SELECT @@global.innodb_flush_log_at_timeout; SET @@global.innodb_flush_log_at_timeout = 2701; SELECT @@global.innodb_flush_log_at_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_timeout = ' '; +SELECT @@global.innodb_flush_log_at_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_timeout = " "; +SELECT @@global.innodb_flush_log_at_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_timeout = 1.1; +SELECT @@global.innodb_flush_log_at_timeout; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # 
######################################################################### +--disable_warnings SELECT @@global.innodb_flush_log_at_timeout = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_log_at_timeout'; +--enable_warnings SELECT @@global.innodb_flush_log_at_timeout; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_log_at_timeout'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_log_at_trx_commit_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_log_at_trx_commit_basic.test index 56cfc2ffebe..34510cdb462 100644 --- a/mysql-test/suite/sys_vars/t/innodb_flush_log_at_trx_commit_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flush_log_at_trx_commit_basic.test @@ -56,6 +56,11 @@ SELECT @@global.innodb_flush_log_at_trx_commit; # Check if variable can be accessed with and without @@ sign # ############################################################################### +--Error ER_GLOBAL_VARIABLE +SET innodb_flush_log_at_trx_commit = 1; +SELECT @@innodb_flush_log_at_trx_commit; + + --Error ER_UNKNOWN_TABLE SELECT local.innodb_flush_log_at_trx_commit; @@ -96,18 +101,35 @@ SELECT @@global.innodb_flush_log_at_trx_commit; SET @@global.innodb_flush_log_at_trx_commit = 1001; SELECT @@global.innodb_flush_log_at_trx_commit; + +SET @@global.innodb_flush_log_at_trx_commit = 100156787; +SELECT @@global.innodb_flush_log_at_trx_commit; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_trx_commit = " "; +SELECT @@global.innodb_flush_log_at_trx_commit; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_trx_commit = 1.1; +SELECT @@global.innodb_flush_log_at_trx_commit; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flush_log_at_trx_commit = ' '; +SELECT 
@@global.innodb_flush_log_at_trx_commit; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_flush_log_at_trx_commit = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_log_at_trx_commit'; +--enable_warnings SELECT @@global.innodb_flush_log_at_trx_commit; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_log_at_trx_commit'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test index 75af00e33af..9f99c1305fd 100644 --- a/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flush_method_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_flush_method); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_flush_method = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_method'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_flush_method); --echo 0 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flush_method'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_neighbors_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_neighbors_basic.test index 698e30b6669..671e6f58310 100644 --- 
a/mysql-test/suite/sys_vars/t/innodb_flush_neighbors_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flush_neighbors_basic.test @@ -16,32 +16,44 @@ select @@global.innodb_flush_neighbors; select @@session.innodb_flush_neighbors; show global variables like 'innodb_flush_neighbors'; show session variables like 'innodb_flush_neighbors'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings # # show that it's writable # set global innodb_flush_neighbors=0; select @@global.innodb_flush_neighbors; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings set @@global.innodb_flush_neighbors=TRUE; select @@global.innodb_flush_neighbors; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings set global innodb_flush_neighbors=0; select @@global.innodb_flush_neighbors; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings set @@global.innodb_flush_neighbors=2; select @@global.innodb_flush_neighbors; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings set @@global.innodb_flush_neighbors=DEFAULT; select @@global.innodb_flush_neighbors; +--disable_warnings select * from information_schema.global_variables 
where variable_name='innodb_flush_neighbors'; select * from information_schema.session_variables where variable_name='innodb_flush_neighbors'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_flush_neighbors=0; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test new file mode 100644 index 00000000000..a73575864bd --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test @@ -0,0 +1,77 @@ +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_flush_sync; +SELECT @start_global_value; + +# +# exists as global only +# +--echo Valid values are 'ON' and 'OFF' +select @@global.innodb_flush_sync in (0, 1); +select @@global.innodb_flush_sync; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_flush_sync; +show global variables like 'innodb_flush_sync'; +show session variables like 'innodb_flush_sync'; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings + +# +# show that it's writable +# +set global innodb_flush_sync='OFF'; +select @@global.innodb_flush_sync; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings +set @@global.innodb_flush_sync=1; +select @@global.innodb_flush_sync; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings +set global innodb_flush_sync=0; +select @@global.innodb_flush_sync; +--disable_warnings +select * from information_schema.global_variables where 
variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings +set @@global.innodb_flush_sync='ON'; +select @@global.innodb_flush_sync; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings +--error ER_GLOBAL_VARIABLE +set session innodb_flush_sync='OFF'; +--error ER_GLOBAL_VARIABLE +set @@session.innodb_flush_sync='ON'; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_flush_sync=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_flush_sync=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_flush_sync=2; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_flush_sync=-3; +select @@global.innodb_flush_sync; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_flush_sync'; +select * from information_schema.session_variables where variable_name='innodb_flush_sync'; +--enable_warnings +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_flush_sync='AUTO'; + +# +# Cleanup +# + +SET @@global.innodb_flush_sync = @start_global_value; +SELECT @@global.innodb_flush_sync; diff --git a/mysql-test/suite/sys_vars/t/innodb_flushing_avg_loops_basic.test b/mysql-test/suite/sys_vars/t/innodb_flushing_avg_loops_basic.test index a84e623f2c3..f23f9697197 100644 --- a/mysql-test/suite/sys_vars/t/innodb_flushing_avg_loops_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flushing_avg_loops_basic.test @@ -94,18 +94,28 @@ SELECT @@global.innodb_flushing_avg_loops; SET @@global.innodb_flushing_avg_loops = 1001; SELECT @@global.innodb_flushing_avg_loops; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flushing_avg_loops = ' '; +SELECT @@global.innodb_flushing_avg_loops; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_flushing_avg_loops = " "; +SELECT 
@@global.innodb_flushing_avg_loops; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_flushing_avg_loops = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flushing_avg_loops'; +--enable_warnings SELECT @@global.innodb_flushing_avg_loops; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_flushing_avg_loops'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_force_load_corrupted_basic.test b/mysql-test/suite/sys_vars/t/innodb_force_load_corrupted_basic.test index 1726b320f47..f12f2f670a4 100644 --- a/mysql-test/suite/sys_vars/t/innodb_force_load_corrupted_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_force_load_corrupted_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_force_load_corrupted); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_force_load_corrupted, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_force_load_corrupted'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_force_load_corrupted); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_force_load_corrupted'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_force_recovery_basic.test b/mysql-test/suite/sys_vars/t/innodb_force_recovery_basic.test 
index f5aa769f09f..a62c895c202 100644 --- a/mysql-test/suite/sys_vars/t/innodb_force_recovery_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_force_recovery_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_force_recovery); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_force_recovery = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_force_recovery'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_force_recovery); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_force_recovery'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_force_recovery_crash_basic.test b/mysql-test/suite/sys_vars/t/innodb_force_recovery_crash_basic.test index 5eefe1b9219..cfbd10c4e31 100644 --- a/mysql-test/suite/sys_vars/t/innodb_force_recovery_crash_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_force_recovery_crash_basic.test @@ -10,8 +10,10 @@ select @@global.innodb_force_recovery_crash; select @@session.innodb_force_recovery_crash; show global variables like 'innodb_force_recovery_crash'; show session variables like 'innodb_force_recovery_crash'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_force_recovery_crash'; select * from information_schema.session_variables where variable_name='innodb_force_recovery_crash'; +--enable_warnings # show that it's read-only # diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_aux_table_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_aux_table_basic.test index 2ea99cf9835..04ca34c2b19 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_aux_table_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_aux_table_basic.test @@ -15,8 +15,10 @@ SELECT @start_global_value; select 
@@session.innodb_ft_aux_table; show global variables like 'innodb_ft_aux_table'; show session variables like 'innodb_ft_aux_table'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_aux_table'; select * from information_schema.session_variables where variable_name='innodb_ft_aux_table'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_ft_aux_table='Salmon'; diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_cache_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_cache_size_basic.test index f6d62835f0a..30bcd08d4dd 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_cache_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_cache_size_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_ft_cache_size; select @@session.innodb_ft_cache_size; show global variables like 'innodb_ft_cache_size'; show session variables like 'innodb_ft_cache_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_cache_size'; select * from information_schema.session_variables where variable_name='innodb_ft_cache_size'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_enable_diag_print_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_enable_diag_print_basic.test index ebe9cc556ec..630ada004df 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_enable_diag_print_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_enable_diag_print_basic.test @@ -18,28 +18,38 @@ select @@global.innodb_ft_enable_diag_print; select @@session.innodb_ft_enable_diag_print; show global variables like 'innodb_ft_enable_diag_print'; show session variables like 'innodb_ft_enable_diag_print'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings # # show 
that it's writable # set global innodb_ft_enable_diag_print='OFF'; select @@global.innodb_ft_enable_diag_print; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings set @@global.innodb_ft_enable_diag_print=1; select @@global.innodb_ft_enable_diag_print; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings set global innodb_ft_enable_diag_print=0; select @@global.innodb_ft_enable_diag_print; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings set @@global.innodb_ft_enable_diag_print='ON'; select @@global.innodb_ft_enable_diag_print; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_ft_enable_diag_print='OFF'; --error ER_GLOBAL_VARIABLE @@ -57,8 +67,10 @@ set global innodb_ft_enable_diag_print=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_ft_enable_diag_print=-3; select @@global.innodb_ft_enable_diag_print; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_diag_print'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_diag_print'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_ft_enable_diag_print='AUTO'; diff --git 
a/mysql-test/suite/sys_vars/t/innodb_ft_enable_stopword_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_enable_stopword_basic.test index 1a983a3d7e6..5eb5af4df23 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_enable_stopword_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_enable_stopword_basic.test @@ -18,8 +18,10 @@ select @@session.innodb_ft_enable_stopword in (0, 1); select @@session.innodb_ft_enable_stopword; show global variables like 'innodb_ft_enable_stopword'; show session variables like 'innodb_ft_enable_stopword'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_stopword'; +--enable_warnings # # show that it's writable @@ -28,26 +30,34 @@ set global innodb_ft_enable_stopword='OFF'; set session innodb_ft_enable_stopword='OFF'; select @@global.innodb_ft_enable_stopword; select @@session.innodb_ft_enable_stopword; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_stopword'; +--enable_warnings set @@global.innodb_ft_enable_stopword=1; set @@session.innodb_ft_enable_stopword=1; select @@global.innodb_ft_enable_stopword; select @@session.innodb_ft_enable_stopword; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_stopword'; +--enable_warnings set global innodb_ft_enable_stopword=0; set session innodb_ft_enable_stopword=0; select @@global.innodb_ft_enable_stopword; select @@session.innodb_ft_enable_stopword; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where 
variable_name='innodb_ft_enable_stopword'; +--enable_warnings set @@global.innodb_ft_enable_stopword='ON'; set @@session.innodb_ft_enable_stopword='ON'; select @@global.innodb_ft_enable_stopword; select @@session.innodb_ft_enable_stopword; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_stopword'; +--enable_warnings # # incorrect types @@ -74,8 +84,10 @@ set global innodb_ft_enable_stopword=-3; set session innodb_ft_enable_stopword=-7; select @@global.innodb_ft_enable_stopword; select @@session.innodb_ft_enable_stopword; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_enable_stopword'; select * from information_schema.session_variables where variable_name='innodb_ft_enable_stopword'; +--enable_warnings # # Cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_max_token_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_max_token_size_basic.test index e75517466d7..8f6f93f7517 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_max_token_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_max_token_size_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_ft_max_token_size; select @@session.innodb_ft_max_token_size; show global variables like 'innodb_ft_max_token_size'; show session variables like 'innodb_ft_max_token_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_max_token_size'; select * from information_schema.session_variables where variable_name='innodb_ft_max_token_size'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_min_token_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_min_token_size_basic.test index edf63c70782..753985e1af0 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_min_token_size_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_ft_min_token_size_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_ft_min_token_size; select @@session.innodb_ft_min_token_size; show global variables like 'innodb_ft_min_token_size'; show session variables like 'innodb_ft_min_token_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_min_token_size'; select * from information_schema.session_variables where variable_name='innodb_ft_min_token_size'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_num_word_optimize_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_num_word_optimize_basic.test index 255caf86116..f288398e595 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_num_word_optimize_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_num_word_optimize_basic.test @@ -18,16 +18,20 @@ select @@global.innodb_ft_num_word_optimize; select @@session.innodb_ft_num_word_optimize; show global variables like 'innodb_ft_num_word_optimize'; show session variables like 'innodb_ft_num_word_optimize'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_num_word_optimize'; select * from information_schema.session_variables where variable_name='innodb_ft_num_word_optimize'; +--enable_warnings # # show that it's writable # set global innodb_ft_num_word_optimize=1000; select @@global.innodb_ft_num_word_optimize; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_num_word_optimize'; select * from information_schema.session_variables where variable_name='innodb_ft_num_word_optimize'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_ft_num_word_optimize=1000; @@ -43,7 +47,9 @@ set global innodb_ft_num_word_optimize="foo"; set global innodb_ft_num_word_optimize=-7; select @@global.innodb_ft_num_word_optimize; +--disable_warnings select * from 
information_schema.global_variables where variable_name='innodb_ft_num_word_optimize'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_result_cache_limit_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_result_cache_limit_basic.test index 245ed4abdfb..0a797a5ab5d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_result_cache_limit_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_result_cache_limit_basic.test @@ -5,11 +5,6 @@ --source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - # # show the global and session values; # @@ -18,8 +13,10 @@ select @@global.innodb_ft_result_cache_limit; select @@session.innodb_ft_result_cache_limit; show global variables like 'innodb_ft_result_cache_limit'; show session variables like 'innodb_ft_result_cache_limit'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_result_cache_limit'; select * from information_schema.session_variables where variable_name='innodb_ft_result_cache_limit'; +--enable_warnings # # test default, min, max value @@ -32,7 +29,10 @@ select @@innodb_ft_result_cache_limit; set global innodb_ft_result_cache_limit=1000000; select @@innodb_ft_result_cache_limit; -set global innodb_ft_result_cache_limit=4000000000; +set global innodb_ft_result_cache_limit=4294967295; +select @@innodb_ft_result_cache_limit; + +set global innodb_ft_result_cache_limit=4*1024*1024*1024; select @@innodb_ft_result_cache_limit; eval set global innodb_ft_result_cache_limit=$innodb_ft_result_cache_limit_orig; diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_server_stopword_table_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_server_stopword_table_basic.test index e227e790a1d..5de822a54e5 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_server_stopword_table_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_ft_server_stopword_table_basic.test @@ -15,8 +15,12 @@ SELECT @start_global_value; select @@session.innodb_ft_server_stopword_table; show global variables like 'innodb_ft_server_stopword_table'; show session variables like 'innodb_ft_server_stopword_table'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_server_stopword_table'; select * from information_schema.session_variables where variable_name='innodb_ft_server_stopword_table'; +--enable_warnings + +call mtr.add_suppression("\\[ERROR\\] InnoDB: user stopword table Salmon does not exist."); --error ER_GLOBAL_VARIABLE set session innodb_ft_server_stopword_table='Salmon'; diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_sort_pll_degree_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_sort_pll_degree_basic.test index 3cf55f6700b..cacd6a690b8 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_sort_pll_degree_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_sort_pll_degree_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_ft_sort_pll_degree; select @@session.innodb_ft_sort_pll_degree; show global variables like 'innodb_ft_sort_pll_degree'; show session variables like 'innodb_ft_sort_pll_degree'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_sort_pll_degree'; select * from information_schema.session_variables where variable_name='innodb_ft_sort_pll_degree'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_total_cache_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_total_cache_size_basic.test index 772ec5a1919..207ec64b705 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_total_cache_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_total_cache_size_basic.test @@ -1,9 +1,9 @@ ---source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where 
plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} +# +# 2011-11-17 - Added +# + +--source include/have_innodb.inc # # show the global and session values; @@ -13,8 +13,10 @@ select @@global.innodb_ft_total_cache_size; select @@session.innodb_ft_total_cache_size; show global variables like 'innodb_ft_total_cache_size'; show session variables like 'innodb_ft_total_cache_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_total_cache_size'; select * from information_schema.session_variables where variable_name='innodb_ft_total_cache_size'; +--enable_warnings # # show that it's read-only @@ -24,4 +26,3 @@ set global innodb_ft_total_cache_size=1; --error ER_INCORRECT_GLOBAL_LOCAL_VAR set session innodb_ft_total_cache_size=1; - diff --git a/mysql-test/suite/sys_vars/t/innodb_ft_user_stopword_table_basic.test b/mysql-test/suite/sys_vars/t/innodb_ft_user_stopword_table_basic.test index 159e570b3ce..475bf8df526 100644 --- a/mysql-test/suite/sys_vars/t/innodb_ft_user_stopword_table_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_ft_user_stopword_table_basic.test @@ -16,8 +16,12 @@ select @@session.innodb_ft_user_stopword_table; show global variables like 'innodb_ft_user_stopword_table'; show session variables like 'innodb_ft_user_stopword_table'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_ft_user_stopword_table'; select * from information_schema.session_variables where variable_name='innodb_ft_user_stopword_table'; +--enable_warnings + +call mtr.add_suppression("\\[ERROR\\] InnoDB: user stopword table Salmon does not exist."); --error ER_WRONG_VALUE_FOR_VAR set session innodb_ft_user_stopword_table='Salmon'; @@ -35,4 +39,3 @@ set global innodb_ft_user_stopword_table=1e1; --error ER_WRONG_VALUE_FOR_VAR set global innodb_ft_user_stopword_table='Salmon'; -SET @@session.innodb_ft_user_stopword_table=@start_global_value; diff --git 
a/mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test b/mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test index 8d3f3afa0a9..877fe17b003 100644 --- a/mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_large_prefix_basic.test @@ -18,28 +18,38 @@ select @@global.innodb_large_prefix; select @@session.innodb_large_prefix; show global variables like 'innodb_large_prefix'; show session variables like 'innodb_large_prefix'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings # # show that it's writable # set global innodb_large_prefix='OFF'; select @@global.innodb_large_prefix; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings set @@global.innodb_large_prefix=1; select @@global.innodb_large_prefix; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings set global innodb_large_prefix=0; select @@global.innodb_large_prefix; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings set @@global.innodb_large_prefix='ON'; select @@global.innodb_large_prefix; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session 
innodb_large_prefix='OFF'; --error ER_GLOBAL_VARIABLE @@ -57,8 +67,10 @@ set global innodb_large_prefix=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_large_prefix=-3; select @@global.innodb_large_prefix; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_large_prefix'; select * from information_schema.session_variables where variable_name='innodb_large_prefix'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_large_prefix='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_limit_optimistic_insert_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_limit_optimistic_insert_debug_basic.test index 7998297c69e..8f2271cbd7f 100644 --- a/mysql-test/suite/sys_vars/t/innodb_limit_optimistic_insert_debug_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_limit_optimistic_insert_debug_basic.test @@ -12,20 +12,26 @@ select @@global.innodb_limit_optimistic_insert_debug; select @@session.innodb_limit_optimistic_insert_debug; show global variables like 'innodb_limit_optimistic_insert_debug'; show session variables like 'innodb_limit_optimistic_insert_debug'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_limit_optimistic_insert_debug'; select * from information_schema.session_variables where variable_name='innodb_limit_optimistic_insert_debug'; +--enable_warnings # # show that it's writable # set global innodb_limit_optimistic_insert_debug=1; select @@global.innodb_limit_optimistic_insert_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_limit_optimistic_insert_debug'; select * from information_schema.session_variables where variable_name='innodb_limit_optimistic_insert_debug'; +--enable_warnings set @@global.innodb_limit_optimistic_insert_debug=0; select @@global.innodb_limit_optimistic_insert_debug; +--disable_warnings select * from information_schema.global_variables where 
variable_name='innodb_limit_optimistic_insert_debug'; select * from information_schema.session_variables where variable_name='innodb_limit_optimistic_insert_debug'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_limit_optimistic_insert_debug='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test b/mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test index f80b8e48736..a2aecf4ca8a 100644 --- a/mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_lock_wait_timeout_basic.test @@ -50,17 +50,21 @@ SELECT @@session.innodb_lock_wait_timeout; # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_lock_wait_timeout = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_lock_wait_timeout'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_lock_wait_timeout'; +--enable_warnings --echo 1 Expected @@ -93,6 +97,73 @@ SELECT COUNT(@@GLOBAL.innodb_lock_wait_timeout); SELECT innodb_lock_wait_timeout = @@SESSION.innodb_lock_wait_timeout; # +# check the default value +# +set @@global.innodb_lock_wait_timeout=100; +set @@global.innodb_lock_wait_timeout=DEFAULT; +select @@global.innodb_lock_wait_timeout; +set @@session.innodb_lock_wait_timeout=100; +set @@session.innodb_lock_wait_timeout=DEFAULT; +select @@session.innodb_lock_wait_timeout; + +# +# check for valid values +# + +SET @@global.innodb_lock_wait_timeout=1; +SELECT @@global.innodb_lock_wait_timeout; +SET @@global.innodb_lock_wait_timeout=1024; +SELECT @@global.innodb_lock_wait_timeout; +SET @@global.innodb_lock_wait_timeout=1073741824; +SELECT 
@@global.innodb_lock_wait_timeout; + +SET @@session.innodb_lock_wait_timeout=1; +SELECT @@session.innodb_lock_wait_timeout; +SET @@session.innodb_lock_wait_timeout=1024; +SELECT @@session.innodb_lock_wait_timeout; +SET @@session.innodb_lock_wait_timeout=1073741824; +SELECT @@session.innodb_lock_wait_timeout; + +# +# check for invalid values +# +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_lock_wait_timeout="t"; +SELECT @@global.innodb_lock_wait_timeout; +SET @@global.innodb_lock_wait_timeout=-1024; +SELECT @@global.innodb_lock_wait_timeout; +SET @@global.innodb_lock_wait_timeout=1073741825; +SELECT @@global.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_lock_wait_timeout=" "; +SELECT @@global.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_lock_wait_timeout=' '; +SELECT @@global.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_lock_wait_timeout=1.1; +SELECT @@global.innodb_lock_wait_timeout; + + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.innodb_lock_wait_timeout="T"; +SELECT @@session.innodb_lock_wait_timeout; +SET @@session.innodb_lock_wait_timeout=-1024; +SELECT @@session.innodb_lock_wait_timeout; +SET @@session.innodb_lock_wait_timeout=1073999999; +SELECT @@session.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.innodb_lock_wait_timeout=' '; +SELECT @@session.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.innodb_lock_wait_timeout=" "; +SELECT @@session.innodb_lock_wait_timeout; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.innodb_lock_wait_timeout=1.1; +SELECT @@session.innodb_lock_wait_timeout; + + + # Cleanup # diff --git a/mysql-test/suite/sys_vars/t/innodb_locks_unsafe_for_binlog_basic.test b/mysql-test/suite/sys_vars/t/innodb_locks_unsafe_for_binlog_basic.test index 08792d299a1..755c5c62c70 100644 --- a/mysql-test/suite/sys_vars/t/innodb_locks_unsafe_for_binlog_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_locks_unsafe_for_binlog_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_locks_unsafe_for_binlog); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_locks_unsafe_for_binlog, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_locks_unsafe_for_binlog'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_locks_unsafe_for_binlog); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_locks_unsafe_for_binlog'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_buffer_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_buffer_size_basic.test index 74c1aeab87a..550bba0c0b7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_buffer_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_log_buffer_size_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_log_buffer_size); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_log_buffer_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_buffer_size'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_log_buffer_size); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_buffer_size'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_checkpoint_now_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_checkpoint_now_basic.test index 00aa476e8d2..331803fff86 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_checkpoint_now_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_log_checkpoint_now_basic.test @@ -1 +1,79 @@ ---echo XtraDB extension +--source include/have_innodb.inc +--source include/have_debug.inc + +SET @start_global_value = @@global.innodb_log_checkpoint_now; +SELECT @start_global_value; + +# +# exists as global only +# +select @@global.innodb_log_checkpoint_now in (0, 1); +select @@global.innodb_log_checkpoint_now; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_log_checkpoint_now; +show global variables like 'innodb_log_checkpoint_now'; +show session variables like 'innodb_log_checkpoint_now'; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +--enable_warnings + +# +# show that it's writable +# +set global innodb_log_checkpoint_now=1; +# Should always be OFF +select @@global.innodb_log_checkpoint_now; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +--enable_warnings + +set @@global.innodb_log_checkpoint_now=0; +# Should always be OFF +select @@global.innodb_log_checkpoint_now; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +--enable_warnings + +set global innodb_log_checkpoint_now=ON; +# Should always be OFF +select @@global.innodb_log_checkpoint_now; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +--enable_warnings + +set global innodb_log_checkpoint_now=OFF; +# Should always be OFF 
+select @@global.innodb_log_checkpoint_now; +--disable_warnings +select * from information_schema.global_variables where variable_name='innodb_log_checkpoint_now'; +select * from information_schema.session_variables where variable_name='innodb_log_checkpoint_now'; +--enable_warnings + +--error ER_GLOBAL_VARIABLE +set session innodb_log_checkpoint_now='some'; + +--error ER_GLOBAL_VARIABLE +set @@session.innodb_log_checkpoint_now='some'; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_log_checkpoint_now=1.1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_log_checkpoint_now='foo'; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_log_checkpoint_now=-2; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_log_checkpoint_now=1e1; + +# +# Cleanup +# + +SET @@global.innodb_log_checkpoint_now = @start_global_value; +SELECT @@global.innodb_log_checkpoint_now; diff --git a/mysql-test/suite/sys_vars/t/innodb_log_checksums_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_checksums_basic.test new file mode 100644 index 00000000000..8ebc9f1652b --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_log_checksums_basic.test @@ -0,0 +1,36 @@ +--source include/have_innodb.inc + +# Check the default value +SET @orig = @@global.innodb_log_checksums; +SELECT @orig; + +-- error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_log_checksums = 'crc32'; +SELECT @@global.innodb_log_checksums; + +-- error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_log_checksums = 2; +SELECT @@global.innodb_log_checksums; + +-- error ER_WRONG_TYPE_FOR_VAR +SET GLOBAL innodb_log_checksums = 1e2; +SELECT @@global.innodb_log_checksums; + +-- error ER_WRONG_TYPE_FOR_VAR +SET GLOBAL innodb_log_checksums = 1.0; +SELECT @@global.innodb_log_checksums; + +-- error ER_GLOBAL_VARIABLE +SET innodb_log_checksums = OFF; +SELECT @@global.innodb_log_checksums; + +SET GLOBAL innodb_log_checksums = OFF; +SELECT @@global.innodb_log_checksums; + +SET GLOBAL innodb_log_checksums = default; + +SET GLOBAL 
innodb_log_checksums = ON; +SELECT @@global.innodb_log_checksums; + +SET GLOBAL innodb_log_checksums = @orig; +SELECT @@global.innodb_log_checksums; diff --git a/mysql-test/suite/sys_vars/t/innodb_log_compressed_pages_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_compressed_pages_basic.test index 8d10309ae02..2c83a36a0fd 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_compressed_pages_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_log_compressed_pages_basic.test @@ -1,10 +1,5 @@ --source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - SET @start_global_value = @@global.innodb_log_compressed_pages; SELECT @start_global_value; @@ -39,17 +34,21 @@ SELECT @@global.innodb_log_compressed_pages; # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_log_compressed_pages,'ON','OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_compressed_pages'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_log_compressed_pages); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_compressed_pages'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_file_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_file_size_basic.test index 08925b73957..21fd2a80021 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_file_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_log_file_size_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_log_file_size); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT 
@@GLOBAL.innodb_log_file_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_file_size'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_log_file_size); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_file_size'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_files_in_group_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_files_in_group_basic.test index 67978efe76a..60046bd09e6 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_files_in_group_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_log_files_in_group_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_log_files_in_group); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_log_files_in_group = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_files_in_group'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_log_files_in_group); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_files_in_group'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_group_home_dir_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_group_home_dir_basic.test index 7e3969c6bd7..d6d5446c4c7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_log_group_home_dir_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_log_group_home_dir_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_log_group_home_dir); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_log_group_home_dir = VARIABLE_VALUE FROM 
INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_group_home_dir'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_log_group_home_dir); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_log_group_home_dir'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_log_write_ahead_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_log_write_ahead_size_basic.test new file mode 100644 index 00000000000..8693c6a7b1b --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_log_write_ahead_size_basic.test @@ -0,0 +1,93 @@ +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_log_write_ahead_size; + +# default value is limited by innodb_page_size and varying along with the page size. +#SELECT @start_global_value; + +#set common valid value +SET global innodb_log_write_ahead_size=4096; + +# +# exists as global only +# +--echo Valid values are positive number +SELECT @@global.innodb_log_write_ahead_size >= 512; +SELECT @@global.innodb_log_write_ahead_size <= 16*1024; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@session.innodb_log_write_ahead_size; +SHOW global variables LIKE 'innodb_log_write_ahead_size'; +SHOW session variables LIKE 'innodb_log_write_ahead_size'; +--disable_warnings +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +SELECT * FROM information_schema.session_variables +WHERE variable_name='innodb_log_write_ahead_size'; +--enable_warnings + +# +# show that it's writable +# +SET global innodb_log_write_ahead_size=1024; +SELECT @@global.innodb_log_write_ahead_size; +--disable_warnings +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +SELECT * FROM information_schema.session_variables +WHERE variable_name='innodb_log_write_ahead_size'; +--enable_warnings +--error 
ER_GLOBAL_VARIABLE +SET session innodb_log_write_ahead_size=2048; + +# +# Valid values +# +SET global innodb_log_write_ahead_size=512; +SELECT @@global.innodb_log_write_ahead_size; +SET global innodb_log_write_ahead_size=2048; +SELECT @@global.innodb_log_write_ahead_size; +SET global innodb_log_write_ahead_size=4096; +SELECT @@global.innodb_log_write_ahead_size; + +# limited by innodb_page_size, and the followings are occationally invalid +#SET global innodb_log_write_ahead_size=8192; +#SELECT @@global.innodb_log_write_ahead_size; +#SET global innodb_log_write_ahead_size=16384; +#SELECT @@global.innodb_log_write_ahead_size; + +# +# Invalid values +# +SET global innodb_log_write_ahead_size=0; +SELECT @@global.innodb_log_write_ahead_size; +SET global innodb_log_write_ahead_size=-1024; +SELECT @@global.innodb_log_write_ahead_size; +SET global innodb_log_write_ahead_size=3000; +SELECT @@global.innodb_log_write_ahead_size; + +# limited by innodb_page_size, and the followings result occationally different +#SET global innodb_log_write_ahead_size=32768; +#SELECT @@global.innodb_log_write_ahead_size; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_log_write_ahead_size=1.1; +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_log_write_ahead_size=1e1; +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_log_write_ahead_size="foo"; +SET global innodb_log_write_ahead_size=-7; +SELECT @@global.innodb_log_write_ahead_size; +--disable_warnings +SELECT * FROM information_schema.global_variables +WHERE variable_name='innodb_log_write_ahead_size'; +--enable_warnings + +# +# cleanup +# + +SET @@global.innodb_log_write_ahead_size = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_lru_scan_depth_basic.test b/mysql-test/suite/sys_vars/t/innodb_lru_scan_depth_basic.test index 12211308410..8f08a1bff14 100644 --- a/mysql-test/suite/sys_vars/t/innodb_lru_scan_depth_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_lru_scan_depth_basic.test @@ 
-18,16 +18,20 @@ select @@global.innodb_lru_scan_depth; select @@session.innodb_lru_scan_depth; show global variables like 'innodb_lru_scan_depth'; show session variables like 'innodb_lru_scan_depth'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_lru_scan_depth'; select * from information_schema.session_variables where variable_name='innodb_lru_scan_depth'; +--enable_warnings # # show that it's writable # set global innodb_lru_scan_depth=325; select @@global.innodb_lru_scan_depth; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_lru_scan_depth'; select * from information_schema.session_variables where variable_name='innodb_lru_scan_depth'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_lru_scan_depth=444; @@ -43,10 +47,14 @@ set global innodb_lru_scan_depth="foo"; set global innodb_lru_scan_depth=7; select @@global.innodb_lru_scan_depth; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_lru_scan_depth'; +--enable_warnings set global innodb_lru_scan_depth=-7; select @@global.innodb_lru_scan_depth; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_lru_scan_depth'; +--enable_warnings # # min/max values diff --git a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test index 5b4eaa41598..e8cc46086bc 100644 --- a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test @@ -31,20 +31,29 @@ ###################################################################### -################################################################################ +################################################################################ # Saving initial value of innodb_max_dirty_pages_pct in a temporary variable # 
-################################################################################ +################################################################################ SET @global_start_value = @@global.innodb_max_dirty_pages_pct; SELECT @global_start_value; +# need this because setting innodb_max_dirty_pages_pct to lower than this +# should cause a warning +SET @global_start_max_dirty_lwm_value = @@global.innodb_max_dirty_pages_pct_lwm; +SELECT @global_start_max_dirty_lwm_value; + + +SET @@global.innodb_max_dirty_pages_pct_lwm = 0; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; + --echo '#--------------------FN_DYNVARS_046_01------------------------#' -######################################################################## +######################################################################## # Display the DEFAULT value of innodb_max_dirty_pages_pct # -######################################################################## +######################################################################## SET @@global.innodb_max_dirty_pages_pct = 0; -SET @@global.innodb_max_dirty_pages_pct = @global_start_value; +SET @@global.innodb_max_dirty_pages_pct = DEFAULT; SELECT @@global.innodb_max_dirty_pages_pct; --echo '#---------------------FN_DYNVARS_046_02-------------------------#' @@ -63,11 +72,11 @@ SET global innodb_max_dirty_pages_pct = 0; SELECT @@global.innodb_max_dirty_pages_pct; --echo '#--------------------FN_DYNVARS_046_03------------------------#' -########################################################################## +########################################################################## # change the value of innodb_max_dirty_pages_pct to a valid value # -########################################################################## +########################################################################## -SET @@global.innodb_max_dirty_pages_pct = 0; +SET @@global.innodb_max_dirty_pages_pct = 0.0; SELECT @@global.innodb_max_dirty_pages_pct; SET 
@@global.innodb_max_dirty_pages_pct = 1; @@ -75,14 +84,26 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = 99; SELECT @@global.innodb_max_dirty_pages_pct; ---echo '#--------------------FN_DYNVARS_046_04-------------------------#' -########################################################################### +--echo '#--------------------FN_DYNVARS_046_04------------------------#' +########################################################################## +# change value of based on innodb_max_dirty_pages_pct_lwm # +########################################################################## +SET @@global.innodb_max_dirty_pages_pct_lwm = @global_start_value - 1; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; + +# this should cause warning +SET @@global.innodb_max_dirty_pages_pct = @global_start_value - 2; +SELECT @@global.innodb_max_dirty_pages_pct; + +--echo '#--------------------FN_DYNVARS_046_05-------------------------#' +########################################################################### # Change the value of innodb_max_dirty_pages_pct to invalid value # -########################################################################### +########################################################################### SET @@global.innodb_max_dirty_pages_pct = -1; SELECT @@global.innodb_max_dirty_pages_pct; - +SET @@global.innodb_max_dirty_pages_pct = -1024; +SELECT @@global.innodb_max_dirty_pages_pct; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_max_dirty_pages_pct = "T"; SELECT @@global.innodb_max_dirty_pages_pct; @@ -91,26 +112,49 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = "Y"; SELECT @@global.innodb_max_dirty_pages_pct; +SET @@global.innodb_max_dirty_pages_pct = 100; +SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = 1001; SELECT @@global.innodb_max_dirty_pages_pct; +SET @@global.innodb_max_dirty_pages_pct = 100000; +SELECT 
@@global.innodb_max_dirty_pages_pct; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_dirty_pages_pct = ' '; +SELECT @@global.innodb_max_dirty_pages_pct; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_dirty_pages_pct = " "; +SELECT @@global.innodb_max_dirty_pages_pct; +SET @@global.innodb_max_dirty_pages_pct = 1.1; +SELECT @@global.innodb_max_dirty_pages_pct; +set global innodb_max_dirty_pages_pct = 0.1; +SELECT @@global.innodb_max_dirty_pages_pct; +set global innodb_max_dirty_pages_pct = 31.34; +SELECT @@global.innodb_max_dirty_pages_pct; +set global innodb_max_dirty_pages_pct = 100; +SELECT @@global.innodb_max_dirty_pages_pct; +set global innodb_max_dirty_pages_pct = 99.999; +SELECT @@global.innodb_max_dirty_pages_pct; - ---echo '#----------------------FN_DYNVARS_046_05------------------------#' -######################################################################### +--echo '#----------------------FN_DYNVARS_046_06------------------------#' +######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_max_dirty_pages_pct = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct'; +--enable_warnings SELECT @@global.innodb_max_dirty_pages_pct; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct'; +--enable_warnings ---echo '#---------------------FN_DYNVARS_046_06-------------------------#' -################################################################### +--echo '#---------------------FN_DYNVARS_046_07-------------------------#' +################################################################### # Check if ON and OFF values can be used on variable # -################################################################### 
+################################################################### --ERROR ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_max_dirty_pages_pct = OFF; @@ -120,23 +164,26 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = ON; SELECT @@global.innodb_max_dirty_pages_pct; ---echo '#---------------------FN_DYNVARS_046_07----------------------#' -################################################################### +--echo '#---------------------FN_DYNVARS_046_08----------------------#' +################################################################### # Check if TRUE and FALSE values can be used on variable # -################################################################### +################################################################### SET @@global.innodb_max_dirty_pages_pct = TRUE; SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = FALSE; SELECT @@global.innodb_max_dirty_pages_pct; -############################## +############################## # Restore initial value # ############################## SET @@global.innodb_max_dirty_pages_pct = @global_start_value; SELECT @@global.innodb_max_dirty_pages_pct; +SET @@global.innodb_max_dirty_pages_pct_lwm = @global_start_max_dirty_lwm_value; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; + ############################################################### # END OF innodb_max_dirty_pages_pct TESTS # -############################################################### +############################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_func.test b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_func.test index 62c88f43ebd..c7a9e567e69 100644 --- a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_func.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_func.test @@ -33,15 +33,22 @@ SET @innodb_max_dirty_pages_pct = @@global.innodb_max_dirty_pages_pct; 
############################################################################ SET @@global.innodb_max_dirty_pages_pct = 80; +--echo 'connect (con1,localhost,root,,,,)' connect (con1,localhost,root,,,,); +--echo 'connection con1' connection con1; SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = 70; +--echo 'connect (con2,localhost,root,,,,)' connect (con2,localhost,root,,,,); +--echo 'connection con2' connection con2; SELECT @@global.innodb_max_dirty_pages_pct; +--echo 'connection default' connection default; +--echo 'disconnect con2' disconnect con2; +--echo 'disconnect con1' disconnect con1; # restore initial value SET @@global.innodb_max_dirty_pages_pct = @innodb_max_dirty_pages_pct; diff --git a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_lwm_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_lwm_basic.test index d81b6cc725b..b06f209a263 100644 --- a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_lwm_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_lwm_basic.test @@ -47,7 +47,7 @@ SELECT @pct_start_value; ######################################################################## SET @@global.innodb_max_dirty_pages_pct_lwm = 0; -SET @@global.innodb_max_dirty_pages_pct_lwm = @pct_lwm_start_value; +SET @@global.innodb_max_dirty_pages_pct_lwm = DEFAULT; SELECT @@global.innodb_max_dirty_pages_pct_lwm; --echo '#---------------------FN_DYNVARS_046_02-------------------------#' @@ -96,22 +96,40 @@ SELECT @@global.innodb_max_dirty_pages_pct_lwm; SET @@global.innodb_max_dirty_pages_pct_lwm = @pct_start_value + 1; SELECT @@global.innodb_max_dirty_pages_pct_lwm; -SET @@global.innodb_max_dirty_pages_pct_lwm = 100; + +SET @@global.innodb_max_dirty_pages_pct_lwm = 0.0; SELECT @@global.innodb_max_dirty_pages_pct_lwm; +SET @@global.innodb_max_dirty_pages_pct_lwm = 1.1; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +SET @@global.innodb_max_dirty_pages_pct_lwm = 51.12; +SELECT 
@@global.innodb_max_dirty_pages_pct_lwm; + +SET @@global.innodb_max_dirty_pages_pct_lwm = 100; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_dirty_pages_pct_lwm = " "; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_dirty_pages_pct_lwm = ' '; +SELECT @@global.innodb_max_dirty_pages_pct_lwm; --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_max_dirty_pages_pct_lwm = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct_lwm'; +--enable_warnings SELECT @@global.innodb_max_dirty_pages_pct_lwm; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct_lwm'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_basic.test index 9e6b8201e3d..6c7676f113d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_basic.test @@ -66,16 +66,37 @@ SELECT @@global.innodb_max_purge_lag; # change the value of innodb_max_purge_lag to a valid value # ########################################################################## - SET @@global.innodb_max_purge_lag = 0; SELECT @@global.innodb_max_purge_lag; SET @@global.innodb_max_purge_lag = 1; SELECT @@global.innodb_max_purge_lag; + SET @@global.innodb_max_purge_lag = 4294967295; SELECT @@global.innodb_max_purge_lag; ---echo 
'#--------------------FN_DYNVARS_046_04-------------------------#' +--echo '#--------------------FN_DYNVARS_046_04------------------------#' +########################################################################## +# check the value of innodb_concurrency_tickets for out of bounds # +########################################################################## + +# With a 64 bit mysqld:18446744073709551615,with a 32 bit mysqld: 4294967295 +--disable_warnings +SET @@global.innodb_max_purge_lag = 4294967296; +--enable_warnings +SELECT @@global.innodb_max_purge_lag IN (4294967296,4294967295); + +--disable_warnings +SET @@global.innodb_max_purge_lag = 12345678901; +--enable_warnings +SELECT @@global.innodb_max_purge_lag IN (12345678901,4294967295); + +--disable_warnings +SET @@global.innodb_max_purge_lag = 18446744073709551615; +--enable_warnings +SELECT @@global.innodb_max_purge_lag IN (18446744073709551615,4294967295); + +--echo '#--------------------FN_DYNVARS_046_05-------------------------#' ########################################################################### # Change the value of innodb_max_purge_lag to invalid value # ########################################################################### @@ -83,6 +104,9 @@ SELECT @@global.innodb_max_purge_lag; SET @@global.innodb_max_purge_lag = -1; SELECT @@global.innodb_max_purge_lag; +SET @@global.innodb_max_purge_lag = -1024; +SELECT @@global.innodb_max_purge_lag; + --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_max_purge_lag = "T"; SELECT @@global.innodb_max_purge_lag; @@ -91,26 +115,35 @@ SELECT @@global.innodb_max_purge_lag; SET @@global.innodb_max_purge_lag = "Y"; SELECT @@global.innodb_max_purge_lag; - -SET @@global.innodb_max_purge_lag = 1001; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_purge_lag = 1.1; SELECT @@global.innodb_max_purge_lag; ---echo '#----------------------FN_DYNVARS_046_05------------------------#' +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_purge_lag = ' '; 
+SELECT @@global.innodb_max_purge_lag; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_max_purge_lag = " "; +SELECT @@global.innodb_max_purge_lag; + +--echo '#----------------------FN_DYNVARS_046_06------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_max_purge_lag = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_purge_lag'; +--enable_warnings SELECT @@global.innodb_max_purge_lag; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_purge_lag'; +--enable_warnings - - - ---echo '#---------------------FN_DYNVARS_046_06-------------------------#' +--echo '#---------------------FN_DYNVARS_046_07-------------------------#' ################################################################### # Check if ON and OFF values can be used on variable # ################################################################### @@ -123,12 +156,11 @@ SELECT @@global.innodb_max_purge_lag; SET @@global.innodb_max_purge_lag = ON; SELECT @@global.innodb_max_purge_lag; ---echo '#---------------------FN_DYNVARS_046_07----------------------#' +--echo '#---------------------FN_DYNVARS_046_08----------------------#' ################################################################### # Check if TRUE and FALSE values can be used on variable # ################################################################### - SET @@global.innodb_max_purge_lag = TRUE; SELECT @@global.innodb_max_purge_lag; SET @@global.innodb_max_purge_lag = FALSE; diff --git a/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_delay_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_delay_basic.test index 6374e3716df..f7580c99507 100644 --- 
a/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_delay_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_purge_lag_delay_basic.test @@ -13,9 +13,11 @@ SET @@GLOBAL.innodb_max_purge_lag_delay=1; SELECT COUNT(@@GLOBAL.innodb_max_purge_lag_delay); --echo 1 Expected +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_purge_lag_delay'; +--enable_warnings --echo 1 Expected SELECT @@innodb_max_purge_lag_delay = @@GLOBAL.innodb_max_purge_lag_delay; diff --git a/mysql-test/suite/sys_vars/t/innodb_mirrored_log_groups_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_undo_log_size_basic.test similarity index 62% rename from mysql-test/suite/sys_vars/t/innodb_mirrored_log_groups_basic.test rename to mysql-test/suite/sys_vars/t/innodb_max_undo_log_size_basic.test index 6edb07ac39f..9882578923e 100644 --- a/mysql-test/suite/sys_vars/t/innodb_mirrored_log_groups_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_undo_log_size_basic.test @@ -1,18 +1,18 @@ -################## mysql-test\t\innodb_mirrored_log_groups_basic.test ######### +############### mysql-test\t\innodb_max_undo_log_size_basic.test ############### # # -# Variable Name: innodb_mirrored_log_groups # +# Variable Name: innodb_max_undo_log_size # # Scope: Global # # Access Type: Static # # Data Type: numeric # # # # # -# Creation Date: 2008-02-07 # -# Author : Sharique Abdullah # +# Creation Date: 2014-27-05 # +# Author : Krunal Bauskar # # # # # -# Description:Test Cases of Dynamic System Variable innodb_mirrored_log_groups# +# Description:Test Cases of Dynamic System Variable innodb_max_undo_log_size # # that checks the behavior of this variable in the following ways # # * Value Check # # * Scope Check # @@ -24,79 +24,76 @@ --source include/have_innodb.inc ---echo '#---------------------BS_STVARS_037_01----------------------#' +--echo '#---------------------BS_STVARS_035_01----------------------#' 
#################################################################### # Displaying default value # #################################################################### -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); --echo 1 Expected ---echo '#---------------------BS_STVARS_037_02----------------------#' +--echo '#---------------------BS_STVARS_035_02----------------------#' #################################################################### # Check if Value can set # #################################################################### ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -SET @@GLOBAL.innodb_mirrored_log_groups=1; ---echo Expected error 'Read only variable' +SET @@GLOBAL.innodb_max_undo_log_size=1073741824; -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); --echo 1 Expected +SET @@GLOBAL.innodb_max_undo_log_size=18446744073709551615; +SELECT @@GLOBAL.innodb_max_undo_log_size; +--echo 18446744073709551615 Expected +SET @@GLOBAL.innodb_max_undo_log_size=1073741824; - ---echo '#---------------------BS_STVARS_037_03----------------------#' +--echo '#---------------------BS_STVARS_035_03----------------------#' ################################################################# # Check if the value in GLOBAL Table matches value in variable # ################################################################# -SELECT @@GLOBAL.innodb_mirrored_log_groups = VARIABLE_VALUE +--disable_warnings +SELECT @@GLOBAL.innodb_max_undo_log_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_mirrored_log_groups'; +WHERE VARIABLE_NAME='innodb_max_undo_log_size'; --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); --echo 1 Expected SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_mirrored_log_groups'; +FROM 
INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_max_undo_log_size'; --echo 1 Expected +--enable_warnings - ---echo '#---------------------BS_STVARS_037_04----------------------#' +--echo '#---------------------BS_STVARS_035_04----------------------#' ################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # ################################################################################ -SELECT @@innodb_mirrored_log_groups = @@GLOBAL.innodb_mirrored_log_groups; +SELECT @@innodb_max_undo_log_size = @@GLOBAL.innodb_max_undo_log_size; --echo 1 Expected ---echo '#---------------------BS_STVARS_037_05----------------------#' +--echo '#---------------------BS_STVARS_035_05----------------------#' ################################################################################ -# Check if innodb_mirrored_log_groups can be accessed with and without @@ sign # +# Check if innodb_max_undo_log_size can be accessed with and without @@ sign # ################################################################################ -SELECT COUNT(@@innodb_mirrored_log_groups); +SELECT COUNT(@@innodb_max_undo_log_size); --echo 1 Expected --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@local.innodb_mirrored_log_groups); +SELECT COUNT(@@local.innodb_max_undo_log_size); --echo Expected error 'Variable is a GLOBAL variable' --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@SESSION.innodb_mirrored_log_groups); +SELECT COUNT(@@SESSION.innodb_max_undo_log_size); --echo Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_mirrored_log_groups); +SELECT COUNT(@@GLOBAL.innodb_max_undo_log_size); --echo 1 Expected ---Error ER_BAD_FIELD_ERROR -SELECT innodb_mirrored_log_groups = @@SESSION.innodb_mirrored_log_groups; ---echo Expected error 'Readonly variable' - - diff --git a/mysql-test/suite/sys_vars/t/innodb_merge_threshold_set_all_debug_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_merge_threshold_set_all_debug_basic.test new file mode 100644 index 00000000000..0ea30277801 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_merge_threshold_set_all_debug_basic.test @@ -0,0 +1,30 @@ +--echo # +--echo # Basic test for innodb_merge_threshold_set_all_debug +--echo # + +--source include/have_innodb.inc + +# The config variable is a debug variable +-- source include/have_debug.inc + +SELECT @@global.innodb_merge_threshold_set_all_debug; + +set global innodb_merge_threshold_set_all_debug = 1; + +SELECT @@global.innodb_merge_threshold_set_all_debug; + +set global innodb_merge_threshold_set_all_debug = 51; + +SELECT @@global.innodb_merge_threshold_set_all_debug; + +set global innodb_merge_threshold_set_all_debug = 0; + +SELECT @@global.innodb_merge_threshold_set_all_debug; + +--error ER_GLOBAL_VARIABLE +set innodb_merge_threshold_set_all_debug = 50; + +set global innodb_merge_threshold_set_all_debug = 50; + +SELECT @@global.innodb_merge_threshold_set_all_debug; + diff --git a/mysql-test/suite/sys_vars/t/innodb_monitor_disable_basic.test b/mysql-test/suite/sys_vars/t/innodb_monitor_disable_basic.test index 0615d62a0e1..1b23ae14e49 100644 --- a/mysql-test/suite/sys_vars/t/innodb_monitor_disable_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_monitor_disable_basic.test @@ -2,11 +2,6 @@ # Test the metrics monitor system's control system # and counter accuracy. 
-if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - --source include/have_innodb.inc set global innodb_monitor_disable = All; # Test turn on/off the monitor counter with "all" option diff --git a/mysql-test/suite/sys_vars/t/innodb_monitor_enable_basic.test b/mysql-test/suite/sys_vars/t/innodb_monitor_enable_basic.test index 0615d62a0e1..1b23ae14e49 100644 --- a/mysql-test/suite/sys_vars/t/innodb_monitor_enable_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_monitor_enable_basic.test @@ -2,11 +2,6 @@ # Test the metrics monitor system's control system # and counter accuracy. -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - --source include/have_innodb.inc set global innodb_monitor_disable = All; # Test turn on/off the monitor counter with "all" option diff --git a/mysql-test/suite/sys_vars/t/innodb_monitor_reset_all_basic.test b/mysql-test/suite/sys_vars/t/innodb_monitor_reset_all_basic.test index 868f69300fa..1b23ae14e49 100644 --- a/mysql-test/suite/sys_vars/t/innodb_monitor_reset_all_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_monitor_reset_all_basic.test @@ -2,11 +2,6 @@ # Test the metrics monitor system's control system # and counter accuracy. 
-if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip not fixed in innodb 5.6.10 or earlier -} - --source include/have_innodb.inc set global innodb_monitor_disable = All; # Test turn on/off the monitor counter with "all" option diff --git a/mysql-test/suite/sys_vars/t/innodb_monitor_reset_basic.test b/mysql-test/suite/sys_vars/t/innodb_monitor_reset_basic.test index 868f69300fa..1b23ae14e49 100644 --- a/mysql-test/suite/sys_vars/t/innodb_monitor_reset_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_monitor_reset_basic.test @@ -2,11 +2,6 @@ # Test the metrics monitor system's control system # and counter accuracy. -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip not fixed in innodb 5.6.10 or earlier -} - --source include/have_innodb.inc set global innodb_monitor_disable = All; # Test turn on/off the monitor counter with "all" option diff --git a/mysql-test/suite/sys_vars/t/innodb_old_blocks_pct_basic.test b/mysql-test/suite/sys_vars/t/innodb_old_blocks_pct_basic.test index 0dcef3bb09f..1f72fc250ce 100644 --- a/mysql-test/suite/sys_vars/t/innodb_old_blocks_pct_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_old_blocks_pct_basic.test @@ -18,19 +18,29 @@ select @@global.innodb_old_blocks_pct; select @@session.innodb_old_blocks_pct; show global variables like 'innodb_old_blocks_pct'; show session variables like 'innodb_old_blocks_pct'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_pct'; select * from information_schema.session_variables where variable_name='innodb_old_blocks_pct'; +--enable_warnings # # show that it's writable # set global innodb_old_blocks_pct=10; select @@global.innodb_old_blocks_pct; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_pct'; select * from information_schema.session_variables 
where variable_name='innodb_old_blocks_pct'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_old_blocks_pct=1; +# +# check the default value +# +set @@global.innodb_old_blocks_pct=DEFAULT; +select @@global.innodb_old_blocks_pct; + # # incorrect types # @@ -40,16 +50,26 @@ set global innodb_old_blocks_pct=1.1; set global innodb_old_blocks_pct=1e1; --error ER_WRONG_TYPE_FOR_VAR set global innodb_old_blocks_pct="foo"; - +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_old_blocks_pct=" "; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_old_blocks_pct=''; + set global innodb_old_blocks_pct=4; select @@global.innodb_old_blocks_pct; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_pct'; +--enable_warnings set global innodb_old_blocks_pct=-7; select @@global.innodb_old_blocks_pct; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_pct'; +--enable_warnings set global innodb_old_blocks_pct=96; select @@global.innodb_old_blocks_pct; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_pct'; +--enable_warnings # # min/max values diff --git a/mysql-test/suite/sys_vars/t/innodb_old_blocks_time_basic.test b/mysql-test/suite/sys_vars/t/innodb_old_blocks_time_basic.test index 3efec2bbf15..d05e7244b93 100644 --- a/mysql-test/suite/sys_vars/t/innodb_old_blocks_time_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_old_blocks_time_basic.test @@ -18,16 +18,20 @@ select @@global.innodb_old_blocks_time; select @@session.innodb_old_blocks_time; show global variables like 'innodb_old_blocks_time'; show session variables like 'innodb_old_blocks_time'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_time'; select * from information_schema.session_variables where variable_name='innodb_old_blocks_time'; +--enable_warnings # # show that it's 
writable # set global innodb_old_blocks_time=10; select @@global.innodb_old_blocks_time; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_time'; select * from information_schema.session_variables where variable_name='innodb_old_blocks_time'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_old_blocks_time=1; @@ -43,7 +47,9 @@ set global innodb_old_blocks_time="foo"; set global innodb_old_blocks_time=-7; select @@global.innodb_old_blocks_time; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_old_blocks_time'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_online_alter_log_max_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_online_alter_log_max_size_basic.test index aa1cc83819e..b86f04cac15 100644 --- a/mysql-test/suite/sys_vars/t/innodb_online_alter_log_max_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_online_alter_log_max_size_basic.test @@ -12,20 +12,26 @@ select @@global.innodb_online_alter_log_max_size; select @@session.innodb_online_alter_log_max_size; show global variables like 'innodb_online_alter_log_max_size'; show session variables like 'innodb_online_alter_log_max_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_online_alter_log_max_size'; select * from information_schema.session_variables where variable_name='innodb_online_alter_log_max_size'; +--enable_warnings # # show that it's writable # set global innodb_online_alter_log_max_size=1048576; select @@global.innodb_online_alter_log_max_size; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_online_alter_log_max_size'; select * from information_schema.session_variables where variable_name='innodb_online_alter_log_max_size'; +--enable_warnings set @@global.innodb_online_alter_log_max_size=524288; select 
@@global.innodb_online_alter_log_max_size; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_online_alter_log_max_size'; select * from information_schema.session_variables where variable_name='innodb_online_alter_log_max_size'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_online_alter_log_max_size='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_open_files_basic.test b/mysql-test/suite/sys_vars/t/innodb_open_files_basic.test index c55b7e55937..d6b7c857fb4 100644 --- a/mysql-test/suite/sys_vars/t/innodb_open_files_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_open_files_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_open_files); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT @@GLOBAL.innodb_open_files = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_open_files'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_open_files); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_open_files'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_optimize_fulltext_only_basic.test b/mysql-test/suite/sys_vars/t/innodb_optimize_fulltext_only_basic.test index e9ff8a651bc..08b8c137342 100644 --- a/mysql-test/suite/sys_vars/t/innodb_optimize_fulltext_only_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_optimize_fulltext_only_basic.test @@ -18,28 +18,38 @@ select @@global.innodb_optimize_fulltext_only; select @@session.innodb_optimize_fulltext_only; show global variables like 'innodb_optimize_fulltext_only'; show session variables like 'innodb_optimize_fulltext_only'; +--disable_warnings select * from information_schema.global_variables where 
variable_name='innodb_optimize_fulltext_only'; select * from information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings # # show that it's writable # set global innodb_optimize_fulltext_only='ON'; select @@global.innodb_optimize_fulltext_only; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_optimize_fulltext_only'; select * from information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings set @@global.innodb_optimize_fulltext_only=0; select @@global.innodb_optimize_fulltext_only; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_optimize_fulltext_only'; select * from information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings set global innodb_optimize_fulltext_only=1; select @@global.innodb_optimize_fulltext_only; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_optimize_fulltext_only'; select * from information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings set @@global.innodb_optimize_fulltext_only='OFF'; select @@global.innodb_optimize_fulltext_only; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_optimize_fulltext_only'; select * from information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_optimize_fulltext_only='OFF'; --error ER_GLOBAL_VARIABLE @@ -57,8 +67,10 @@ set global innodb_optimize_fulltext_only=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_optimize_fulltext_only=-3; select @@global.innodb_optimize_fulltext_only; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_optimize_fulltext_only'; select * from 
information_schema.session_variables where variable_name='innodb_optimize_fulltext_only'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_optimize_fulltext_only='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_page_cleaners_basic.test b/mysql-test/suite/sys_vars/t/innodb_page_cleaners_basic.test new file mode 100644 index 00000000000..716492ba1b3 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_page_cleaners_basic.test @@ -0,0 +1,53 @@ +# Variable name: innodb_page_cleaners +# Scope: Global +# Access type: Static +# Data type: numeric + +--source include/have_innodb.inc + +SELECT COUNT(@@GLOBAL.innodb_page_cleaners); +--echo 1 Expected + +SELECT COUNT(@@innodb_page_cleaners); +--echo 1 Expected + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_page_cleaners=1; +--echo Expected error 'Read-only variable' + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_page_cleaners = @@SESSION.innodb_page_cleaners; +--echo Expected error 'Read-only variable' + +--disable_warnings +SELECT @@GLOBAL.innodb_page_cleaners = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_page_cleaners'; +--enable_warnings +--echo 1 Expected + +--disable_warnings +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_page_cleaners'; +--enable_warnings +--echo 1 Expected + +SELECT @@innodb_page_cleaners = @@GLOBAL.innodb_page_cleaners; +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_page_cleaners); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_page_cleaners); +--echo Expected error 'Variable is a GLOBAL variable' + +# Check the default value +--disable_warnings +SELECT VARIABLE_NAME, VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME = 'innodb_page_cleaners'; +--enable_warnings + diff --git 
a/mysql-test/suite/sys_vars/t/innodb_page_hash_locks_basic.test b/mysql-test/suite/sys_vars/t/innodb_page_hash_locks_basic.test index 1479cbad744..ee4798c1f90 100644 --- a/mysql-test/suite/sys_vars/t/innodb_page_hash_locks_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_page_hash_locks_basic.test @@ -10,8 +10,10 @@ select @@global.innodb_page_hash_locks; select @@session.innodb_page_hash_locks; show global variables like 'innodb_page_hash_locks'; show session variables like 'innodb_page_hash_locks'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_page_hash_locks'; select * from information_schema.session_variables where variable_name='innodb_page_hash_locks'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_page_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_page_size_basic.test index 00aa476e8d2..1d4f9353f53 100644 --- a/mysql-test/suite/sys_vars/t/innodb_page_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_page_size_basic.test @@ -1 +1,16 @@ ---echo XtraDB extension +# +# Basic test for innodb_page_size +# + +-- source include/have_innodb.inc + +# Check the default value +SET @orig = @@global.innodb_page_size; +--replace_result 65536 {valid_page_size} 32768 {valid_page_size} 16384 {valid_page_size} 8192 {valid_page_size} 4096 {valid_page_size} +SELECT @orig; + +# Confirm that we can not change the value +-- error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET GLOBAL innodb_page_size = 4k; +-- error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET GLOBAL innodb_page_size = 8k; diff --git a/mysql-test/suite/sys_vars/t/innodb_print_all_deadlocks_basic.test b/mysql-test/suite/sys_vars/t/innodb_print_all_deadlocks_basic.test index 4cbd7062108..5693a829373 100644 --- a/mysql-test/suite/sys_vars/t/innodb_print_all_deadlocks_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_print_all_deadlocks_basic.test @@ -17,38 +17,48 @@ SELECT @@global.innodb_print_all_deadlocks; SELECT 
@@session.innodb_print_all_deadlocks; SHOW global variables LIKE 'innodb_print_all_deadlocks'; SHOW session variables LIKE 'innodb_print_all_deadlocks'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings # # SHOW that it's writable # SET global innodb_print_all_deadlocks='OFF'; SELECT @@global.innodb_print_all_deadlocks; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings SET @@global.innodb_print_all_deadlocks=1; SELECT @@global.innodb_print_all_deadlocks; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings SET global innodb_print_all_deadlocks=0; SELECT @@global.innodb_print_all_deadlocks; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings SET @@global.innodb_print_all_deadlocks='ON'; SELECT @@global.innodb_print_all_deadlocks; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_print_all_deadlocks='OFF'; --error ER_GLOBAL_VARIABLE @@ -66,10 +76,12 @@ SET global innodb_print_all_deadlocks=2; --error ER_WRONG_VALUE_FOR_VAR SET global innodb_print_all_deadlocks=-3; SELECT 
@@global.innodb_print_all_deadlocks; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_print_all_deadlocks'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_print_all_deadlocks'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR SET global innodb_print_all_deadlocks='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_purge_batch_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_purge_batch_size_basic.test index 88271d26965..4f3dc9f364b 100644 --- a/mysql-test/suite/sys_vars/t/innodb_purge_batch_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_purge_batch_size_basic.test @@ -72,7 +72,7 @@ SELECT @@global.innodb_purge_batch_size; SET @@global.innodb_purge_batch_size = 5000; SELECT @@global.innodb_purge_batch_size; -SET @@global.innodb_purge_batch_size = 1000; +SET @@global.innodb_purge_batch_size = 4294967295; SELECT @@global.innodb_purge_batch_size; --echo '#--------------------FN_DYNVARS_046_04-------------------------#' @@ -90,8 +90,17 @@ SELECT @@global.innodb_purge_batch_size; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_purge_batch_size = "Y"; SELECT @@global.innodb_purge_batch_size; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_batch_size = ' '; +SELECT @@global.innodb_purge_batch_size; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_batch_size = " "; +SELECT @@global.innodb_purge_batch_size; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_batch_size = 1.1; +SELECT @@global.innodb_purge_batch_size; -SET @@global.innodb_purge_batch_size = 5001; +SET @@global.innodb_purge_batch_size = 4294967297; SELECT @@global.innodb_purge_batch_size; --echo '#----------------------FN_DYNVARS_046_05------------------------#' @@ -99,12 +108,16 @@ SELECT @@global.innodb_purge_batch_size; # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT 
@@global.innodb_purge_batch_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_purge_batch_size'; +--enable_warnings SELECT @@global.innodb_purge_batch_size; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_purge_batch_size'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_purge_rseg_truncate_frequency_basic.test b/mysql-test/suite/sys_vars/t/innodb_purge_rseg_truncate_frequency_basic.test new file mode 100644 index 00000000000..e0871ba4ab5 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_purge_rseg_truncate_frequency_basic.test @@ -0,0 +1,161 @@ +###### mysql-test\t\innodb_purge_rseg_truncate_frequency_basic.test ########### +# # +# Variable Name: innodb_purge_rseg_truncate_frequency # +# Scope: GLOBAL # +# Access Type: Dynamic # +# Data Type: Numeric # +# Default Value: 128 # +# Range: 1 - 128 # +# # +# # +# Creation Date: 2014-27-05 # +# Author: Krunal Bauskar # +# # +#Description:Test Cases of Dynamic System Variable # +# innodb_purge_rseg_truncate_frequency # +# that checks the behavior of this variable in the following ways # +# * Default Value # +# * Valid & Invalid values # +# * Scope & Access method # +# * Data Integrity # +# # +# Reference: http://dev.mysql.com/doc/refman/5.1/en/ # +# server-system-variables.html # +# # +############################################################################### + +--source include/have_innodb.inc +--source include/load_sysvars.inc + +######################################################################## +# START OF innodb_purge_rseg_truncate_frequency TESTS # +######################################################################## + +############################################################################### +# Saving initial value of 
innodb_purge_rseg_truncate_frequency in a # +# temporary variable # +############################################################################### + +SET @global_start_value = @@global.innodb_purge_rseg_truncate_frequency; +SELECT @global_start_value; + +--echo '#--------------------FN_DYNVARS_046_01------------------------#' +######################################################################## +# Display the DEFAULT value of innodb_purge_rseg_truncate_frequency # +######################################################################## + +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SET @@global.innodb_purge_rseg_truncate_frequency = DEFAULT; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--echo '#---------------------FN_DYNVARS_046_02-------------------------#' +############################################################################ +# Check if innodb_purge_rseg_truncate_frequency can be accessed with and # +# without @@ sign # +############################################################################ + +--Error ER_GLOBAL_VARIABLE +SET innodb_purge_rseg_truncate_frequency = 1; +SELECT @@innodb_purge_rseg_truncate_frequency; + +--Error ER_UNKNOWN_TABLE +SELECT local.innodb_purge_rseg_truncate_frequency; + +SET global innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--echo '#--------------------FN_DYNVARS_046_03------------------------#' +########################################################################## +# change the value of innodb_purge_rseg_truncate_frequency to a valid # +# value # +########################################################################## + +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +SET @@global.innodb_purge_rseg_truncate_frequency = 1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +SET @@global.innodb_purge_rseg_truncate_frequency = 128; +SELECT 
@@global.innodb_purge_rseg_truncate_frequency; + +--echo '#--------------------FN_DYNVARS_046_05-------------------------#' +########################################################################### +# Change the value of innodb_purge_rseg_truncate_frequency to # +# invalid value # +########################################################################### + +SET @@global.innodb_purge_rseg_truncate_frequency = -1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +SET @@global.innodb_purge_rseg_truncate_frequency = -1024; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = "T"; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = "Y"; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = 1.1; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = ' '; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = " "; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--echo '#----------------------FN_DYNVARS_046_06------------------------#' +######################################################################### +# Check if the value in GLOBAL Table matches value in variable # +######################################################################### + +--disable_warnings +SELECT @@global.innodb_purge_rseg_truncate_frequency = + VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES + WHERE VARIABLE_NAME='innodb_purge_rseg_truncate_frequency'; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES + WHERE VARIABLE_NAME='innodb_purge_rseg_truncate_frequency'; 
+--enable_warnings + +--echo '#---------------------FN_DYNVARS_046_07-------------------------#' +################################################################### +# Check if ON and OFF values can be used on variable # +################################################################### + +--ERROR ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = OFF; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--ERROR ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_purge_rseg_truncate_frequency = ON; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +--echo '#---------------------FN_DYNVARS_046_08----------------------#' +################################################################### +# Check if TRUE and FALSE values can be used on variable # +################################################################### + +SET @@global.innodb_purge_rseg_truncate_frequency = TRUE; +SELECT @@global.innodb_purge_rseg_truncate_frequency; +SET @@global.innodb_purge_rseg_truncate_frequency = FALSE; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +############################## +# Restore initial value # +############################## + + +SET @@global.innodb_purge_rseg_truncate_frequency = @global_start_value; +SELECT @@global.innodb_purge_rseg_truncate_frequency; + +############################################################### +# END OF innodb_purge_rseg_truncate_frequency TESTS # +############################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_purge_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_purge_threads_basic.test index 64d834c6344..4d039601e40 100644 --- a/mysql-test/suite/sys_vars/t/innodb_purge_threads_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_purge_threads_basic.test @@ -1,89 +1,41 @@ - - -################## mysql-test\t\innodb_log_purge_threads_basic.test ########### -# # -# Variable Name: innodb_purge_threads # -# Scope: Global # -# Access Type: Static # -# 
Data Type: numeric # -# # -# # -# Creation Date: 2008-02-07 # -# Author : Sharique Abdullah # -# # -# # -# Description:Test Cases of Dynamic System Variable innodb_purge_threads # -# that checks the behavior of this variable in the following ways # -# * Value Check # -# * Scope Check # -# # -# Reference: http://dev.mysql.com/doc/refman/5.1/en/ # -# server-system-variables.html # -# # -############################################################################### +# Variable name: innodb_purge_threads +# Scope: Global +# Access type: Static +# Data type: numeric --source include/have_innodb.inc ---echo '#---------------------BS_STVARS_035_01----------------------#' -#################################################################### -# Displaying default value # -#################################################################### SELECT COUNT(@@GLOBAL.innodb_purge_threads); --echo 1 Expected - ---echo '#---------------------BS_STVARS_035_02----------------------#' -#################################################################### -# Check if Value can set # -#################################################################### +SELECT COUNT(@@innodb_purge_threads); +--echo 1 Expected --error ER_INCORRECT_GLOBAL_LOCAL_VAR SET @@GLOBAL.innodb_purge_threads=1; ---echo Expected error 'Read only variable' +--echo Expected error 'Read-only variable' -SELECT COUNT(@@GLOBAL.innodb_purge_threads); ---echo 1 Expected - - - - ---echo '#---------------------BS_STVARS_035_03----------------------#' -################################################################# -# Check if the value in GLOBAL Table matches value in variable # -################################################################# +--Error ER_BAD_FIELD_ERROR +SELECT innodb_purge_threads = @@SESSION.innodb_purge_threads; +--echo Expected error 'Read-only variable' +--disable_warnings SELECT @@GLOBAL.innodb_purge_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE 
VARIABLE_NAME='innodb_purge_threads'; +--enable_warnings --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_purge_threads); ---echo 1 Expected - +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_purge_threads'; +--enable_warnings --echo 1 Expected - - ---echo '#---------------------BS_STVARS_035_04----------------------#' -################################################################################ -# Check if accessing variable with and without GLOBAL point to same variable # -################################################################################ SELECT @@innodb_purge_threads = @@GLOBAL.innodb_purge_threads; --echo 1 Expected - - ---echo '#---------------------BS_STVARS_035_05----------------------#' -################################################################################ -# Check if innodb_purge_threads can be accessed with and without @@ sign # -################################################################################ - -SELECT COUNT(@@innodb_purge_threads); ---echo 1 Expected - --Error ER_INCORRECT_GLOBAL_LOCAL_VAR SELECT COUNT(@@local.innodb_purge_threads); --echo Expected error 'Variable is a GLOBAL variable' @@ -92,11 +44,10 @@ SELECT COUNT(@@local.innodb_purge_threads); SELECT COUNT(@@SESSION.innodb_purge_threads); --echo Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_purge_threads); ---echo 1 Expected - ---Error ER_BAD_FIELD_ERROR -SELECT innodb_purge_threads = @@SESSION.innodb_purge_threads; ---echo Expected error 'Readonly variable' - +# Check the default value +--disable_warnings +SELECT VARIABLE_NAME, VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME = 'innodb_purge_threads'; +--enable_warnings diff --git a/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test index b7ba6f36b15..a805fc80314 100644 --- 
a/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_random_read_ahead_basic.test @@ -18,28 +18,38 @@ select @@global.innodb_random_read_ahead; select @@session.innodb_random_read_ahead; show global variables like 'innodb_random_read_ahead'; show session variables like 'innodb_random_read_ahead'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings # # show that it's writable # set global innodb_random_read_ahead='ON'; select @@global.innodb_random_read_ahead; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings set @@global.innodb_random_read_ahead=0; select @@global.innodb_random_read_ahead; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings set global innodb_random_read_ahead=1; select @@global.innodb_random_read_ahead; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings set @@global.innodb_random_read_ahead='OFF'; select @@global.innodb_random_read_ahead; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_random_read_ahead='OFF'; --error ER_GLOBAL_VARIABLE @@ 
-57,8 +67,10 @@ set global innodb_random_read_ahead=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_random_read_ahead=-3; select @@global.innodb_random_read_ahead; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_random_read_ahead'; select * from information_schema.session_variables where variable_name='innodb_random_read_ahead'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_random_read_ahead='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_read_ahead_threshold_basic.test b/mysql-test/suite/sys_vars/t/innodb_read_ahead_threshold_basic.test index 1298a28b3d3..65bb9c03115 100644 --- a/mysql-test/suite/sys_vars/t/innodb_read_ahead_threshold_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_read_ahead_threshold_basic.test @@ -18,18 +18,27 @@ select @@global.innodb_read_ahead_threshold; select @@session.innodb_read_ahead_threshold; show global variables like 'innodb_read_ahead_threshold'; show session variables like 'innodb_read_ahead_threshold'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_ahead_threshold'; select * from information_schema.session_variables where variable_name='innodb_read_ahead_threshold'; +--enable_warnings # # show that it's writable # set global innodb_read_ahead_threshold=10; select @@global.innodb_read_ahead_threshold; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_ahead_threshold'; select * from information_schema.session_variables where variable_name='innodb_read_ahead_threshold'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_read_ahead_threshold=1; +# +# check the default value +# +set global innodb_read_ahead_threshold=DEFAULT; +select @@global.innodb_read_ahead_threshold; # # incorrect types @@ -40,13 +49,23 @@ set global innodb_read_ahead_threshold=1.1; set global innodb_read_ahead_threshold=1e1; --error ER_WRONG_TYPE_FOR_VAR 
set global innodb_read_ahead_threshold="foo"; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_read_ahead_threshold=' '; +select @@global.innodb_read_ahead_threshold; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_read_ahead_threshold=" "; +select @@global.innodb_read_ahead_threshold; set global innodb_read_ahead_threshold=-7; select @@global.innodb_read_ahead_threshold; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_ahead_threshold'; +--enable_warnings set global innodb_read_ahead_threshold=96; select @@global.innodb_read_ahead_threshold; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_ahead_threshold'; +--enable_warnings # # min/max values diff --git a/mysql-test/suite/sys_vars/t/innodb_read_io_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_read_io_threads_basic.test index 14426395d6c..c4c49d5bb20 100644 --- a/mysql-test/suite/sys_vars/t/innodb_read_io_threads_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_read_io_threads_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_read_io_threads; select @@session.innodb_read_io_threads; show global variables like 'innodb_read_io_threads'; show session variables like 'innodb_read_io_threads'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_io_threads'; select * from information_schema.session_variables where variable_name='innodb_read_io_threads'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_read_only_basic.test b/mysql-test/suite/sys_vars/t/innodb_read_only_basic.test index 581eb3538b8..31cbe779ef7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_read_only_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_read_only_basic.test @@ -9,8 +9,10 @@ select @@global.innodb_read_only; select @@session.innodb_read_only; show global variables like 'innodb_read_only'; show session variables like 
'innodb_read_only'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_read_only'; select * from information_schema.session_variables where variable_name='innodb_read_only'; +--enable_warnings # Show that it's read-only --error ER_INCORRECT_GLOBAL_LOCAL_VAR diff --git a/mysql-test/suite/sys_vars/t/innodb_replication_delay_basic.test b/mysql-test/suite/sys_vars/t/innodb_replication_delay_basic.test index e495de46611..c85cc85e78c 100644 --- a/mysql-test/suite/sys_vars/t/innodb_replication_delay_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_replication_delay_basic.test @@ -18,19 +18,40 @@ select @@global.innodb_replication_delay; select @@session.innodb_replication_delay; show global variables like 'innodb_replication_delay'; show session variables like 'innodb_replication_delay'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_replication_delay'; select * from information_schema.session_variables where variable_name='innodb_replication_delay'; +--enable_warnings # # show that it's writable # set global innodb_replication_delay=10; select @@global.innodb_replication_delay; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_replication_delay'; select * from information_schema.session_variables where variable_name='innodb_replication_delay'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_replication_delay=1; +# +# check the default value +# +set global innodb_replication_delay=DEFAULT; +select @@global.innodb_replication_delay; + +# +# valid values +# + +set global innodb_replication_delay=0; +select @@global.innodb_replication_delay; +set global innodb_replication_delay=65535; +select @@global.innodb_replication_delay; +set global innodb_replication_delay=4294967295; +select @@global.innodb_replication_delay; + # # incorrect types # @@ -40,10 +61,39 @@ set global innodb_replication_delay=1.1; set global 
innodb_replication_delay=1e1; --error ER_WRONG_TYPE_FOR_VAR set global innodb_replication_delay="foo"; - +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_replication_delay=' '; +select @@global.innodb_replication_delay; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_replication_delay=" "; +select @@global.innodb_replication_delay; set global innodb_replication_delay=-7; select @@global.innodb_replication_delay; +set global innodb_replication_delay=-1024; +select @@global.innodb_replication_delay; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_replication_delay'; +--enable_warnings + +# +# Check for out of bounds +# + +# With a 64 bit mysqld:18446744073709551615,with a 32 bit mysqld: 4294967295 +--disable_warnings +SET @@global.innodb_replication_delay = 4294967296; +--enable_warnings +SELECT @@global.innodb_replication_delay IN (4294967296,4294967295); + +--disable_warnings +SET @@global.innodb_replication_delay = 12345678901; +--enable_warnings +SELECT @@global.innodb_replication_delay IN (12345678901,4294967295); + +--disable_warnings +SET @@global.innodb_replication_delay = 18446744073709551615; +--enable_warnings +SELECT @@global.innodb_replication_delay IN (18446744073709551615,4294967295); # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_rollback_on_timeout_basic.test b/mysql-test/suite/sys_vars/t/innodb_rollback_on_timeout_basic.test index 81025bb9d73..2aee2e25db7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_rollback_on_timeout_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_rollback_on_timeout_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_rollback_on_timeout); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_rollback_on_timeout, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE 
VARIABLE_NAME='innodb_rollback_on_timeout'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_rollback_on_timeout); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_rollback_on_timeout'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_rollback_segments_basic.test b/mysql-test/suite/sys_vars/t/innodb_rollback_segments_basic.test index 9f0b70a528f..33223d4c064 100644 --- a/mysql-test/suite/sys_vars/t/innodb_rollback_segments_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_rollback_segments_basic.test @@ -19,20 +19,24 @@ SELECT @@global.innodb_rollback_segments; SELECT @@session.innodb_rollback_segments; SHOW global variables LIKE 'innodb_rollback_segments'; SHOW session variables LIKE 'innodb_rollback_segments'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_rollback_segments'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_rollback_segments'; +--enable_warnings # # show that it's writable # SET global innodb_rollback_segments=100; SELECT @@global.innodb_rollback_segments; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_rollback_segments'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_rollback_segments'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_rollback_segments=1; @@ -47,8 +51,10 @@ SET global innodb_rollback_segments=1e1; SET global innodb_rollback_segments="foo"; SET global innodb_rollback_segments=-7; SELECT @@global.innodb_rollback_segments; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_rollback_segments'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_simulate_comp_failures_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_simulate_comp_failures_basic.test index 97e69e3f324..07e70bf7343 100644 --- a/mysql-test/suite/sys_vars/t/innodb_simulate_comp_failures_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_simulate_comp_failures_basic.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/have_debug.inc SET @start_global_value = @@global.innodb_simulate_comp_failures; SELECT @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_sort_buffer_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_sort_buffer_size_basic.test index 920c992c1f9..49318c00661 100644 --- a/mysql-test/suite/sys_vars/t/innodb_sort_buffer_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_sort_buffer_size_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_sort_buffer_size; select @@session.innodb_sort_buffer_size; show global variables like 'innodb_sort_buffer_size'; show session variables like 'innodb_sort_buffer_size'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_sort_buffer_size'; select * from information_schema.session_variables where variable_name='innodb_sort_buffer_size'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_spin_wait_delay_basic.test b/mysql-test/suite/sys_vars/t/innodb_spin_wait_delay_basic.test index 8f2eee08b6a..ab0b38bb6ce 100644 --- a/mysql-test/suite/sys_vars/t/innodb_spin_wait_delay_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_spin_wait_delay_basic.test @@ -18,19 +18,39 @@ select @@global.innodb_spin_wait_delay; select @@session.innodb_spin_wait_delay; show global variables like 'innodb_spin_wait_delay'; show session variables like 'innodb_spin_wait_delay'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_spin_wait_delay'; select * from information_schema.session_variables where variable_name='innodb_spin_wait_delay'; +--enable_warnings # # show that it's 
writable # set global innodb_spin_wait_delay=10; select @@global.innodb_spin_wait_delay; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_spin_wait_delay'; select * from information_schema.session_variables where variable_name='innodb_spin_wait_delay'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_spin_wait_delay=1; +# +# check the default value +# +set global innodb_spin_wait_delay=DEFAULT; +select @@global.innodb_spin_wait_delay; + +# +# valid values +# +set global innodb_spin_wait_delay=0; +select @@global.innodb_spin_wait_delay; +set global innodb_spin_wait_delay=65535; +select @@global.innodb_spin_wait_delay; +set global innodb_spin_wait_delay=4294967295; +select @@global.innodb_spin_wait_delay; + # # incorrect types # @@ -40,10 +60,39 @@ set global innodb_spin_wait_delay=1.1; set global innodb_spin_wait_delay=1e1; --error ER_WRONG_TYPE_FOR_VAR set global innodb_spin_wait_delay="foo"; - +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_spin_wait_delay=' '; +select @@global.innodb_spin_wait_delay; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_spin_wait_delay=" "; +select @@global.innodb_spin_wait_delay; set global innodb_spin_wait_delay=-7; select @@global.innodb_spin_wait_delay; +set global innodb_spin_wait_delay=-1024; +select @@global.innodb_spin_wait_delay; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_spin_wait_delay'; +--enable_warnings + +# +# Check for out of bounds +# + +# With a 64 bit mysqld:18446744073709551615,with a 32 bit mysqld: 4294967295 +--disable_warnings +SET @@global.innodb_spin_wait_delay = 4294967296; +--enable_warnings +SELECT @@global.innodb_spin_wait_delay IN (4294967296,4294967295); + +--disable_warnings +SET @@global.innodb_spin_wait_delay = 12345678901; +--enable_warnings +SELECT @@global.innodb_spin_wait_delay IN (12345678901,4294967295); + +--disable_warnings +SET @@global.innodb_spin_wait_delay = 
18446744073709551615; +--enable_warnings +SELECT @@global.innodb_spin_wait_delay IN (18446744073709551615,4294967295); # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_method_basic.test b/mysql-test/suite/sys_vars/t/innodb_stats_method_basic.test index f01574c3683..77288dfb130 100644 --- a/mysql-test/suite/sys_vars/t/innodb_stats_method_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_method_basic.test @@ -18,32 +18,40 @@ SELECT @@global.innodb_stats_method; SELECT @@session.innodb_stats_method; SHOW global variables LIKE 'innodb_stats_method'; SHOW session variables LIKE 'innodb_stats_method'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_method'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_method'; +--enable_warnings # # show that it's writable # SET global innodb_stats_method='nulls_equal'; SELECT @@global.innodb_stats_method; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_method'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_method'; +--enable_warnings SET @@global.innodb_stats_method='nulls_unequal'; SELECT @@global.innodb_stats_method; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_method'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_method'; +--enable_warnings SET global innodb_stats_method=2; SELECT @@global.innodb_stats_method; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_method'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_method'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_stats_method='nulls_equal'; diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_on_metadata_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_stats_on_metadata_basic.test index 9028ee7f687..a0bccb50652 100644 --- a/mysql-test/suite/sys_vars/t/innodb_stats_on_metadata_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_on_metadata_basic.test @@ -18,28 +18,38 @@ select @@global.innodb_stats_on_metadata; select @@session.innodb_stats_on_metadata; show global variables like 'innodb_stats_on_metadata'; show session variables like 'innodb_stats_on_metadata'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; +--enable_warnings # # show that it's writable # set global innodb_stats_on_metadata='OFF'; select @@global.innodb_stats_on_metadata; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; +--enable_warnings set @@global.innodb_stats_on_metadata=1; select @@global.innodb_stats_on_metadata; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; +--enable_warnings set global innodb_stats_on_metadata=0; select @@global.innodb_stats_on_metadata; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; +--enable_warnings set @@global.innodb_stats_on_metadata='ON'; select @@global.innodb_stats_on_metadata; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; 
+--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_stats_on_metadata='OFF'; --error ER_GLOBAL_VARIABLE @@ -57,8 +67,10 @@ set global innodb_stats_on_metadata=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_stats_on_metadata=-3; select @@global.innodb_stats_on_metadata; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_on_metadata'; select * from information_schema.session_variables where variable_name='innodb_stats_on_metadata'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_stats_on_metadata='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_persistent_basic.test b/mysql-test/suite/sys_vars/t/innodb_stats_persistent_basic.test index 4277b58de00..c5f977321b7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_stats_persistent_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_persistent_basic.test @@ -28,4 +28,4 @@ SET GLOBAL innodb_stats_persistent=123; SET GLOBAL innodb_stats_persistent='foo'; # restore the environment -SET GLOBAL innodb_stats_persistent=off; +SET GLOBAL innodb_stats_persistent=OFF; \ No newline at end of file diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_persistent_sample_pages_basic.test b/mysql-test/suite/sys_vars/t/innodb_stats_persistent_sample_pages_basic.test index cf223c02090..5fc62f0a571 100644 --- a/mysql-test/suite/sys_vars/t/innodb_stats_persistent_sample_pages_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_persistent_sample_pages_basic.test @@ -18,23 +18,45 @@ SELECT @@global.innodb_stats_persistent_sample_pages; SELECT @@session.innodb_stats_persistent_sample_pages; SHOW global variables LIKE 'innodb_stats_persistent_sample_pages'; SHOW session variables LIKE 'innodb_stats_persistent_sample_pages'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_persistent_sample_pages'; SELECT * FROM information_schema.session_variables WHERE 
variable_name='innodb_stats_persistent_sample_pages'; +--enable_warnings # # SHOW that it's writable # SET global innodb_stats_persistent_sample_pages=10; SELECT @@global.innodb_stats_persistent_sample_pages; -SELECT * FROM information_schema.global_variables +--disable_warnings +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_persistent_sample_pages'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_persistent_sample_pages'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_stats_persistent_sample_pages=1; +# +# show the default value +# +set global innodb_stats_persistent_sample_pages=DEFAULT; +select @@global.innodb_stats_persistent_sample_pages; + +# +# valid values +# +SET global innodb_stats_persistent_sample_pages=0; +SELECT @@global.innodb_stats_persistent_sample_pages; + +SET global innodb_stats_persistent_sample_pages=10; +SELECT @@global.innodb_stats_persistent_sample_pages; + + + + # # incorrect types # @@ -44,11 +66,19 @@ SET global innodb_stats_persistent_sample_pages=1.1; SET global innodb_stats_persistent_sample_pages=1e1; --error ER_WRONG_TYPE_FOR_VAR SET global innodb_stats_persistent_sample_pages="foo"; - +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_stats_persistent_sample_pages=' '; +SELECT @@global.innodb_stats_persistent_sample_pages; +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_stats_persistent_sample_pages=" "; +SELECT @@global.innodb_stats_persistent_sample_pages; SET global innodb_stats_persistent_sample_pages=-7; SELECT @@global.innodb_stats_persistent_sample_pages; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_persistent_sample_pages'; +--enable_warnings + # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_sample_pages_basic.test b/mysql-test/suite/sys_vars/t/innodb_stats_sample_pages_basic.test index 2c91f11405d..0e4fcb508ac 100644 --- 
a/mysql-test/suite/sys_vars/t/innodb_stats_sample_pages_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_sample_pages_basic.test @@ -18,32 +18,52 @@ select @@global.innodb_stats_sample_pages; select @@session.innodb_stats_sample_pages; show global variables like 'innodb_stats_sample_pages'; show session variables like 'innodb_stats_sample_pages'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_sample_pages'; select * from information_schema.session_variables where variable_name='innodb_stats_sample_pages'; +--enable_warnings # # show that it's writable # set global innodb_stats_sample_pages=10; select @@global.innodb_stats_sample_pages; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_sample_pages'; select * from information_schema.session_variables where variable_name='innodb_stats_sample_pages'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_stats_sample_pages=1; +# +# show the default value # -# incorrect types +set global innodb_stats_sample_pages=DEFAULT; +select @@global.innodb_stats_sample_pages; + + +# +# invalid values # --error ER_WRONG_TYPE_FOR_VAR -set global innodb_stats_sample_pages=1.1; +set global innodb_stats_sample_pages = 1.1; --error ER_WRONG_TYPE_FOR_VAR -set global innodb_stats_sample_pages=1e1; +set global innodb_stats_sample_pages = 1e1; --error ER_WRONG_TYPE_FOR_VAR -set global innodb_stats_sample_pages="foo"; +set global innodb_stats_sample_pages = "foo"; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_stats_sample_pages=' '; +select @@global.innodb_stats_sample_pages; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_stats_sample_pages=" "; +select @@global.innodb_stats_sample_pages; set global innodb_stats_sample_pages=-7; select @@global.innodb_stats_sample_pages; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_stats_sample_pages'; +--enable_warnings 
+ # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_transient_sample_pages_basic.test b/mysql-test/suite/sys_vars/t/innodb_stats_transient_sample_pages_basic.test index ff3a50efa1f..897d3de42e0 100644 --- a/mysql-test/suite/sys_vars/t/innodb_stats_transient_sample_pages_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_stats_transient_sample_pages_basic.test @@ -18,20 +18,30 @@ SELECT @@global.innodb_stats_transient_sample_pages; SELECT @@session.innodb_stats_transient_sample_pages; SHOW global variables LIKE 'innodb_stats_transient_sample_pages'; SHOW session variables LIKE 'innodb_stats_transient_sample_pages'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_transient_sample_pages'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_transient_sample_pages'; +--enable_warnings + +# +# show the default value +# +set global innodb_stats_transient_sample_pages=DEFAULT; +select @@global.innodb_stats_transient_sample_pages; # # SHOW that it's writable # SET global innodb_stats_transient_sample_pages=10; SELECT @@global.innodb_stats_transient_sample_pages; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_stats_transient_sample_pages'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_stats_transient_sample_pages'; +--enable_warnings --error ER_GLOBAL_VARIABLE SET session innodb_stats_transient_sample_pages=1; @@ -44,11 +54,15 @@ SET global innodb_stats_transient_sample_pages=1.1; SET global innodb_stats_transient_sample_pages=1e1; --error ER_WRONG_TYPE_FOR_VAR SET global innodb_stats_transient_sample_pages="foo"; +--error ER_WRONG_TYPE_FOR_VAR +SET global innodb_stats_transient_sample_pages=' '; SET global innodb_stats_transient_sample_pages=-7; SELECT @@global.innodb_stats_transient_sample_pages; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE 
variable_name='innodb_stats_transient_sample_pages'; +--enable_warnings # # cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_status_output_basic.test b/mysql-test/suite/sys_vars/t/innodb_status_output_basic.test index 4459632134d..8e33b364c4e 100644 --- a/mysql-test/suite/sys_vars/t/innodb_status_output_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_status_output_basic.test @@ -13,28 +13,38 @@ select @@global.innodb_status_output; select @@session.innodb_status_output; show global variables like 'innodb_status_output'; show session variables like 'innodb_status_output'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings # # show that it's writable # set global innodb_status_output='OFF'; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings set @@global.innodb_status_output=1; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings set global innodb_status_output=0; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings set @@global.innodb_status_output='ON'; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables 
where variable_name='innodb_status_output'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_status_output='OFF'; --error ER_GLOBAL_VARIABLE @@ -52,12 +62,16 @@ set global innodb_status_output=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_status_output=-3; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings set global innodb_status_output=DEFAULT; select @@global.innodb_status_output; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output'; select * from information_schema.session_variables where variable_name='innodb_status_output'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global innodb_status_output='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_status_output_locks_basic.test b/mysql-test/suite/sys_vars/t/innodb_status_output_locks_basic.test index 92c82b2ddbf..9f510c2feaa 100644 --- a/mysql-test/suite/sys_vars/t/innodb_status_output_locks_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_status_output_locks_basic.test @@ -13,28 +13,38 @@ select @@global.innodb_status_output_locks; select @@session.innodb_status_output_locks; show global variables like 'innodb_status_output_locks'; show session variables like 'innodb_status_output_locks'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings # # show that it's writable # set global innodb_status_output_locks='OFF'; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from 
information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings set @@global.innodb_status_output_locks=1; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings set global innodb_status_output_locks=0; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings set @@global.innodb_status_output_locks='ON'; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_status_output_locks='OFF'; --error ER_GLOBAL_VARIABLE @@ -52,12 +62,16 @@ set global innodb_status_output_locks=2; --error ER_WRONG_VALUE_FOR_VAR set global innodb_status_output_locks=-3; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings set global innodb_status_output_locks=DEFAULT; select @@global.innodb_status_output_locks; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_status_output_locks'; select * from information_schema.session_variables where variable_name='innodb_status_output_locks'; +--enable_warnings --error ER_WRONG_VALUE_FOR_VAR set global 
innodb_status_output_locks='AUTO'; diff --git a/mysql-test/suite/sys_vars/t/innodb_strict_mode_basic.test b/mysql-test/suite/sys_vars/t/innodb_strict_mode_basic.test index 10f8d1ce4e7..243985f95de 100644 --- a/mysql-test/suite/sys_vars/t/innodb_strict_mode_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_strict_mode_basic.test @@ -18,8 +18,10 @@ select @@session.innodb_strict_mode in (0, 1); select @@session.innodb_strict_mode; show global variables like 'innodb_strict_mode'; show session variables like 'innodb_strict_mode'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings # # show that it's writable @@ -28,26 +30,34 @@ set global innodb_strict_mode='OFF'; set session innodb_strict_mode='OFF'; select @@global.innodb_strict_mode; select @@session.innodb_strict_mode; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings set @@global.innodb_strict_mode=1; set @@session.innodb_strict_mode=1; select @@global.innodb_strict_mode; select @@session.innodb_strict_mode; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings set global innodb_strict_mode=0; set session innodb_strict_mode=0; select @@global.innodb_strict_mode; select @@session.innodb_strict_mode; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings set @@global.innodb_strict_mode='ON'; set @@session.innodb_strict_mode='ON'; select 
@@global.innodb_strict_mode; select @@session.innodb_strict_mode; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings # # incorrect types @@ -74,8 +84,10 @@ set global innodb_strict_mode=-3; set session innodb_strict_mode=-7; select @@global.innodb_strict_mode; select @@session.innodb_strict_mode; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_strict_mode'; select * from information_schema.session_variables where variable_name='innodb_strict_mode'; +--enable_warnings # # Cleanup diff --git a/mysql-test/suite/sys_vars/t/innodb_support_xa_basic.test b/mysql-test/suite/sys_vars/t/innodb_support_xa_basic.test index 988b8f01b93..6668d486090 100644 --- a/mysql-test/suite/sys_vars/t/innodb_support_xa_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_support_xa_basic.test @@ -127,7 +127,7 @@ SET @@session.innodb_support_xa = # for global ---error ER_WRONG_VALUE_FOR_VAR +--Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_support_xa = -1; SELECT @@global.innodb_support_xa; @@ -166,12 +166,16 @@ SELECT @@session.innodb_support_xa AS res_is_1; # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT IF(@@global.innodb_support_xa, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_support_xa'; +--enable_warnings SELECT @@global.innodb_support_xa; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_support_xa'; +--enable_warnings --echo '#----------------------FN_DYNVARS_046_07------------------------#' @@ -179,12 +183,16 @@ SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES # Check if the value in SESSION Table matches value in 
variable # ######################################################################### +--disable_warnings SELECT IF(@@session.innodb_support_xa, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME='innodb_support_xa'; +--enable_warnings SELECT @@session.innodb_support_xa; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME='innodb_support_xa'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_08-------------------------#' diff --git a/mysql-test/suite/sys_vars/t/innodb_sync_array_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_sync_array_size_basic.test index 53011acb576..39ff69affea 100644 --- a/mysql-test/suite/sys_vars/t/innodb_sync_array_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_sync_array_size_basic.test @@ -2,11 +2,6 @@ --source include/have_innodb.inc -if (`select plugin_auth_version <= "5.6.10" from information_schema.plugins where plugin_name='innodb'`) -{ - --skip Not fixed in InnoDB 5.6.10 or earlier -} - # Exists as global only # --echo Valid values are between 0 and 1024 @@ -14,12 +9,15 @@ SELECT @@global.innodb_sync_array_size between 0 and 1024; SELECT @@global.innodb_sync_array_size; --error ER_INCORRECT_GLOBAL_LOCAL_VAR SELECT @@session.innodb_sync_array_size; + SHOW GLOBAL variables LIKE 'innodb_sync_array_size'; SHOW SESSION variables LIKE 'innodb_sync_array_size'; +--disable_warnings SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_sync_array_size'; SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_sync_array_size'; +--enable_warnings # # Show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_sync_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_sync_debug_basic.test new file mode 100644 index 00000000000..665482e6963 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_sync_debug_basic.test @@ -0,0 +1,16 @@ +--echo # +--echo # Basic test 
for innodb_sync_debug +--echo # + +--source include/have_innodb.inc + +# The config variable is a debug read-only variable +-- source include/have_debug.inc + +SELECT @@global.innodb_sync_debug; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_sync_debug = 1; + +SELECT @@global.innodb_sync_debug; + diff --git a/mysql-test/suite/sys_vars/t/innodb_sync_spin_loops_basic.test b/mysql-test/suite/sys_vars/t/innodb_sync_spin_loops_basic.test index 35460fe47f2..138e877dd42 100644 --- a/mysql-test/suite/sys_vars/t/innodb_sync_spin_loops_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_sync_spin_loops_basic.test @@ -70,13 +70,35 @@ SELECT @@global.innodb_sync_spin_loops; SET @@global.innodb_sync_spin_loops = 0; SELECT @@global.innodb_sync_spin_loops; -SET @@global.innodb_sync_spin_loops = 1; +SET @@global.innodb_sync_spin_loops = 65535; SELECT @@global.innodb_sync_spin_loops; -SET @@global.innodb_sync_spin_loops = 1000; + +SET @@global.innodb_sync_spin_loops = 4294967295; SELECT @@global.innodb_sync_spin_loops; --echo '#--------------------FN_DYNVARS_046_04-------------------------#' ########################################################################### +# Check the value of innodb_sync_spin_loops for out of bounds # +########################################################################### + +# With a 64 bit mysqld:18446744073709551615,with a 32 bit mysqld: 4294967295 +--disable_warnings +SET @@global.innodb_sync_spin_loops = 4294967296; +--enable_warnings +SELECT @@global.innodb_sync_spin_loops IN (4294967296,4294967295); + +--disable_warnings +SET @@global.innodb_sync_spin_loops = 12345678901; +--enable_warnings +SELECT @@global.innodb_sync_spin_loops IN (12345678901,4294967295); + +--disable_warnings +SET @@global.innodb_sync_spin_loops = 18446744073709551615; +--enable_warnings +SELECT @@global.innodb_sync_spin_loops IN (18446744073709551615,4294967295); + +--echo '#--------------------FN_DYNVARS_046_05-------------------------#' 
+########################################################################### # Change the value of innodb_sync_spin_loops to invalid value # ########################################################################### @@ -91,22 +113,38 @@ SELECT @@global.innodb_sync_spin_loops; SET @@global.innodb_sync_spin_loops = "Y"; SELECT @@global.innodb_sync_spin_loops; -SET @@global.innodb_sync_spin_loops = 1001; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_sync_spin_loops = 65535.01; SELECT @@global.innodb_sync_spin_loops; ---echo '#----------------------FN_DYNVARS_046_05------------------------#' +SET @@global.innodb_sync_spin_loops = -1024; +SELECT @@global.innodb_sync_spin_loops; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_sync_spin_loops = " "; +SELECT @@global.innodb_sync_spin_loops; + +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_sync_spin_loops = ' '; +SELECT @@global.innodb_sync_spin_loops; + +--echo '#----------------------FN_DYNVARS_046_06------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_sync_spin_loops = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_sync_spin_loops'; +--enable_warnings SELECT @@global.innodb_sync_spin_loops; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_sync_spin_loops'; +--enable_warnings ---echo '#---------------------FN_DYNVARS_046_06-------------------------#' +--echo '#---------------------FN_DYNVARS_046_07-------------------------#' ################################################################### # Check if ON and OFF values can be used on variable # ################################################################### @@ -119,7 +157,7 @@ SELECT @@global.innodb_sync_spin_loops; SET 
@@global.innodb_sync_spin_loops = ON; SELECT @@global.innodb_sync_spin_loops; ---echo '#---------------------FN_DYNVARS_046_07----------------------#' +--echo '#---------------------FN_DYNVARS_046_08----------------------#' ################################################################### # Check if TRUE and FALSE values can be used on variable # ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_table_locks_basic.test b/mysql-test/suite/sys_vars/t/innodb_table_locks_basic.test index e7503bd334d..e3e4bda345e 100644 --- a/mysql-test/suite/sys_vars/t/innodb_table_locks_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_table_locks_basic.test @@ -106,7 +106,7 @@ SELECT @@global.innodb_table_locks; # for session ---error ER_WRONG_VALUE_FOR_VAR +--Error ER_WRONG_VALUE_FOR_VAR SET @@session.innodb_table_locks = -6; --Error ER_WRONG_TYPE_FOR_VAR SET @@session.innodb_table_locks = 1.6; @@ -128,7 +128,7 @@ SET @@session.innodb_table_locks = # for global ---error ER_WRONG_VALUE_FOR_VAR +--Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_table_locks = -1; --Error ER_WRONG_VALUE_FOR_VAR SET @@global.innodb_table_locks = 2; @@ -165,24 +165,32 @@ SELECT @@session.innodb_table_locks AS res_is_1; # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT IF(@@global.innodb_table_locks, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_table_locks'; +--enable_warnings SELECT @@global.innodb_table_locks; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_table_locks'; +--enable_warnings --echo '#----------------------FN_DYNVARS_046_07------------------------#' ######################################################################### # Check if the value in SESSION Table matches value in variable # 
######################################################################### +--disable_warnings SELECT IF(@@session.innodb_table_locks, "ON", "OFF") = VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME='innodb_table_locks'; +--enable_warnings SELECT @@session.innodb_table_locks; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME='innodb_table_locks'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_08-------------------------#' diff --git a/mysql-test/suite/sys_vars/t/innodb_table_locks_func.test b/mysql-test/suite/sys_vars/t/innodb_table_locks_func.test index d69cacd1377..330addd6b3b 100644 --- a/mysql-test/suite/sys_vars/t/innodb_table_locks_func.test +++ b/mysql-test/suite/sys_vars/t/innodb_table_locks_func.test @@ -31,7 +31,9 @@ SET @start_value= @@global.innodb_table_locks; SELECT @start_value; SET @@global.innodb_table_locks = OFF; +--echo 'connect (con1,localhost,root,,,,)' connect (con1,localhost,root,,,,); +--echo 'connection con1' connection con1; SELECT @@global.innodb_table_locks; SELECT @@session.innodb_table_locks; @@ -46,8 +48,10 @@ disconnect con1; #============================================================================== --echo '----check when innodb_table_locks = ON and autocommit = OFF---' #============================================================================== +--echo 'connect (con2,localhost,root,,,,)' connect (con2,localhost,root,,,,); +--echo 'connection default' connection default; --disable_warnings @@ -62,14 +66,17 @@ BEGIN; INSERT INTO t1 VALUES(1); SELECT * FROM t1 FOR UPDATE; +--echo 'CONNECTION con2' CONNECTION con2; SET @@innodb_table_locks = ON; SET @@autocommit = OFF; send LOCK TABLES t1 WRITE; +--echo 'CONNECTION default' CONNECTION default; COMMIT; +--echo 'CONNECTION con2' CONNECTION con2; reap; UNLOCK tables; diff --git a/mysql-test/suite/sys_vars/t/innodb_additional_mem_pool_size_basic.test 
b/mysql-test/suite/sys_vars/t/innodb_temp_data_file_path_basic.test similarity index 63% rename from mysql-test/suite/sys_vars/t/innodb_additional_mem_pool_size_basic.test rename to mysql-test/suite/sys_vars/t/innodb_temp_data_file_path_basic.test index ffb1046ed32..607ee9b27e2 100644 --- a/mysql-test/suite/sys_vars/t/innodb_additional_mem_pool_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_temp_data_file_path_basic.test @@ -1,18 +1,18 @@ -################## mysql-test\t\innodb_additional_mem_pool_size_basic.test #### +################ mysql-test\t\innodb_temp_data_file_path_basic.test ############ # # -# Variable Name: innodb_additional_mem_pool_size # +# Variable Name: innodb_temp_data_file_path # # Scope: Global # # Access Type: Static # -# Data Type: numeric # +# Data Type: filename # # # # # -# Creation Date: 2008-02-07 # -# Author : Sharique Abdullah # +# Creation Date: 2012-12-27 # +# Author : Krunal Bauskar # # # # # -# Description:Test Cases of Dynamic System Variable innodb_additional_mem_pool_size# +# Description:Test Cases of Dynamic System Variable innodb_temp_data_file_path# # that checks the behavior of this variable in the following ways # # * Value Check # # * Scope Check # @@ -24,79 +24,83 @@ --source include/have_innodb.inc ---echo '#---------------------BS_STVARS_020_01----------------------#' +--echo '#---------------------BS_STVARS_024_01----------------------#' #################################################################### # Displaying default value # #################################################################### -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); --echo 1 Expected ---echo '#---------------------BS_STVARS_020_02----------------------#' +--echo '#---------------------BS_STVARS_024_02----------------------#' #################################################################### # Check if Value can set # 
#################################################################### --error ER_INCORRECT_GLOBAL_LOCAL_VAR -SET @@GLOBAL.innodb_additional_mem_pool_size=1; +SET @@GLOBAL.innodb_temp_data_file_path=1; --echo Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); --echo 1 Expected ---echo '#---------------------BS_STVARS_020_03----------------------#' +--echo '#---------------------BS_STVARS_024_03----------------------#' ################################################################# # Check if the value in GLOBAL Table matches value in variable # ################################################################# -SELECT @@GLOBAL.innodb_additional_mem_pool_size = VARIABLE_VALUE +--disable_warnings +SELECT @@GLOBAL.innodb_temp_data_file_path = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_additional_mem_pool_size'; +WHERE VARIABLE_NAME='innodb_temp_data_file_path'; +--enable_warnings --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_additional_mem_pool_size'; +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_temp_data_file_path'; +--enable_warnings --echo 1 Expected ---echo '#---------------------BS_STVARS_020_04----------------------#' +--echo '#---------------------BS_STVARS_024_04----------------------#' ################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # ################################################################################ -SELECT @@innodb_additional_mem_pool_size = @@GLOBAL.innodb_additional_mem_pool_size; +SELECT @@innodb_temp_data_file_path = @@GLOBAL.innodb_temp_data_file_path; 
--echo 1 Expected ---echo '#---------------------BS_STVARS_020_05----------------------#' +--echo '#---------------------BS_STVARS_024_05----------------------#' ################################################################################ -# Check if innodb_additional_mem_pool_size can be accessed with and without @@ sign # +#Check if innodb_temp_data_file_path can be accessed with and without @@ sign # ################################################################################ -SELECT COUNT(@@innodb_additional_mem_pool_size); +SELECT COUNT(@@innodb_temp_data_file_path); --echo 1 Expected --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@local.innodb_additional_mem_pool_size); +SELECT COUNT(@@local.innodb_temp_data_file_path); --echo Expected error 'Variable is a GLOBAL variable' --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@SESSION.innodb_additional_mem_pool_size); +SELECT COUNT(@@SESSION.innodb_temp_data_file_path); --echo Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_additional_mem_pool_size); +SELECT COUNT(@@GLOBAL.innodb_temp_data_file_path); --echo 1 Expected --Error ER_BAD_FIELD_ERROR -SELECT innodb_additional_mem_pool_size = @@SESSION.innodb_additional_mem_pool_size; +SELECT innodb_temp_data_file_path = @@SESSION.innodb_temp_data_file_path; --echo Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test b/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test index d30ec214f4a..0be32543d26 100644 --- a/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test @@ -99,22 +99,35 @@ SELECT @@global.innodb_thread_concurrency; --Error ER_WRONG_TYPE_FOR_VAR SET @@global.innodb_thread_concurrency = "Y"; SELECT @@global.innodb_thread_concurrency; - +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_thread_concurrency = ' '; +SELECT @@global.innodb_thread_concurrency; 
+--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_thread_concurrency = " "; +SELECT @@global.innodb_thread_concurrency; SET @@global.innodb_thread_concurrency = 1001; SELECT @@global.innodb_thread_concurrency; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@global.innodb_thread_concurrency = 255.01; +SELECT @@global.innodb_thread_concurrency; + --echo '#----------------------FN_DYNVARS_046_05------------------------#' ######################################################################### # Check if the value in GLOBAL Table matches value in variable # ######################################################################### +--disable_warnings SELECT @@global.innodb_thread_concurrency = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_thread_concurrency'; +--enable_warnings SELECT @@global.innodb_thread_concurrency; +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_thread_concurrency'; +--enable_warnings --echo '#---------------------FN_DYNVARS_046_06-------------------------#' ################################################################### diff --git a/mysql-test/suite/sys_vars/t/innodb_trx_purge_view_update_only_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_trx_purge_view_update_only_debug_basic.test index d7207515fe1..04f406a311d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_trx_purge_view_update_only_debug_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_trx_purge_view_update_only_debug_basic.test @@ -13,20 +13,26 @@ select @@global.innodb_trx_purge_view_update_only_debug; select @@session.innodb_trx_purge_view_update_only_debug; show global variables like 'innodb_trx_purge_view_update_only_debug'; show session variables like 'innodb_trx_purge_view_update_only_debug'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_trx_purge_view_update_only_debug'; select * from information_schema.session_variables where 
variable_name='innodb_trx_purge_view_update_only_debug'; +--enable_warnings # # show that it's writable # set global innodb_trx_purge_view_update_only_debug=1; select @@global.innodb_trx_purge_view_update_only_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_trx_purge_view_update_only_debug'; select * from information_schema.session_variables where variable_name='innodb_trx_purge_view_update_only_debug'; +--enable_warnings set @@global.innodb_trx_purge_view_update_only_debug=0; select @@global.innodb_trx_purge_view_update_only_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_trx_purge_view_update_only_debug'; select * from information_schema.session_variables where variable_name='innodb_trx_purge_view_update_only_debug'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_trx_purge_view_update_only_debug='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_trx_rseg_n_slots_debug_basic.test b/mysql-test/suite/sys_vars/t/innodb_trx_rseg_n_slots_debug_basic.test index d17917de8e9..858e1b63908 100644 --- a/mysql-test/suite/sys_vars/t/innodb_trx_rseg_n_slots_debug_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_trx_rseg_n_slots_debug_basic.test @@ -13,20 +13,26 @@ select @@global.innodb_trx_rseg_n_slots_debug; select @@session.innodb_trx_rseg_n_slots_debug; show global variables like 'innodb_trx_rseg_n_slots_debug'; show session variables like 'innodb_trx_rseg_n_slots_debug'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_trx_rseg_n_slots_debug'; select * from information_schema.session_variables where variable_name='innodb_trx_rseg_n_slots_debug'; +--enable_warnings # # show that it's writable # set global innodb_trx_rseg_n_slots_debug=1; select @@global.innodb_trx_rseg_n_slots_debug; +--disable_warnings select * from information_schema.global_variables where 
variable_name='innodb_trx_rseg_n_slots_debug'; select * from information_schema.session_variables where variable_name='innodb_trx_rseg_n_slots_debug'; +--enable_warnings set @@global.innodb_trx_rseg_n_slots_debug=0; select @@global.innodb_trx_rseg_n_slots_debug; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_trx_rseg_n_slots_debug'; select * from information_schema.session_variables where variable_name='innodb_trx_rseg_n_slots_debug'; +--enable_warnings --error ER_GLOBAL_VARIABLE set session innodb_trx_rseg_n_slots_debug='some'; --error ER_GLOBAL_VARIABLE diff --git a/mysql-test/suite/sys_vars/t/innodb_undo_directory_basic.test b/mysql-test/suite/sys_vars/t/innodb_undo_directory_basic.test index 583dbe6aa03..0df071c2029 100644 --- a/mysql-test/suite/sys_vars/t/innodb_undo_directory_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_undo_directory_basic.test @@ -21,8 +21,8 @@ #################################################################### # Display the default value # #################################################################### -SELECT @@GLOBAL.innodb_undo_directory; ---echo . Expected +SELECT COUNT(@@GLOBAL.innodb_undo_directory); +--echo 1 Expected #################################################################### @@ -41,17 +41,21 @@ SELECT COUNT(@@GLOBAL.innodb_undo_directory); # Check if the value in GLOBAL table matches value in variable # ################################################################################ -SELECT VARIABLE_VALUE +--disable_warnings +SELECT @@GLOBAL.innodb_undo_directory = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_undo_directory'; ---echo . 
Expected +--enable_warnings +--echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_undo_directory); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_undo_directory'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_undo_log_truncate_basic.test b/mysql-test/suite/sys_vars/t/innodb_undo_log_truncate_basic.test new file mode 100644 index 00000000000..b60f9be22fa --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_undo_log_truncate_basic.test @@ -0,0 +1,113 @@ + + +############### mysql-test\t\innodb_undo_log_truncate_basic.test ############## +# # +# Variable Name: innodb_undo_log_truncate # +# Scope: Global # +# Access Type: Dynamic # +# Data Type: boolean # +# # +# # +# Creation Date: 2008-02-07 # +# Author : Sharique Abdullah # +# # +# # +# Description:Test Cases of Dynamic System Variable innodb_undo_log_truncate # +# that checks the behavior of this variable in the following ways # +# * Value Check # +# * Scope Check # +# # +# Reference: http://dev.mysql.com/doc/refman/5.1/en/ # +# server-system-variables.html # +# # +############################################################################### + +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_undo_log_truncate; +SELECT @start_global_value; + + +--echo '#---------------------BS_STVARS_028_01----------------------#' +#################################################################### +# Displaying default value # +#################################################################### +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +--echo 1 Expected + + +--echo '#---------------------BS_STVARS_028_02----------------------#' +#################################################################### +# Check if Value can set # +#################################################################### + +SET @@global.innodb_undo_log_truncate = 0; +SELECT 
@@global.innodb_undo_log_truncate; + +SET @@global.innodb_undo_log_truncate ='On' ; +SELECT @@global.innodb_undo_log_truncate; + +SET @@global.innodb_undo_log_truncate ='Off' ; +SELECT @@global.innodb_undo_log_truncate; + +SET @@global.innodb_undo_log_truncate = 1; +SELECT @@global.innodb_undo_log_truncate; + +--echo '#---------------------BS_STVARS_028_03----------------------#' +################################################################# +# Check if the value in GLOBAL Table matches value in variable # +################################################################# + +--disable_warnings +SELECT IF(@@GLOBAL.innodb_undo_log_truncate,'ON','OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_undo_log_truncate'; +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +--echo 1 Expected + +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_undo_log_truncate'; +--echo 1 Expected +--enable_warnings + + +--echo '#---------------------BS_STVARS_028_04----------------------#' +################################################################################ +# Check if accessing variable with and without GLOBAL point to same variable # +################################################################################ +SELECT @@innodb_undo_log_truncate = @@GLOBAL.innodb_undo_log_truncate; +--echo 1 Expected + + + +--echo '#---------------------BS_STVARS_028_05----------------------#' +################################################################################ +# Check if innodb_undo_log_truncate can be accessed with and without @@ sign # +################################################################################ + +SELECT COUNT(@@innodb_undo_log_truncate); +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_undo_log_truncate); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error 
ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_undo_log_truncate); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT COUNT(@@GLOBAL.innodb_undo_log_truncate); +--echo 1 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_undo_log_truncate = @@SESSION.innodb_undo_log_truncate; + +# +# Cleanup +# + +SET @@global.innodb_undo_log_truncate = @start_global_value; +SELECT @@global.innodb_undo_log_truncate; diff --git a/mysql-test/suite/sys_vars/t/innodb_undo_logs_basic.test b/mysql-test/suite/sys_vars/t/innodb_undo_logs_basic.test index 77b6af6909c..f83b5ede247 100644 --- a/mysql-test/suite/sys_vars/t/innodb_undo_logs_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_undo_logs_basic.test @@ -39,9 +39,11 @@ SELECT COUNT(@@GLOBAL.innodb_undo_logs); # Check if the value in GLOBAL table matches value in variable # ################################################################################ +--disable_warnings SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_undo_logs'; +--enable_warnings --echo 128 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_undo_tablespaces_basic.test b/mysql-test/suite/sys_vars/t/innodb_undo_tablespaces_basic.test index 53396249e03..e1744b09038 100644 --- a/mysql-test/suite/sys_vars/t/innodb_undo_tablespaces_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_undo_tablespaces_basic.test @@ -18,11 +18,8 @@ --source include/have_innodb.inc -#################################################################### -# Display default value # -#################################################################### -SELECT @@GLOBAL.innodb_undo_tablespaces; ---echo 0 Expected +SELECT @@GLOBAL.innodb_undo_tablespaces >= 0; +let $undo_tablespaces=`SELECT @@GLOBAL.innodb_undo_tablespaces`; #################################################################### @@ -41,9 +38,13 @@ SELECT COUNT(@@GLOBAL.innodb_undo_tablespaces); # Check if the value in GLOBAL table matches value in variable 
# ################################################################################ -SELECT VARIABLE_VALUE +--disable_warnings +--disable_query_log +eval SELECT VARIABLE_VALUE-$undo_tablespaces DIFFERENCE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_undo_tablespaces'; +--enable_query_log +--enable_warnings --echo 0 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_use_native_aio_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_native_aio_basic.test index 37879530d75..524b5a7b161 100644 --- a/mysql-test/suite/sys_vars/t/innodb_use_native_aio_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_use_native_aio_basic.test @@ -52,17 +52,21 @@ SELECT COUNT(@@GLOBAL.innodb_use_native_aio); # Check if the value in GLOBAL Table matches value in variable # ################################################################# +--disable_warnings SELECT IF(@@GLOBAL.innodb_use_native_aio, 'ON', 'OFF') = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_use_native_aio'; +--enable_warnings --echo 1 Expected SELECT COUNT(@@GLOBAL.innodb_use_native_aio); --echo 1 Expected +--disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_use_native_aio'; +--enable_warnings --echo 1 Expected diff --git a/mysql-test/suite/sys_vars/t/innodb_use_sys_malloc_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_sys_malloc_basic.test deleted file mode 100644 index 699773f4a62..00000000000 --- a/mysql-test/suite/sys_vars/t/innodb_use_sys_malloc_basic.test +++ /dev/null @@ -1,31 +0,0 @@ - -# -# 2010-01-27 OBN - Added -# - ---source include/have_innodb.inc - -# when running with valgring, mtr uses --innodb-use-sys-malloc=0, -# while below we want to see the default value. 
---source include/not_valgrind.inc - -# -# show the global and session values; -# ---echo Valid values are 'ON' and 'OFF' -select @@global.innodb_use_sys_malloc; ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -select @@session.innodb_use_sys_malloc; -show global variables like 'innodb_use_sys_malloc'; -show session variables like 'innodb_use_sys_malloc'; -select * from information_schema.global_variables where variable_name='innodb_use_sys_malloc'; -select * from information_schema.session_variables where variable_name='innodb_use_sys_malloc'; - -# -# show that it's read-only -# ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -set global innodb_use_sys_malloc=1; ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -set session innodb_use_sys_malloc=1; - diff --git a/mysql-test/suite/sys_vars/t/innodb_version_basic.test b/mysql-test/suite/sys_vars/t/innodb_version_basic.test index 6ee2adf6cf9..182841048f7 100644 --- a/mysql-test/suite/sys_vars/t/innodb_version_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_version_basic.test @@ -15,8 +15,10 @@ select @@global.innodb_version; select @@session.innodb_version; --echo show global variables like 'innodb_version' disabled so to not change with every version; --echo show session variables like 'innodb_version' disabled so to not change with every version; +--disable_warnings select VARIABLE_VALUE=@@global.innodb_version from information_schema.global_variables where variable_name='innodb_version'; select VARIABLE_VALUE=@@global.innodb_version from information_schema.session_variables where variable_name='innodb_version'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/sys_vars/t/innodb_write_io_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_write_io_threads_basic.test index 8efa6576a66..d9556af37d1 100644 --- a/mysql-test/suite/sys_vars/t/innodb_write_io_threads_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_write_io_threads_basic.test @@ -13,8 +13,10 @@ select @@global.innodb_write_io_threads; select 
@@session.innodb_write_io_threads; show global variables like 'innodb_write_io_threads'; show session variables like 'innodb_write_io_threads'; +--disable_warnings select * from information_schema.global_variables where variable_name='innodb_write_io_threads'; select * from information_schema.session_variables where variable_name='innodb_write_io_threads'; +--enable_warnings # # show that it's read-only diff --git a/mysql-test/suite/unit/suite.pm b/mysql-test/suite/unit/suite.pm index 966fd278a52..76c62037b3d 100644 --- a/mysql-test/suite/unit/suite.pm +++ b/mysql-test/suite/unit/suite.pm @@ -43,11 +43,15 @@ sub start_test { my (@ctest_list)= `cd .. && ctest $opt_vs_config --show-only --verbose`; return "No ctest" if $?; - my ($command, %tests); + my ($command, %tests, $prefix); for (@ctest_list) { chomp; - $command= $' if /^\d+: Test command: +/; - $tests{$'}=$command if /^ +Test +#\d+: +/; + if (/^\d+: Test command: +/) { + $command= $'; + $prefix= /libmariadb/ ? 'conc_' : ''; + } elsif (/^ +Test +#\d+: +/) { + $tests{$prefix.$'}=$command; + } } bless { ctests => { %tests } }; } diff --git a/mysql-test/t/cte_recursive.test b/mysql-test/t/cte_recursive.test index 5eb84bae4fb..c2c02a6680d 100644 --- a/mysql-test/t/cte_recursive.test +++ b/mysql-test/t/cte_recursive.test @@ -1162,6 +1162,44 @@ select h_name, h_dob, w_name, w_dob from ancestor_couples; +create table my_ancestors +with recursive +ancestor_ids (id) +as +( + select father from folks where name = 'Me' + union + select mother from folks where name = 'Me' + union + select father from folks, ancestor_ids a where folks.id = a.id + union + select mother from folks, ancestor_ids a where folks.id = a.id +) +select p.* from folks as p, ancestor_ids as a where p.id = a.id; + +select * from my_ancestors; + +delete from my_ancestors; + +insert into my_ancestors +with recursive +ancestor_ids (id) +as +( + select father from folks where name = 'Me' + union + select mother from folks where name = 'Me' + union + select 
father from folks, ancestor_ids a where folks.id = a.id + union + select mother from folks, ancestor_ids a where folks.id = a.id +) +select p.* from folks as p, ancestor_ids as a where p.id = a.id; + +select * from my_ancestors; + +drop table my_ancestors; + drop table folks; --echo # diff --git a/mysql-test/t/ctype_utf8mb4_innodb-master.opt b/mysql-test/t/ctype_utf8mb4_innodb-master.opt index 96f0ce3f36c..56d40323eae 100644 --- a/mysql-test/t/ctype_utf8mb4_innodb-master.opt +++ b/mysql-test/t/ctype_utf8mb4_innodb-master.opt @@ -1 +1,2 @@ --default-storage-engine=MyISAM +--loose-innodb-large-prefix=OFF diff --git a/mysql-test/t/index_intersect.test b/mysql-test/t/index_intersect.test index 19918c03479..1be963cb9e5 100644 --- a/mysql-test/t/index_intersect.test +++ b/mysql-test/t/index_intersect.test @@ -221,6 +221,7 @@ SELECT * FROM City AND Country BETWEEN 'S' AND 'Z'; --replace_column 9 # +--replace_result PRIMARY,Country,Population PRIMARY,Population,Country 4,7,4 4,4,7 EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000 @@ -306,6 +307,7 @@ SELECT * FROM City WHERE ID BETWEEN 1 AND 500 AND Population > 1000000 AND Country LIKE 'A%'; --replace_column 9 # +--replace_result PRIMARY,Country,Population PRIMARY,Population,Country 4,7,4 4,4,7 EXPLAIN SELECT * FROM City WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000 diff --git a/mysql-test/t/ipv4_as_ipv6.test b/mysql-test/t/ipv4_as_ipv6.test index 1fbc0317a36..2a3ab6647ff 100644 --- a/mysql-test/t/ipv4_as_ipv6.test +++ b/mysql-test/t/ipv4_as_ipv6.test @@ -52,7 +52,7 @@ echo =============Test of '::1' ========================; let $IPv6= ::1; --echo connect (con1, $IPv6, root, , test, MASTER_MYPORT,); --disable_query_log ---error 2003,2006 +--error 2002,2006 connect (con1, $IPv6, root, , test, $MASTER_MYPORT,); --enable_query_log diff --git a/mysql-test/t/merge_debug.test b/mysql-test/t/merge_debug.test index e147946b394..3c617cfc545 100644 --- a/mysql-test/t/merge_debug.test +++ 
b/mysql-test/t/merge_debug.test @@ -8,6 +8,8 @@ set @default_storage_engine= @@global.storage_engine; set global storage_engine=myisam; set session storage_engine=myisam; +call mtr.add_suppression("Index for table .*crashed' is corrupt; try to repair it"); + --disable_warnings drop table if exists crashed,t2,t3,t4; --enable_warnings diff --git a/mysql-test/t/mysqlbinlog_row_minimal.test b/mysql-test/t/mysqlbinlog_row_minimal.test index 9c319880fbd..7909f75e9a1 100644 --- a/mysql-test/t/mysqlbinlog_row_minimal.test +++ b/mysql-test/t/mysqlbinlog_row_minimal.test @@ -27,7 +27,7 @@ DELETE FROM t2; FLUSH BINARY LOGS; --replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR ---replace_regex /\d{6} *\d*:\d\d:\d\d// /Start:.*at startup/Start: xxx/ /SET TIMESTAMP=\d*/SET TIMESTAMP=X/ /exec_time=\d*/exec_time=x/ /CRC32 0x[0-9a-f]*/CRC32 XXX/ +--replace_regex /\d{6} *\d*:\d\d:\d\d// /Start:.*at startup/Start: xxx/ /SET TIMESTAMP=\d*/SET TIMESTAMP=X/ /exec_time=\d*/exec_time=x/ /mapped to number \d*/mapped to number num/ /CRC32 0x[0-9a-f]+/CRC32 XXX/ --exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog DROP TABLE t1,t2; diff --git a/mysql-test/t/openssl_1.test b/mysql-test/t/openssl_1.test index eea74b5b012..8e2d9133359 100644 --- a/mysql-test/t/openssl_1.test +++ b/mysql-test/t/openssl_1.test @@ -16,22 +16,22 @@ create table t1(f1 int); insert into t1 values (5); grant select on test.* to ssl_user1@localhost require SSL; -grant select on test.* to ssl_user2@localhost require cipher "DHE-RSA-AES256-SHA"; -grant select on test.* to ssl_user3@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client"; -grant select on test.* to ssl_user4@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client" ISSUER "/CN=cacert/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB"; -grant select on test.* to ssl_user5@localhost require cipher "DHE-RSA-AES256-SHA" AND SUBJECT 
"xxx"; +grant select on test.* to ssl_user2@localhost require cipher "AES256-SHA"; +grant select on test.* to ssl_user3@localhost require cipher "AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client"; +grant select on test.* to ssl_user4@localhost require cipher "AES256-SHA" AND SUBJECT "/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB/CN=client" ISSUER "/CN=cacert/C=FI/ST=Helsinki/L=Helsinki/O=MariaDB"; +grant select on test.* to ssl_user5@localhost require cipher "AES256-SHA" AND SUBJECT "xxx"; flush privileges; -connect (con1,localhost,ssl_user1,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA); +connect (con1,localhost,ssl_user1,,,,,SSL-CIPHER=AES256-SHA); --replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT --error ER_ACCESS_DENIED_ERROR +connect (con2,localhost,ssl_user2,,,,,SSL-CIPHER=AES128-SHA); connect (con2,localhost,ssl_user2,,,,,SSL-CIPHER=AES256-SHA); -connect (con2,localhost,ssl_user2,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA); -connect (con3,localhost,ssl_user3,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA); -connect (con4,localhost,ssl_user4,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA); +connect (con3,localhost,ssl_user3,,,,,SSL-CIPHER=AES256-SHA); +connect (con4,localhost,ssl_user4,,,,,SSL-CIPHER=AES256-SHA); --replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT --error ER_ACCESS_DENIED_ERROR -connect (con5,localhost,ssl_user5,,,,,SSL-CIPHER=DHE-RSA-AES256-SHA); +connect (con5,localhost,ssl_user5,,,,,SSL-CIPHER=AES256-SHA); connection con1; # Check ssl turned on @@ -79,7 +79,6 @@ drop table t1; # --exec echo "this query should not execute;" > $MYSQLTEST_VARDIR/tmp/test.sql # Handle that openssl gives different error messages from YaSSL. 
-#--replace_regex /error:00000001:lib\(0\):func\(0\):reason\(1\)/ASN: bad other signature confirmation/ --replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-ca=$MYSQL_TEST_DIR/std_data/untrusted-cacert.pem --max-connect-retries=1 < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 @@ -89,7 +88,6 @@ drop table t1; # Test that we can't open connection to server if we are using # a blank ca # -#--replace_regex /error:00000001:lib\(0\):func\(0\):reason\(1\)/ASN: bad other signature confirmation/ --replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-ca= --max-connect-retries=1 < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 @@ -99,7 +97,6 @@ drop table t1; # Test that we can't open connection to server if we are using # a nonexistent ca file # -#--replace_regex /error:00000001:lib\(0\):func\(0\):reason\(1\)/ASN: bad other signature confirmation/ --replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-ca=nonexisting_file.pem --max-connect-retries=1 < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 @@ -109,23 +106,27 @@ drop table t1; # Test that we can't open connection to server if we are using # a blank client-key # +--replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-key= --max-connect-retries=1 < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 +--echo # # Test that we can't open connection to server if we are using # a blank client-cert # +--replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-cert= --max-connect-retries=1 < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 +--echo # # Bug#21611 Slave can't connect when master-ssl-cipher specified # - Apparently selecting a cipher doesn't work at all -# - Usa a cipher that both yaSSL and OpenSSL supports +# - Use a cipher that both yaSSL and OpenSSL supports # --exec echo "SHOW STATUS 
LIKE 'Ssl_cipher'; exit;" > $MYSQLTEST_VARDIR/tmp/test.sql ---exec $MYSQL_TEST --ssl-cipher=DHE-RSA-AES256-SHA < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 +--exec $MYSQL_TEST --ssl-cipher=AES256-SHA < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 # # Bug#25309 SSL connections without CA certificate broken since MySQL 5.0.23 @@ -190,6 +191,7 @@ SET GLOBAL event_scheduler=0; # Test to connect using an unknown cipher # --exec echo "SHOW STATUS LIKE 'Ssl_cipher'; exit" > $MYSQLTEST_VARDIR/tmp/test.sql +--replace_regex /2026 SSL connection error.*/2026 SSL connection error: xxxx/ --error 1 --exec $MYSQL_TEST --ssl-cipher=UNKNOWN-CIPHER < $MYSQLTEST_VARDIR/tmp/test.sql 2>&1 @@ -209,9 +211,10 @@ INSERT INTO t1 VALUES (1), (2); # With wrong parameters --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR mysqldump.exe mysqldump +--replace_regex /\"SSL connection error.*/SSL connection error: xxxx/ --error 2 --exec $MYSQL_DUMP --skip-create-options --skip-comments --ssl --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test 2>&1 - +--echo DROP TABLE t1; --remove_file $MYSQLTEST_VARDIR/tmp/test.sql @@ -221,8 +224,8 @@ DROP TABLE t1; # # Common ciphers to openssl and yassl ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl_cipher';" --ssl-cipher=DHE-RSA-AES256-SHA ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl_cipher';" --ssl-cipher=EDH-RSA-DES-CBC3-SHA +--exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl_cipher';" --ssl-cipher=AES256-SHA +--exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl_cipher';" --ssl-cipher=DES-CBC3-SHA --disable_query_log --disable_result_log @@ -231,20 +234,7 @@ DROP TABLE t1; --exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=NOT----EXIST # These probably exist but the server's keys can't be used to accept these kinds of connections. 
--error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=DHE-DSS-AES128-RMD ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=DHE-DSS-AES128-SHA ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=DHE-DSS-AES256-RMD ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=DHE-DSS-AES256-SHA ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=DHE-DSS-DES-CBC3-RMD ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=EDH-DSS-DES-CBC3-SHA ---error 1,0 ---exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=EDH-DSS-DES-CBC-SHA -# End of crashers. ########################## +--exec $MYSQL --host=localhost -e "SHOW STATUS LIKE 'Ssl-cipher';" --ssl-cipher=AES128-RMD # If this gives a result, then the bug is fixed. --enable_result_log diff --git a/mysql-test/t/partition_exchange-master.opt b/mysql-test/t/partition_exchange-master.opt new file mode 100644 index 00000000000..5a0380b7a1d --- /dev/null +++ b/mysql-test/t/partition_exchange-master.opt @@ -0,0 +1 @@ +--loose-innodb_default_row_format=COMPACT diff --git a/mysql-test/t/partition_innodb-master.opt b/mysql-test/t/partition_innodb-master.opt new file mode 100644 index 00000000000..cf94b2d7dca --- /dev/null +++ b/mysql-test/t/partition_innodb-master.opt @@ -0,0 +1 @@ +--loose-innodb-large-prefix=OFF diff --git a/mysql-test/t/partition_innodb_plugin.test b/mysql-test/t/partition_innodb_plugin.test index 3a3ae257beb..a514736ff42 100644 --- a/mysql-test/t/partition_innodb_plugin.test +++ b/mysql-test/t/partition_innodb_plugin.test @@ -3,6 +3,8 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`; +call mtr.add_suppression("InnoDB: Table .* does not exist in the InnoDB internal data dictionary .*"); + --echo # --echo # Bug#11766879/Bug#60106: DIFF BETWEEN # OF INDEXES IN 
MYSQL VS INNODB, --echo # PARTITONING, ON INDEX CREATE diff --git a/mysql-test/t/row-checksum-master.opt b/mysql-test/t/row-checksum-master.opt new file mode 100644 index 00000000000..990e4941ae9 --- /dev/null +++ b/mysql-test/t/row-checksum-master.opt @@ -0,0 +1 @@ +--loose-innodb-strict-mode=0 diff --git a/mysql-test/t/row-checksum-old-master.opt b/mysql-test/t/row-checksum-old-master.opt index 8e7b7f9e36f..40027795fff 100644 --- a/mysql-test/t/row-checksum-old-master.opt +++ b/mysql-test/t/row-checksum-old-master.opt @@ -1 +1,2 @@ --old +--loose-innodb-strict-mode=0 diff --git a/mysql-test/t/row-checksum.opt b/mysql-test/t/row-checksum.opt new file mode 100644 index 00000000000..977b569a781 --- /dev/null +++ b/mysql-test/t/row-checksum.opt @@ -0,0 +1 @@ +--loose-innodb-strict-mode=off diff --git a/mysql-test/t/ssl.test b/mysql-test/t/ssl.test index 9a08b273b6b..f2ac288db7a 100644 --- a/mysql-test/t/ssl.test +++ b/mysql-test/t/ssl.test @@ -33,8 +33,8 @@ connection default; disconnect ssl_con; create user mysqltest_1@localhost; -grant usage on mysqltest.* to mysqltest_1@localhost require cipher "EDH-RSA-DES-CBC3-SHA"; ---exec $MYSQL -umysqltest_1 --ssl-cipher=EDH-RSA-DES-CBC3-SHA -e "show status like 'ssl_cipher'" 2>&1 +grant usage on mysqltest.* to mysqltest_1@localhost require cipher "AES256-SHA"; +--exec $MYSQL -umysqltest_1 --ssl-cipher=AES256-SHA -e "show status like 'ssl_cipher'" 2>&1 drop user mysqltest_1@localhost; # Wait till all disconnects are completed diff --git a/mysql-test/t/ssl_7937.test b/mysql-test/t/ssl_7937.test index d593b9d936d..8e9d1901907 100644 --- a/mysql-test/t/ssl_7937.test +++ b/mysql-test/t/ssl_7937.test @@ -26,10 +26,10 @@ create procedure have_ssl() # we fake the test result for yassl let yassl=`select variable_value='Unknown' from information_schema.session_status where variable_name='Ssl_session_cache_mode'`; if (!$yassl) { + --replace_result "self signed certificate in certificate chain" "Failed to verify the server certificate" 
--exec $MYSQL --ssl --ssl-verify-server-cert -e "call test.have_ssl()" 2>&1 } if ($yassl) { --echo ERROR 2026 (HY000): SSL connection error: Failed to verify the server certificate } - drop procedure have_ssl; diff --git a/mysql-test/t/ssl_8k_key-master.opt b/mysql-test/t/ssl_8k_key-master.opt index 531c0abc9f1..856b33e95ee 100644 --- a/mysql-test/t/ssl_8k_key-master.opt +++ b/mysql-test/t/ssl_8k_key-master.opt @@ -1,3 +1,2 @@ --loose-ssl-key=$MYSQL_TEST_DIR/std_data/server8k-key.pem --loose-ssl-cert=$MYSQL_TEST_DIR/std_data/server8k-cert.pem ---loose-ssl-cipher=DHE-RSA-AES256-SHA diff --git a/mysql-test/t/ssl_8k_key.test b/mysql-test/t/ssl_8k_key.test index 27cffdce1f2..23267a3c611 100644 --- a/mysql-test/t/ssl_8k_key.test +++ b/mysql-test/t/ssl_8k_key.test @@ -5,7 +5,7 @@ # # Bug#29784 YaSSL assertion failure when reading 8k key. # ---exec $MYSQL --ssl --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem -e "SHOW STATUS LIKE 'ssl_Cipher'" 2>&1 +--exec $MYSQL --connect-timeout=180 --ssl --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem -e "SELECT (VARIABLE_VALUE <> '') as have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" 2>&1 ## This test file is for testing encrypted communication only, not other ## encryption routines that the SSL library happens to provide! 
diff --git a/mysql-test/t/ssl_ca.test b/mysql-test/t/ssl_ca.test index 8d830a75879..5870d9598fc 100644 --- a/mysql-test/t/ssl_ca.test +++ b/mysql-test/t/ssl_ca.test @@ -6,11 +6,14 @@ --echo # --echo # try to connect with wrong '--ssl-ca' path : should fail + +--replace_regex /SSL connection error.*/SSL connection error: xxxx/ --error 1 ---exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/wrong-cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" 2>&1 +--exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/wrong-cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher';" 2>&1 +--echo --echo # try to connect with correct '--ssl-ca' path : should connect ---exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" +--exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher';" --echo # --echo # Bug#21920678: SSL-CA DOES NOT ACCEPT ~USER TILDE HOME DIRECTORY @@ -21,12 +24,12 @@ --echo # try to connect with '--ssl-ca' option using tilde home directoy --echo # path substitution : should connect ---exec $MYSQL --ssl-ca$mysql_test_dir_path/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT 
(VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" +--exec $MYSQL --ssl-ca$mysql_test_dir_path/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher';" --echo # try to connect with '--ssl-key' option using tilde home directoy --echo # path substitution : should connect ---exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key$mysql_test_dir_path/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" +--exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key$mysql_test_dir_path/std_data/client-key.pem --ssl-cert=$MYSQL_TEST_DIR/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher';" --echo # try to connect with '--ssl-cert' option using tilde home directoy --echo # path substitution : should connect ---exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert$mysql_test_dir_path/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'" +--exec $MYSQL --ssl-ca=$MYSQL_TEST_DIR/std_data/cacert.pem --ssl-key=$MYSQL_TEST_DIR/std_data/client-key.pem --ssl-cert$mysql_test_dir_path/std_data/client-cert.pem test -e "SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher';" diff --git a/mysql-test/t/userstat.test b/mysql-test/t/userstat.test index cb1250a13ea..9ce3a32c442 100644 --- a/mysql-test/t/userstat.test +++ b/mysql-test/t/userstat.test @@ -35,8 +35,7 @@ drop table 
t1; # test SSL connections --connect (ssl_con,localhost,root,,,,,SSL) ---replace_result DHE-RSA-AES256-GCM-SHA384 DHE-RSA-AES256-SHA -SHOW STATUS LIKE 'Ssl_cipher'; +SELECT (VARIABLE_VALUE <> '') AS have_ssl FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME='Ssl_cipher'; --connection default # diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test index 2ede130156c..de03dd10253 100644 --- a/mysql-test/t/win.test +++ b/mysql-test/t/win.test @@ -1230,3 +1230,35 @@ SELECT o_custkey, avg(o_custkey) OVER (PARTITION BY abs(o_custkey) RANGE BETWEEN 15 FOLLOWING AND 15 FOLLOWING) from orders; DROP table orders; + +--echo # +--echo # MDEV-10842: window functions with the same order column +--echo # but different directions +--echo # + +create table t1 ( + pk int primary key, + a int, + b int, + c char(10) +); + +insert into t1 values +( 1, 0, 1, 'one'), +( 2, 0, 2, 'two'), +( 3, 0, 3, 'three'), +( 4, 1, 1, 'one'), +( 5, 1, 1, 'two'), +( 6, 1, 2, 'three'), +( 7, 2, NULL, 'n_one'), +( 8, 2, 1, 'n_two'), +( 9, 2, 2, 'n_three'), +(10, 2, 0, 'n_four'), +(11, 2, 10, NULL); + +select pk, + row_number() over (order by pk desc) as r_desc, + row_number() over (order by pk asc) as r_asc +from t1; + +drop table t1; diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 5f4e9156fd2..892928ca69b 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -16,6 +16,7 @@ INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys) SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c + get_password.c errors.c hash.c list.c mf_cache.c mf_dirname.c mf_fn_ext.c mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c @@ -39,7 +40,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c lf_alloc-pin.c lf_dynarray.c lf_hash.c safemalloc.c my_new.cc my_atomic.c my_getncpus.c my_safehash.c my_chmod.c my_rnd.c - my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c + my_uuid.c wqueue.c 
waiting_threads.c ma_dyncol.c ../sql-common/my_time.c my_rdtsc.c my_context.c psi_noop.c file_logger.c) diff --git a/mysys/base64.c b/mysys/base64.c index 265b2f22aad..67a9a13120b 100644 --- a/mysys/base64.c +++ b/mysys/base64.c @@ -26,22 +26,22 @@ static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789+/"; /** - * Maximum length base64_needed_encoded_length() + * Maximum length my_base64_needed_encoded_length() * can handle without signed integer overflow. */ int -base64_encode_max_arg_length() +my_base64_encode_max_arg_length() { /* - base64_needed_encoded_length(1589695686) -> 2147483646 (7FFFFFFE) - base64_needed_encoded_length(1589695687) -> -2147483645 + my_base64_needed_encoded_length(1589695686) -> 2147483646 (7FFFFFFE) + my_base64_needed_encoded_length(1589695687) -> -2147483645 */ return 0x5EC0D4C6; /* 1589695686 */ } int -base64_needed_encoded_length(int length_of_data) +my_base64_needed_encoded_length(int length_of_data) { int nb_base64_chars; nb_base64_chars= (length_of_data + 2) / 3 * 4; @@ -54,17 +54,17 @@ base64_needed_encoded_length(int length_of_data) /** - * Maximum length supported by base64_decode(). + * Maximum length supported by my_base64_decode(). */ int -base64_decode_max_arg_length() +my_base64_decode_max_arg_length() { return 0x7FFFFFFF; } int -base64_needed_decoded_length(int length_of_encoded_data) +my_base64_needed_decoded_length(int length_of_encoded_data) { return (int) ((longlong) length_of_encoded_data + 3) / 4 * 3; } @@ -74,7 +74,7 @@ base64_needed_decoded_length(int length_of_encoded_data) Encode a data as base64. Note: We require that dst is pre-allocated to correct size. - See base64_needed_encoded_length(). + See my_base64_needed_encoded_length(). Note: We add line separators every 76 characters. 
@@ -83,7 +83,7 @@ base64_needed_decoded_length(int length_of_encoded_data) */ int -base64_encode(const void *src, size_t src_len, char *dst) +my_base64_encode(const void *src, size_t src_len, char *dst) { const unsigned char *s= (const unsigned char*)src; size_t i= 0; @@ -299,7 +299,7 @@ my_base64_decoder_getch(MY_BASE64_DECODER *decoder) * the last read character, even in the presence of error. * * Note: 'dst' must have sufficient space to store the decoded data. - * Use base64_needed_decoded_length() to calculate the correct space size. + * Use my_base64_needed_decoded_length() to calculate the correct space size. * * Note: we allow spaces and line separators at any position. * @@ -313,7 +313,7 @@ my_base64_decoder_getch(MY_BASE64_DECODER *decoder) * @return Number of bytes written at 'dst', or -1 in case of failure */ int -base64_decode(const char *src_base, size_t len, +my_base64_decode(const char *src_base, size_t len, void *dst, const char **end_ptr, int flags) { char *d= (char*) dst; @@ -397,18 +397,18 @@ main(void) } /* Encode */ - needed_length= base64_needed_encoded_length(src_len); + needed_length= my_base64_needed_encoded_length(src_len); str= (char *) malloc(needed_length); require(str); for (k= 0; k < needed_length; k++) str[k]= 0xff; /* Fill memory to check correct NUL termination */ - require(base64_encode(src, src_len, str) == 0); + require(my_base64_encode(src, src_len, str) == 0); require(needed_length == strlen(str) + 1); /* Decode */ - dst= (char *) malloc(base64_needed_decoded_length(strlen(str))); + dst= (char *) malloc(my_base64_needed_decoded_length(strlen(str))); require(dst); - dst_len= base64_decode(str, strlen(str), dst, NULL); + dst_len= my_base64_decode(str, strlen(str), dst, NULL); require(dst_len == src_len); if (memcmp(src, dst, src_len) != 0) diff --git a/client/get_password.c b/mysys/get_password.c similarity index 100% rename from client/get_password.c rename to mysys/get_password.c diff --git a/mysys/my_static.c 
b/mysys/my_static.c index 4aca78e30a9..ce9e8831be6 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -98,3 +98,10 @@ my_bool my_disable_sync=0; my_bool my_disable_async_io=0; my_bool my_disable_flush_key_blocks=0; my_bool my_disable_symlinks=0; + +/* Typelib by all clients */ +const char *sql_protocol_names_lib[] = +{ "TCP", "SOCKET", "PIPE", "MEMORY", NullS }; + +TYPELIB sql_protocol_typelib ={ array_elements(sql_protocol_names_lib) - 1, "", + sql_protocol_names_lib, NULL }; diff --git a/plugin/auth_dialog/CMakeLists.txt b/plugin/auth_dialog/CMakeLists.txt index 7253b2b2f97..cdc961bb390 100644 --- a/plugin/auth_dialog/CMakeLists.txt +++ b/plugin/auth_dialog/CMakeLists.txt @@ -14,5 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -MYSQL_ADD_PLUGIN(dialog dialog.c ${CMAKE_SOURCE_DIR}/libmysql/get_password.c - MODULE_ONLY CLIENT COMPONENT ClientPlugins) +# disabled in favor of libmariadb/plugins/auth/dialog.c +# +#MYSQL_ADD_PLUGIN(dialog dialog.c ${CMAKE_SOURCE_DIR}/libmysql/get_password.c +# MODULE_ONLY CLIENT COMPONENT ClientPlugins) diff --git a/plugin/auth_examples/CMakeLists.txt b/plugin/auth_examples/CMakeLists.txt index d1152227eff..b5859f158d6 100644 --- a/plugin/auth_examples/CMakeLists.txt +++ b/plugin/auth_examples/CMakeLists.txt @@ -29,5 +29,8 @@ MYSQL_ADD_PLUGIN(qa_auth_client qa_auth_client.c MYSQL_ADD_PLUGIN(auth_0x0100 auth_0x0100.c MODULE_ONLY COMPONENT Test) -MYSQL_ADD_PLUGIN(mysql_clear_password clear_password_client.c - MODULE_ONLY CLIENT COMPONENT ClientPlugins) +# disabled in favor of +# libmariadb/plugins/auth/mariadb_cleartext.c +# +#MYSQL_ADD_PLUGIN(mysql_clear_password clear_password_client.c +# MODULE_ONLY CLIENT COMPONENT ClientPlugins) diff --git a/plugin/auth_gssapi/CMakeLists.txt b/plugin/auth_gssapi/CMakeLists.txt index 7d9e58e165f..d2f854c2918 100644 --- a/plugin/auth_gssapi/CMakeLists.txt +++ b/plugin/auth_gssapi/CMakeLists.txt @@ 
-38,8 +38,10 @@ MYSQL_ADD_PLUGIN(auth_gssapi server_plugin.cc ${GSSAPI_SERVER} ${GSSAPI_ERRMSG} COMPONENT gssapi-server MODULE_ONLY) -MYSQL_ADD_PLUGIN(auth_gssapi_client client_plugin.cc ${GSSAPI_CLIENT} ${GSSAPI_ERRMSG} - LINK_LIBRARIES ${GSSAPI_LIBS} - COMPONENT gssapi-client - CLIENT - MODULE_ONLY) +# disabled in favor of libmariadb/plugins/auth/auth_gssapi_client.c +# +#MYSQL_ADD_PLUGIN(auth_gssapi_client client_plugin.cc ${GSSAPI_CLIENT} ${GSSAPI_ERRMSG} +# LINK_LIBRARIES ${GSSAPI_LIBS COMPONENT ClientPlugins} +# COMPONENT gssapi-client +# CLIENT +# MODULE_ONLY) diff --git a/plugin/feedback/utils.cc b/plugin/feedback/utils.cc index b83b69be0ce..327db69feda 100644 --- a/plugin/feedback/utils.cc +++ b/plugin/feedback/utils.cc @@ -422,8 +422,8 @@ int calculate_server_uid(char *dest) compute_sha1_hash((uint8*) shabuf, (char*) rawbuf, sizeof(rawbuf)); - assert(base64_needed_encoded_length(sizeof(shabuf)) <= SERVER_UID_SIZE); - base64_encode(shabuf, sizeof(shabuf), dest); + assert(my_base64_needed_encoded_length(sizeof(shabuf)) <= SERVER_UID_SIZE); + my_base64_encode(shabuf, sizeof(shabuf), dest); return 0; } diff --git a/plugin/file_key_management/parser.cc b/plugin/file_key_management/parser.cc index 628412bc171..b224391264c 100644 --- a/plugin/file_key_management/parser.cc +++ b/plugin/file_key_management/parser.cc @@ -333,7 +333,8 @@ char* Parser::read_and_decrypt_file(const char *secret) // Check for file encryption uchar *decrypted; - if (is_prefix((char*)buffer, OpenSSL_prefix)) + if (file_size > OpenSSL_prefix_len && + is_prefix((char*)buffer, OpenSSL_prefix)) { uchar key[OpenSSL_key_len]; uchar iv[OpenSSL_iv_len]; diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index 07bca1c0318..e37f512b0f3 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -550,10 +550,6 @@ fi if test -d $MY_BASEDIR_VERSION/data/mysql then DATADIR=$MY_BASEDIR_VERSION/data - if test -z "$defaults" -a -r "$DATADIR/my.cnf" - then - 
defaults="--defaults-extra-file=$DATADIR/my.cnf" - fi # Next try where the source installs put it elif test -d $MY_BASEDIR_VERSION/var/mysql then @@ -565,23 +561,13 @@ fi if test -z "$MYSQL_HOME" then - if test -r "$MY_BASEDIR_VERSION/my.cnf" && test -r "$DATADIR/my.cnf" - then - log_error "WARNING: Found two instances of my.cnf - -$MY_BASEDIR_VERSION/my.cnf and -$DATADIR/my.cnf -IGNORING $DATADIR/my.cnf" - - MYSQL_HOME=$MY_BASEDIR_VERSION - elif test -r "$DATADIR/my.cnf" + if test -r "$DATADIR/my.cnf" then log_error "WARNING: Found $DATADIR/my.cnf -The data directory is a deprecated location for my.cnf, please move it to +The data directory is not a valid location for my.cnf, please move it to $MY_BASEDIR_VERSION/my.cnf" - MYSQL_HOME=$DATADIR - else - MYSQL_HOME=$MY_BASEDIR_VERSION fi + MYSQL_HOME=$MY_BASEDIR_VERSION fi export MYSQL_HOME diff --git a/sql-common/client.c b/sql-common/client.c index c900ec6800f..858e9ec4b5b 100644 --- a/sql-common/client.c +++ b/sql-common/client.c @@ -1005,11 +1005,6 @@ enum option_id { static TYPELIB option_types={array_elements(default_options)-1, "options",default_options, NULL}; -const char *sql_protocol_names_lib[] = -{ "TCP", "SOCKET", "PIPE", "MEMORY", NullS }; -TYPELIB sql_protocol_typelib = {array_elements(sql_protocol_names_lib)-1,"", - sql_protocol_names_lib, NULL}; - static int add_init_command(struct st_mysql_options *options, const char *cmd) { char *tmp; @@ -4776,3 +4771,11 @@ mysql_get_socket(const MYSQL *mysql) return vio_fd(mysql->net.vio); return INVALID_SOCKET; } + + +int STDCALL mysql_cancel(MYSQL *mysql) +{ + if (mysql->net.vio) + return vio_shutdown(mysql->net.vio, SHUT_RDWR); + return -1; +} diff --git a/libmysql/conf_to_src.c b/sql-common/conf_to_src.c similarity index 100% rename from libmysql/conf_to_src.c rename to sql-common/conf_to_src.c diff --git a/libmysql/errmsg.c b/sql-common/errmsg.c similarity index 100% rename from libmysql/errmsg.c rename to sql-common/errmsg.c diff --git 
a/sql-common/my_time.c b/sql-common/my_time.c index 7cf8692a3f6..88f28e1d44a 100644 --- a/sql-common/my_time.c +++ b/sql-common/my_time.c @@ -20,7 +20,7 @@ #include /* Windows version of localtime_r() is declared in my_ptrhead.h */ #include -#include + ulonglong log_10_int[20]= { @@ -777,7 +777,6 @@ long calc_daynr(uint year,uint month,uint day) DBUG_RETURN(delsum+(int) y/4-temp); } /* calc_daynr */ - /* Convert time in MYSQL_TIME representation in system time zone to its my_time_t form (number of seconds in UTC since begginning of Unix Epoch). diff --git a/sql-common/pack.c b/sql-common/pack.c index 5428feb623e..da95172c92d 100644 --- a/sql-common/pack.c +++ b/sql-common/pack.c @@ -19,7 +19,7 @@ #include /* Get the length of next field. Change parameter to point at fieldstart */ -ulong STDCALL net_field_length(uchar **packet) +ulong net_field_length(uchar **packet) { reg1 uchar *pos= (uchar *)*packet; if (*pos < 251) diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index a18294e5ae3..28072375bbc 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -79,7 +79,7 @@ ENDIF() SET (SQL_SOURCE ../sql-common/client.c compat56.cc derror.cc des_key_file.cc - discover.cc ../libmysql/errmsg.c field.cc field_conv.cc + discover.cc ../sql-common/errmsg.c field.cc field_conv.cc filesort_utils.cc filesort.cc gstream.cc sha2.cc signal_handler.cc @@ -90,7 +90,7 @@ SET (SQL_SOURCE key.cc log.cc lock.cc log_event.cc rpl_record.cc rpl_reporting.cc log_event_old.cc rpl_record_old.cc - message.h mf_iocache.cc my_decimal.cc ../sql-common/my_time.c + message.h mf_iocache.cc my_decimal.cc mysqld.cc net_serv.cc keycaches.cc ../sql-common/client_plugin.c opt_range.cc opt_range.h opt_sum.cc @@ -157,9 +157,9 @@ IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) IF(WIN32) SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc) - ELSE() - SET(SQL_SOURCE ${SQL_SOURCE} threadpool_unix.cc) ENDIF() + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_generic.cc) + ENDIF() 
MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY @@ -322,12 +322,6 @@ IF(WIN32 OR HAVE_DLOPEN AND NOT DISABLE_SHARED) ENDIF() ENDIF() -FOREACH(tool gtar tar git) - STRING(TOUPPER ${tool} TOOL) - FIND_PROGRAM(${TOOL}_EXECUTABLE ${tool} DOC "path to the executable") - MARK_AS_ADVANCED(${TOOL}_EXECUTABLE) -ENDFOREACH() - CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/cmake/make_dist.cmake.in ${CMAKE_BINARY_DIR}/make_dist.cmake @ONLY) diff --git a/sql/handler.cc b/sql/handler.cc index 589106dfe93..d7481f8e8ea 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -359,7 +359,7 @@ int ha_init_errors(void) SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine"); SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED)); SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key"); - SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, "Table upgrade required. Please do \"REPAIR TABLE %`\" or dump/reload to fix it"); + SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE)); SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY)); SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED)); SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE)); @@ -370,6 +370,8 @@ int ha_init_errors(void) SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK)); SETMSG(HA_ERR_DISK_FULL, ER_DEFAULT(ER_DISK_FULL)); SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search"); + SETMSG(HA_ERR_FK_DEPTH_EXCEEDED, "Foreign key cascade delete/update exceeds"); + SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING)); /* Register the error messages for use with my_error(). 
*/ return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST); @@ -3535,9 +3537,10 @@ void handler::print_error(int error, myf errflag) DBUG_VOID_RETURN; } case HA_ERR_TABLE_NEEDS_UPGRADE: + textno= ER_TABLE_NEEDS_UPGRADE; my_error(ER_TABLE_NEEDS_UPGRADE, errflag, "TABLE", table_share->table_name.str); - break; + DBUG_VOID_RETURN; case HA_ERR_NO_PARTITION_FOUND: textno=ER_WRONG_PARTITION_NAME; break; @@ -5819,8 +5822,6 @@ int handler::ha_external_lock(THD *thd, int lock_type) } } - ha_statistic_increment(&SSV::ha_external_lock_count); - /* We cache the table flags if the locking succeeded. Otherwise, we keep them as they were when they were fetched in ha_open(). diff --git a/sql/handler.h b/sql/handler.h index ef0c2f78915..fbb620c696c 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -573,7 +573,7 @@ struct xid_t { long bqual_length; char data[XIDDATASIZE]; // not \0-terminated ! - xid_t() {} /* Remove gcc warning */ + xid_t() {} /* Remove gcc warning */ bool eq(struct xid_t *xid) { return !xid->is_null() && eq(xid->gtrid_length, xid->bqual_length, xid->data); } bool eq(long g, long b, const char *d) @@ -3885,9 +3885,6 @@ public: TABLE* get_table() { return table; } TABLE_SHARE* get_table_share() { return table_share; } protected: - /* deprecated, don't use in new engines */ - inline void ha_statistic_increment(ulong SSV::*offset) const { } - /* Service methods for use by storage engines. 
*/ void **ha_data(THD *) const; THD *ha_thd(void) const; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index bff31ec7b26..acd3d74c12b 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -427,14 +427,14 @@ void Item_func_to_base64::fix_length_and_dec() { maybe_null= args[0]->maybe_null; collation.set(default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); - if (args[0]->max_length > (uint) base64_encode_max_arg_length()) + if (args[0]->max_length > (uint) my_base64_encode_max_arg_length()) { maybe_null= 1; - fix_char_length_ulonglong((ulonglong) base64_encode_max_arg_length()); + fix_char_length_ulonglong((ulonglong) my_base64_encode_max_arg_length()); } else { - int length= base64_needed_encoded_length((int) args[0]->max_length); + int length= my_base64_needed_encoded_length((int) args[0]->max_length); DBUG_ASSERT(length > 0); fix_char_length_ulonglong((ulonglong) length - 1); } @@ -447,9 +447,9 @@ String *Item_func_to_base64::val_str_ascii(String *str) bool too_long= false; int length; if (!res || - res->length() > (uint) base64_encode_max_arg_length() || + res->length() > (uint) my_base64_encode_max_arg_length() || (too_long= - ((uint) (length= base64_needed_encoded_length((int) res->length())) > + ((uint) (length= my_base64_needed_encoded_length((int) res->length())) > current_thd->variables.max_allowed_packet)) || tmp_value.alloc((uint) length)) { @@ -465,7 +465,7 @@ String *Item_func_to_base64::val_str_ascii(String *str) } return 0; } - base64_encode(res->ptr(), (int) res->length(), (char*) tmp_value.ptr()); + my_base64_encode(res->ptr(), (int) res->length(), (char*) tmp_value.ptr()); DBUG_ASSERT(length > 0); tmp_value.length((uint) length - 1); // Without trailing '\0' null_value= 0; @@ -475,13 +475,13 @@ String *Item_func_to_base64::val_str_ascii(String *str) void Item_func_from_base64::fix_length_and_dec() { - if (args[0]->max_length > (uint) base64_decode_max_arg_length()) + if (args[0]->max_length > (uint) 
my_base64_decode_max_arg_length()) { - fix_char_length_ulonglong((ulonglong) base64_decode_max_arg_length()); + fix_char_length_ulonglong((ulonglong) my_base64_decode_max_arg_length()); } else { - int length= base64_needed_decoded_length((int) args[0]->max_length); + int length= my_base64_needed_decoded_length((int) args[0]->max_length); fix_char_length_ulonglong((ulonglong) length); } maybe_null= 1; // Can be NULL, e.g. in case of badly formed input string @@ -497,8 +497,8 @@ String *Item_func_from_base64::val_str(String *str) if (!res) goto err; - if (res->length() > (uint) base64_decode_max_arg_length() || - ((uint) (length= base64_needed_decoded_length((int) res->length())) > + if (res->length() > (uint) my_base64_decode_max_arg_length() || + ((uint) (length= my_base64_needed_decoded_length((int) res->length())) > current_thd->variables.max_allowed_packet)) { THD *thd= current_thd; @@ -513,7 +513,7 @@ String *Item_func_from_base64::val_str(String *str) if (tmp_value.alloc((uint) length)) goto err; - if ((length= base64_decode(res->ptr(), (int) res->length(), + if ((length= my_base64_decode(res->ptr(), (int) res->length(), (char *) tmp_value.ptr(), &end_ptr, 0)) < 0 || end_ptr < res->ptr() + res->length()) { @@ -572,7 +572,7 @@ String *Item_func_decode_histogram::val_str(String *str) uint i; str->length(0); char numbuf[32]; - const uchar *p= (uchar*)res->c_ptr(); + const uchar *p= (uchar*)res->c_ptr_safe(); for (i= 0; i < res->length(); i++) { double val; diff --git a/sql/log_event.cc b/sql/log_event.cc index 9515e5c04a7..6ad9fcebc78 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1354,7 +1354,7 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, ulong data_len; char buf[LOG_EVENT_MINIMAL_HEADER_LEN]; uchar ev_offset= packet->length(); -#ifndef max_allowed_packet +#if !defined(MYSQL_CLIENT) THD *thd=current_thd; ulong max_allowed_packet= thd ? thd->slave_thread ? 
slave_max_allowed_packet : thd->variables.max_allowed_packet @@ -2771,7 +2771,7 @@ void Log_event::print_base64(IO_CACHE* file, uint32 size= uint4korr(ptr + EVENT_LEN_OFFSET); DBUG_ENTER("Log_event::print_base64"); - size_t const tmp_str_sz= base64_needed_encoded_length((int) size); + size_t const tmp_str_sz= my_base64_needed_encoded_length((int) size); char *const tmp_str= (char *) my_malloc(tmp_str_sz, MYF(MY_WME)); if (!tmp_str) { fprintf(stderr, "\nError: Out of memory. " @@ -2779,7 +2779,7 @@ void Log_event::print_base64(IO_CACHE* file, DBUG_VOID_RETURN; } - if (base64_encode(ptr, (size_t) size, tmp_str)) + if (my_base64_encode(ptr, (size_t) size, tmp_str)) { DBUG_ASSERT(0); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 18d8d807d90..cf9e99b54a6 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4425,7 +4425,7 @@ static int init_common_variables() #endif /* HAVE_SOLARIS_LARGE_PAGES */ -#if defined(HAVE_POOL_OF_THREADS) && !defined(_WIN32) +#if defined(HAVE_POOL_OF_THREADS) if (IS_SYSVAR_AUTOSIZE(&threadpool_size)) SYSVAR_AUTOSIZE(threadpool_size, my_getncpus()); #endif @@ -6464,7 +6464,7 @@ static void create_new_thread(CONNECT *connect) mysql_mutex_unlock(&LOCK_connection_count); statistic_increment(denied_connections, &LOCK_status); statistic_increment(connection_errors_max_connection, &LOCK_status); - connect->close_with_error(0, NullS, ER_CON_COUNT_ERROR); + connect->close_with_error(0, NullS, abort_loop ? 
ER_SERVER_SHUTDOWN : ER_CON_COUNT_ERROR); DBUG_VOID_RETURN; } diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 4efc1bdcc43..4d3861b2936 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -748,7 +748,7 @@ ER_NOT_KEYFILE cze "Nesprávný klíč pro tabulku '%-.200s'; pokuste se ho opravit" dan "Fejl i indeksfilen til tabellen '%-.200s'; prøv at reparere den" nla "Verkeerde zoeksleutel file voor tabel: '%-.200s'; probeer het te repareren" - eng "Incorrect key file for table '%-.200s'; try to repair it" + eng "Index for table '%-.200s' is corrupt; try to repair it" est "Tabeli '%-.200s' võtmefail on vigane; proovi seda parandada" fre "Index corrompu dans la table: '%-.200s'; essayez de le réparer" ger "Fehlerhafte Index-Datei für Tabelle '%-.200s'; versuche zu reparieren" @@ -6710,7 +6710,7 @@ ER_TABLE_SCHEMA_MISMATCH eng "Schema mismatch (%s)" ER_TABLE_IN_SYSTEM_TABLESPACE - eng "Table '%-.192s' in system tablespace" + eng "Table %-.192s in system tablespace" ER_IO_READ_ERROR eng "IO Read error: (%lu, %s) %s" @@ -6725,7 +6725,7 @@ ER_TABLESPACE_EXISTS eng "Tablespace for table '%-.192s' exists. Please DISCARD the tablespace before IMPORT." 
ER_TABLESPACE_DISCARDED - eng "Tablespace has been discarded for table '%-.192s'" + eng "Tablespace has been discarded for table %`s" ER_INTERNAL_ERROR eng "Internal error: %-.192s" @@ -6773,8 +6773,8 @@ ER_FK_COLUMN_CANNOT_DROP ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' benötigt" ER_FK_COLUMN_CANNOT_DROP_CHILD - eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s' of table '%-.192s'" - ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' der Tabelle '%-.192s' benötigt" + eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s' of table %-.192s" + ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' der Tabelle %-.192s benötigt" ER_FK_COLUMN_NOT_NULL eng "Column '%-.192s' cannot be NOT NULL: needed in a foreign key constraint '%-.192s' SET NULL" @@ -7153,6 +7153,10 @@ skip-to-error-number 3000 ER_MYSQL_57_TEST eng "5.7 test" +ER_WRONG_TABLESPACE_NAME 42000 + eng "Incorrect tablespace name %`-.192s" +ER_CANNOT_DISCARD_TEMPORARY_TABLE + eng "Cannot DISCARD/IMPORT tablespace associated with temporary table" # MariaDB extra error numbers starts from 4000 skip-to-error-number 4000 diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index f0465cdf5bf..1967b74e737 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -60,7 +60,7 @@ void mysql_client_binlog_statement(THD* thd) my_error(ER_SYNTAX_ERROR, MYF(0)); DBUG_VOID_RETURN; } - size_t decoded_len= base64_needed_decoded_length(coded_len); + size_t decoded_len= my_base64_needed_decoded_length(coded_len); /* option_bits will be changed when applying the event. 
But we don't expect @@ -124,7 +124,7 @@ void mysql_client_binlog_statement(THD* thd) strptr < thd->lex->comment.str + thd->lex->comment.length ; ) { char const *endptr= 0; - int bytes_decoded= base64_decode(strptr, coded_len, buf, &endptr, + int bytes_decoded= my_base64_decode(strptr, coded_len, buf, &endptr, MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS); #ifndef HAVE_valgrind diff --git a/sql/sql_class.h b/sql/sql_class.h index 7cad0fa25c7..d2a489bfef6 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -21,6 +21,7 @@ /* Classes in mysql */ #include "my_global.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "dur_prop.h" #include #include "sql_const.h" #include @@ -696,6 +697,7 @@ typedef struct system_variables my_bool session_track_schema; my_bool session_track_state_change; + ulong threadpool_priority; } SV; /** diff --git a/sql/sql_cte.h b/sql/sql_cte.h index 89223170261..38442252f5c 100644 --- a/sql/sql_cte.h +++ b/sql/sql_cte.h @@ -350,7 +350,6 @@ void With_element::reset_recursive_for_exec() owner->with_prepared_anchor&= ~mutually_recursive; owner->cleaned&= ~get_elem_map(); cleanup_stabilized(); - rec_result->first_rec_table_to_update= 0; } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index effc0230536..ac00b21c837 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -3781,7 +3781,8 @@ mysql_execute_command(THD *thd) /* Copy temporarily the statement flags to thd for lock_table_names() */ uint save_thd_create_info_options= thd->lex->create_info.options; thd->lex->create_info.options|= create_info.options; - res= open_and_lock_tables(thd, create_info, lex->query_tables, TRUE, 0); + if (!(res= check_dependencies_in_with_clauses(lex->with_clauses_list))) + res= open_and_lock_tables(thd, create_info, lex->query_tables, TRUE, 0); thd->lex->create_info.options= save_thd_create_info_options; if (res) { @@ -4394,7 +4395,8 @@ end_with_restore_list: unit->set_limit(select_lex); - if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0))) + if (!(res= 
check_dependencies_in_with_clauses(lex->with_clauses_list)) && + !(res=open_and_lock_tables(thd, all_tables, TRUE, 0))) { MYSQL_INSERT_SELECT_START(thd->query()); /* diff --git a/sql/sql_select.cc b/sql/sql_select.cc index aa08420931f..c70e0d5b7e2 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2648,7 +2648,7 @@ bool JOIN::make_aggr_tables_info() */ DBUG_PRINT("info",("Sorting for order by/group by")); ORDER *order_arg= group_list ? group_list : order; - if (join_tab && + if (top_join_tab_count + aggr_tables > const_tables && ordered_index_usage != (group_list ? ordered_index_group_by : ordered_index_order_by) && curr_tab->type != JT_CONST && diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 2aa006d5d69..c4ca23cba78 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -8913,7 +8913,7 @@ ST_FIELD_INFO plugin_fields_info[]= ST_FIELD_INFO files_fields_info[]= { {"FILE_ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, 0, SKIP_OPEN_TABLE}, - {"FILE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE}, + {"FILE_NAME", FN_REFLEN, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE}, {"FILE_TYPE", 20, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}, {"TABLESPACE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 1, 0, SKIP_OPEN_TABLE}, diff --git a/sql/sql_string.cc b/sql/sql_string.cc index a5f266b2d2c..4bb9f835211 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1158,18 +1158,3 @@ uint convert_to_printable(char *to, size_t to_len, *t= '\0'; return t - to; } - -void String::q_net_store_length(ulonglong length) -{ - DBUG_ASSERT(Alloced_length >= (str_length + net_length_size(length))); - char *pos= (char *) net_store_length((uchar *)(Ptr + str_length), length); - str_length= pos - Ptr; -} - -void String::q_net_store_data(const uchar *from, size_t length) -{ - DBUG_ASSERT(Alloced_length >= (str_length + length + - net_length_size(length))); - q_net_store_length(length); - q_append((const char *)from, length); -} diff --git a/sql/sql_string.h b/sql/sql_string.h index 
f53015fbd6b..cc7cff09d77 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -32,6 +32,7 @@ class String; typedef struct st_io_cache IO_CACHE; typedef struct st_mem_root MEM_ROOT; +#include "pack.h" int sortcmp(const String *a,const String *b, CHARSET_INFO *cs); String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); inline uint32 copy_and_convert(char *to, uint32 to_length, @@ -631,8 +632,19 @@ public: { return !sortcmp(this, other, cs); } - void q_net_store_length(ulonglong length); - void q_net_store_data(const uchar *from, size_t length); + void q_net_store_length(ulonglong length) + { + DBUG_ASSERT(Alloced_length >= (str_length + net_length_size(length))); + char *pos= (char *) net_store_length((uchar *)(Ptr + str_length), length); + str_length= pos - Ptr; + } + void q_net_store_data(const uchar *from, size_t length) + { + DBUG_ASSERT(Alloced_length >= (str_length + length + + net_length_size(length))); + q_net_store_length(length); + q_append((const char *)from, length); + } }; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 6f0ec66ff40..0cee0dc6ad2 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -123,7 +123,7 @@ static char* add_identifier(THD* thd, char *to_p, const char * end_p, conv_name_end= conv_string + res; } - quote = thd ? get_quote_char_for_identifier(thd, conv_name, res - 1) : '"'; + quote = thd ? 
get_quote_char_for_identifier(thd, conv_name, res - 1) : '`'; if (quote != EOF && (end_p - to_p > 2)) { @@ -8251,11 +8251,14 @@ static bool fk_prepare_copy_alter_table(THD *thd, TABLE *table, DBUG_RETURN(true); case FK_COLUMN_DROPPED: { - char buff[NAME_LEN*2+2]; - strxnmov(buff, sizeof(buff)-1, f_key->foreign_db->str, ".", - f_key->foreign_table->str, NullS); + StringBuffer buff(system_charset_info); + LEX_STRING *db= f_key->foreign_db, *tbl= f_key->foreign_table; + + append_identifier(thd, &buff, db->str, db->length); + buff.append('.'); + append_identifier(thd, &buff, tbl->str,tbl->length); my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD, MYF(0), bad_column_name, - f_key->foreign_id->str, buff); + f_key->foreign_id->str, buff.c_ptr()); DBUG_RETURN(true); } default: diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 854ebb99ef2..0d05d201f12 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -445,9 +445,10 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result, bool is_union_select; bool instantiate_tmp_table= false; DBUG_ENTER("st_select_lex_unit::prepare"); - DBUG_ASSERT(thd == thd_arg && thd == current_thd); + DBUG_ASSERT(thd == thd_arg); + DBUG_ASSERT(thd == current_thd); - describe= MY_TEST(additional_options & SELECT_DESCRIBE); + describe= additional_options & SELECT_DESCRIBE; /* Save fake_select_lex in case we don't need it for anything but diff --git a/sql/sql_window.cc b/sql/sql_window.cc index e720c39eb8f..4705fdce896 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -238,7 +238,7 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, static int compare_order_elements(ORDER *ord1, ORDER *ord2) { - if (*ord1->item == *ord2->item) + if (*ord1->item == *ord2->item && ord1->direction == ord2->direction) return CMP_EQ; Item *item1= (*ord1->item)->real_item(); Item *item2= (*ord2->item)->real_item(); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 551a86e4a41..1facf0623d4 100644 --- a/sql/sql_yacc.yy 
+++ b/sql/sql_yacc.yy @@ -4810,16 +4810,22 @@ create_like: opt_create_select: /* empty */ {} - | opt_duplicate opt_as create_select_query_expression_body + | opt_duplicate opt_as create_select_query_expression ; -create_select_query_expression_body: - SELECT_SYM create_select_part2 opt_table_expression +create_select_query_expression: + opt_with_clause SELECT_SYM create_select_part2 opt_table_expression create_select_part4 - { Select->set_braces(0);} + { + Select->set_braces(0); + Select->set_with_clause($1); + } union_clause - | SELECT_SYM create_select_part2 create_select_part3_union_not_ready - create_select_part4 + | opt_with_clause SELECT_SYM create_select_part2 + create_select_part3_union_not_ready create_select_part4 + { + Select->set_with_clause($1); + } | '(' create_select_query_specification ')' | '(' create_select_query_specification ')' { Select->set_braces(1);} union_list {} @@ -5519,7 +5525,11 @@ opt_part_option: */ create_select_query_specification: - SELECT_SYM create_select_part2 create_select_part3 create_select_part4 + SELECT_SYM opt_with_clause create_select_part2 create_select_part3 + create_select_part4 + { + Select->set_with_clause($2); + } ; create_select_part2: @@ -12308,7 +12318,7 @@ fields: insert_values: VALUES values_list {} | VALUE_SYM values_list {} - | create_select_query_expression_body {} + | create_select_query_expression {} ; values_list: diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 55f2864a93e..5cc81585ed5 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3241,23 +3241,17 @@ static Sys_var_ulong Sys_thread_cache_size( #ifdef HAVE_POOL_OF_THREADS static bool fix_tp_max_threads(sys_var *, THD *, enum_var_type) { -#ifdef _WIN32 tp_set_max_threads(threadpool_max_threads); -#endif return false; } -#ifdef _WIN32 static bool fix_tp_min_threads(sys_var *, THD *, enum_var_type) { tp_set_min_threads(threadpool_min_threads); return false; } -#endif - -#ifndef _WIN32 static bool check_threadpool_size(sys_var *self, THD *thd, 
set_var *var) { ulonglong v= var->save_result.ulonglong_value; @@ -3282,7 +3276,6 @@ static bool fix_threadpool_stall_limit(sys_var*, THD*, enum_var_type) tp_set_threadpool_stall_limit(threadpool_stall_limit); return false; } -#endif #ifdef _WIN32 static Sys_var_uint Sys_threadpool_min_threads( @@ -3293,7 +3286,24 @@ static Sys_var_uint Sys_threadpool_min_threads( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_tp_min_threads) ); -#else + +static const char *threadpool_mode_names[]={ "windows", "generic", 0 }; +static Sys_var_enum Sys_threadpool_mode( + "thread_pool_mode", + "Chose implementation of the threadpool", + READ_ONLY GLOBAL_VAR(threadpool_mode), CMD_LINE(REQUIRED_ARG), + threadpool_mode_names, DEFAULT(TP_MODE_WINDOWS) + ); +#endif + +static const char *threadpool_priority_names[]={ "high", "low", "auto", 0 }; +static Sys_var_enum Sys_thread_pool_priority( + "thread_pool_priority", + "Threadpool priority. High priority connections usually start executing earlier than low priority." + "If priority set to 'auto', the the actual priority(low or high) is determined based on whether or not connection is inside transaction.", + SESSION_VAR(threadpool_priority), CMD_LINE(REQUIRED_ARG), + threadpool_priority_names, DEFAULT(TP_PRIORITY_AUTO)); + static Sys_var_uint Sys_threadpool_idle_thread_timeout( "thread_pool_idle_timeout", "Timeout in seconds for an idle thread in the thread pool." 
@@ -3328,7 +3338,7 @@ static Sys_var_uint Sys_threadpool_stall_limit( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_threadpool_stall_limit) ); -#endif /* !WIN32 */ + static Sys_var_uint Sys_threadpool_max_threads( "thread_pool_max_threads", "Maximum allowed number of worker threads in the thread pool", @@ -3337,6 +3347,13 @@ static Sys_var_uint Sys_threadpool_max_threads( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_tp_max_threads) ); + +static Sys_var_uint Sys_threadpool_threadpool_prio_kickup_timer( + "thread_pool_prio_kickup_timer", + "The number of milliseconds before a dequeued low-priority statement is moved to the high-priority queue", + GLOBAL_VAR(threadpool_prio_kickup_timer), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX), DEFAULT(1000), BLOCK_SIZE(1) +); #endif /* HAVE_POOL_OF_THREADS */ /** diff --git a/sql/threadpool.h b/sql/threadpool.h index 7ddc661565f..17c8b6ea4ea 100644 --- a/sql/threadpool.h +++ b/sql/threadpool.h @@ -23,28 +23,19 @@ extern uint threadpool_max_size; extern uint threadpool_stall_limit; /* time interval in 10 ms units for stall checks*/ extern uint threadpool_max_threads; /* Maximum threads in pool */ extern uint threadpool_oversubscribe; /* Maximum active threads in group */ +extern uint threadpool_prio_kickup_timer; /* Time before low prio item gets prio boost */ +#ifdef _WIN32 +extern uint threadpool_mode; /* Thread pool implementation , windows or generic */ +#define TP_MODE_WINDOWS 0 +#define TP_MODE_GENERIC 1 +#endif +struct TP_connection; +extern void tp_callback(TP_connection *c); +extern void tp_timeout_handler(TP_connection *c); -/* Common thread pool routines, suitable for different implementations */ -extern void threadpool_remove_connection(THD *thd); -extern int threadpool_process_request(THD *thd); -extern THD* threadpool_add_connection(CONNECT *connect, void *scheduled_data); -/* - Functions used by scheduler. 
- OS-specific implementations are in - threadpool_unix.cc or threadpool_win.cc -*/ -extern bool tp_init(); -extern void tp_add_connection(CONNECT *); -extern void tp_wait_begin(THD *, int); -extern void tp_wait_end(THD*); -extern void tp_post_kill_notification(THD *thd); -extern void tp_end(void); - -/* Used in SHOW for threadpool_idle_thread_count */ -extern int tp_get_idle_thread_count(); /* Threadpool statistics @@ -63,9 +54,103 @@ extern void tp_set_min_threads(uint val); extern void tp_set_max_threads(uint val); extern void tp_set_threadpool_size(uint val); extern void tp_set_threadpool_stall_limit(uint val); +extern int tp_get_idle_thread_count(); +extern int tp_get_thread_count(); /* Activate threadpool scheduler */ extern void tp_scheduler(void); extern int show_threadpool_idle_threads(THD *thd, SHOW_VAR *var, char *buff, enum enum_var_type scope); + +enum TP_PRIORITY { + TP_PRIORITY_HIGH, + TP_PRIORITY_LOW, + TP_PRIORITY_AUTO +}; + + +enum TP_STATE +{ + TP_STATE_IDLE, + TP_STATE_RUNNING, +}; + +/* + Connection structure, encapsulates THD + structures for asynchronous + IO and pool. 
+ + Platform specific parts are specified in subclasses called connection_t, + inside threadpool_win.cc and threadpool_unix.cc +*/ + +struct TP_connection +{ + THD* thd; + CONNECT* connect; + TP_STATE state; + TP_PRIORITY priority; + TP_connection(CONNECT *c) : + thd(0), + connect(c), + state(TP_STATE_IDLE), + priority(TP_PRIORITY_HIGH) + {} + + virtual ~TP_connection() + {}; + + /* Initialize io structures windows threadpool, epoll etc */ + virtual int init() = 0; + + virtual void set_io_timeout(int sec) = 0; + + /* Read for the next client command (async) with specified timeout */ + virtual int start_io() = 0; + + virtual void wait_begin(int type)= 0; + virtual void wait_end() = 0; + +}; + + +struct TP_pool +{ + virtual ~TP_pool(){}; + virtual int init()= 0; + virtual TP_connection *new_connection(CONNECT *)= 0; + virtual void add(TP_connection *c)= 0; + virtual int set_max_threads(uint){ return 0; } + virtual int set_min_threads(uint){ return 0; } + virtual int set_pool_size(uint){ return 0; } + virtual int set_idle_timeout(uint){ return 0; } + virtual int set_oversubscribe(uint){ return 0; } + virtual int set_stall_limit(uint){ return 0; } + virtual int get_thread_count() { return tp_stats.num_worker_threads; } + virtual int get_idle_thread_count(){ return 0; } +}; + +#ifdef _WIN32 +struct TP_pool_win:TP_pool +{ + TP_pool_win(); + virtual int init(); + virtual ~TP_pool_win(); + virtual TP_connection *new_connection(CONNECT *c); + virtual void add(TP_connection *); + virtual int set_max_threads(uint); + virtual int set_min_threads(uint); +}; +#endif + +struct TP_pool_generic :TP_pool +{ + TP_pool_generic(); + ~TP_pool_generic(); + virtual int init(); + virtual TP_connection *new_connection(CONNECT *c); + virtual void add(TP_connection *); + virtual int set_pool_size(uint); + virtual int set_stall_limit(uint); + virtual int get_idle_thread_count(); +}; diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index d6c343dc04e..2308f4277d6 100644 --- 
a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -34,14 +34,25 @@ uint threadpool_max_size; uint threadpool_stall_limit; uint threadpool_max_threads; uint threadpool_oversubscribe; +uint threadpool_mode; +uint threadpool_prio_kickup_timer; /* Stats */ TP_STATISTICS tp_stats; +static void threadpool_remove_connection(THD *thd); +static int threadpool_process_request(THD *thd); +static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data); + extern "C" pthread_key(struct st_my_thread_var*, THR_KEY_mysys); extern bool do_command(THD*); +static inline TP_connection *get_TP_connection(THD *thd) +{ + return (TP_connection *)thd->event_scheduler.data; +} + /* Worker threads contexts, and THD contexts. ========================================= @@ -105,14 +116,80 @@ static void thread_attach(THD* thd) #endif } +/* + Determine connection priority , using current + transaction state and 'threadpool_priority' variable value. +*/ +static TP_PRIORITY get_priority(TP_connection *c) +{ + DBUG_ASSERT(c->thd == current_thd); + TP_PRIORITY prio= (TP_PRIORITY)c->thd->variables.threadpool_priority; + if (prio == TP_PRIORITY_AUTO) + { + return c->thd->transaction.is_active() ? TP_PRIORITY_HIGH : TP_PRIORITY_LOW; + } + return prio; +} -THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) + +void tp_callback(TP_connection *c) +{ + DBUG_ASSERT(c); + + Worker_thread_context worker_context; + worker_context.save(); + + THD *thd= c->thd; + + c->state = TP_STATE_RUNNING; + + if (!thd) + { + /* No THD, need to login first. */ + DBUG_ASSERT(c->connect); + thd= c->thd= threadpool_add_connection(c->connect, c); + if (!thd) + { + /* Bail out on connect error.*/ + goto error; + } + c->connect= 0; + } + else if (threadpool_process_request(thd)) + { + /* QUIT or an error occured. */ + goto error; + } + + /* Set priority */ + c->priority= get_priority(c); + + /* Read next command from client. 
*/ + c->set_io_timeout(thd->variables.net_wait_timeout); + c->state= TP_STATE_IDLE; + if (c->start_io()) + goto error; + + worker_context.restore(); + return; + +error: + c->thd= 0; + delete c; + + if (thd) + { + threadpool_remove_connection(thd); + } + worker_context.restore(); +} + + +static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) { THD *thd= NULL; int error=1; - Worker_thread_context worker_context; - worker_context.save(); /* Create a new connection context: mysys_thread_var and PSI thread @@ -137,7 +214,6 @@ THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) #endif my_thread_end(); } - worker_context.restore(); return NULL; } delete connect; @@ -184,17 +260,14 @@ THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) threadpool_remove_connection(thd); thd= NULL; } - worker_context.restore(); return thd; } -void threadpool_remove_connection(THD *thd) +static void threadpool_remove_connection(THD *thd) { - Worker_thread_context worker_context; - worker_context.save(); thread_attach(thd); - + thd->event_scheduler.data= 0; thd->net.reading_or_writing = 0; end_connection(thd); close_connection(thd, 0); @@ -206,19 +279,14 @@ void threadpool_remove_connection(THD *thd) mysys thread_var and PSI thread. */ my_thread_end(); - - worker_context.restore(); } /** Process a single client request or a single batch. 
*/ -int threadpool_process_request(THD *thd) +static int threadpool_process_request(THD *thd) { int retval= 0; - Worker_thread_context worker_context; - worker_context.save(); - thread_attach(thd); if (thd->killed >= KILL_CONNECTION) @@ -268,7 +336,6 @@ int threadpool_process_request(THD *thd) } end: - worker_context.restore(); return retval; } @@ -286,6 +353,119 @@ static bool tp_end_thread(THD *, bool) return 0; } +static TP_pool *pool; + +static bool tp_init() +{ + +#ifdef _WIN32 + if (threadpool_mode == TP_MODE_WINDOWS) + pool= new (std::nothrow) TP_pool_win; + else + pool= new (std::nothrow) TP_pool_generic; +#else + pool= new (std::nothrow) TP_pool_generic; +#endif + if (!pool) + return true; + if (pool->init()) + { + delete pool; + pool= 0; + return true; + } + return false; +} + +static void tp_add_connection(CONNECT *connect) +{ + TP_connection *c= pool->new_connection(connect); + DBUG_EXECUTE_IF("simulate_failed_connection_1", delete c ; c= 0;); + if (c) + pool->add(c); + else + connect->close_and_delete(); +} + +int tp_get_idle_thread_count() +{ + return pool? pool->get_idle_thread_count(): 0; +} + +int tp_get_thread_count() +{ + return pool ? 
pool->get_thread_count() : 0; +} + +void tp_set_min_threads(uint val) +{ + if (pool) + pool->set_min_threads(val); +} + + +void tp_set_max_threads(uint val) +{ + if (pool) + pool->set_max_threads(val); +} + +void tp_set_threadpool_size(uint val) +{ + if (pool) + pool->set_pool_size(val); +} + + +void tp_set_threadpool_stall_limit(uint val) +{ + if (pool) + pool->set_stall_limit(val); +} + + +void tp_timeout_handler(TP_connection *c) +{ + if (c->state != TP_STATE_IDLE) + return; + THD *thd=c->thd; + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->killed= KILL_CONNECTION; + c->priority= TP_PRIORITY_HIGH; + post_kill_notification(thd); + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + + +static void tp_wait_begin(THD *thd, int type) +{ + TP_connection *c = get_TP_connection(thd); + if (c) + c->wait_begin(type); +} + + +static void tp_wait_end(THD *thd) +{ + TP_connection *c = get_TP_connection(thd); + if (c) + c->wait_end(); +} + + +static void tp_end() +{ + delete pool; +} + +static void tp_post_kill_notification(THD *thd) +{ + TP_connection *c= get_TP_connection(thd); + if (c) + c->priority= TP_PRIORITY_HIGH; + post_kill_notification(thd); +} + static scheduler_functions tp_scheduler_functions= { 0, // max_threads @@ -296,7 +476,7 @@ static scheduler_functions tp_scheduler_functions= tp_add_connection, // add_connection tp_wait_begin, // thd_wait_begin tp_wait_end, // thd_wait_end - post_kill_notification, // post_kill_notification + tp_post_kill_notification, // post kill notification tp_end_thread, // Dummy function tp_end // end }; diff --git a/sql/threadpool_unix.cc b/sql/threadpool_generic.cc similarity index 80% rename from sql/threadpool_unix.cc rename to sql/threadpool_generic.cc index 4079091e217..87c74d18aea 100644 --- a/sql/threadpool_unix.cc +++ b/sql/threadpool_generic.cc @@ -22,6 +22,17 @@ #ifdef HAVE_POOL_OF_THREADS +#ifdef _WIN32 +/* AIX may define this, too ?*/ +#define HAVE_IOCP +#endif + +#ifdef HAVE_IOCP +#define OPTIONAL_IO_POLL_READ_PARAM 
&overlapped +#else +#define OPTIONAL_IO_POLL_READ_PARAM 0 +#endif + #include #include #include @@ -38,10 +49,23 @@ typedef struct kevent native_event; #elif defined (__sun) #include typedef port_event_t native_event; +#elif defined (HAVE_IOCP) +typedef OVERLAPPED_ENTRY native_event; #else #error threadpool is not available on this platform #endif + +static void io_poll_close(int fd) +{ +#ifdef _WIN32 + CloseHandle((HANDLE)fd); +#else + close(fd); +#endif +} + + /** Maximum number of native events a listener can read in one go */ #define MAX_EVENTS 1024 @@ -108,32 +132,45 @@ typedef I_P_List worker_list_t; -struct connection_t +struct TP_connection_generic:public TP_connection { + TP_connection_generic(CONNECT *c); + ~TP_connection_generic(); + + virtual int init(){ return 0; }; + virtual void set_io_timeout(int sec); + virtual int start_io(); + virtual void wait_begin(int type); + virtual void wait_end(); - THD *thd; thread_group_t *thread_group; - connection_t *next_in_queue; - connection_t **prev_in_queue; + TP_connection_generic *next_in_queue; + TP_connection_generic **prev_in_queue; ulonglong abs_wait_timeout; - CONNECT* connect; - bool logged_in; + ulonglong dequeue_time; bool bound_to_poll_descriptor; - bool waiting; + int waiting; +#ifdef HAVE_IOCP + OVERLAPPED overlapped; +#endif }; -typedef I_P_List, +typedef TP_connection_generic TP_connection_generic; + +typedef I_P_List, I_P_List_null_counter, - I_P_List_fast_push_back > + I_P_List_fast_push_back > connection_queue_t; +const int NQUEUES=2; /* We have high and low priority queues*/ + struct thread_group_t { mysql_mutex_t mutex; - connection_queue_t queue; + connection_queue_t queues[NQUEUES]; worker_list_t waiting_threads; worker_thread_t *listener; pthread_attr_t *pthread_attr; @@ -147,9 +184,8 @@ struct thread_group_t ulonglong last_thread_creation_time; int shutdown_pipe[2]; bool shutdown; - bool stalled; - -} MY_ALIGNED(512); + bool stalled; +} MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE); static 
thread_group_t *all_groups; static uint group_count; @@ -175,15 +211,13 @@ struct pool_timer_t static pool_timer_t pool_timer; -static void queue_put(thread_group_t *thread_group, connection_t *connection); +static void queue_put(thread_group_t *thread_group, TP_connection_generic *connection); +static void queue_put(thread_group_t *thread_group, native_event *ev, int cnt); static int wake_thread(thread_group_t *thread_group); -static void handle_event(connection_t *connection); static int wake_or_create_thread(thread_group_t *thread_group); static int create_worker(thread_group_t *thread_group); static void *worker_main(void *param); static void check_stall(thread_group_t *thread_group); -static void connection_abort(connection_t *connection); -static void set_wait_timeout(connection_t *connection); static void set_next_timeout_check(ulonglong abstime); static void print_pool_blocked_message(bool); @@ -194,12 +228,12 @@ static void print_pool_blocked_message(bool); This maps to different APIs on different Unixes. Supported are currently Linux with epoll, Solaris with event ports, - OSX and BSD with kevent. All those API's are used with one-shot flags + OSX and BSD with kevent, Windows with IOCP. All those API's are used with one-shot flags (the event is signalled once client has written something into the socket, then socket is removed from the "poll-set" until the command is finished, and we need to re-arm/re-register socket) - No implementation for poll/select/AIO is currently provided. + No implementation for poll/select is currently provided. The API closely resembles all of the above mentioned platform APIs and consists of following functions. 
@@ -208,7 +242,7 @@ static void print_pool_blocked_message(bool); Creates an io_poll descriptor On Linux: epoll_create() - - io_poll_associate_fd(int poll_fd, int fd, void *data) + - io_poll_associate_fd(int poll_fd, int fd, void *data, void *opt) Associate file descriptor with io poll descriptor On Linux : epoll_ctl(..EPOLL_CTL_ADD)) @@ -217,7 +251,7 @@ static void print_pool_blocked_message(bool); On Linux: epoll_ctl(..EPOLL_CTL_DEL) - - io_poll_start_read(int poll_fd,int fd, void *data) + - io_poll_start_read(int poll_fd,int fd, void *data, void *opt) The same as io_poll_associate_fd(), but cannot be used before io_poll_associate_fd() was called. On Linux : epoll_ctl(..EPOLL_CTL_MOD) @@ -245,7 +279,7 @@ static int io_poll_create() } -int io_poll_associate_fd(int pollfd, int fd, void *data) +int io_poll_associate_fd(int pollfd, int fd, void *data, void*) { struct epoll_event ev; ev.data.u64= 0; /* Keep valgrind happy */ @@ -256,7 +290,7 @@ int io_poll_associate_fd(int pollfd, int fd, void *data) -int io_poll_start_read(int pollfd, int fd, void *data) +int io_poll_start_read(int pollfd, int fd, void *data, void *) { struct epoll_event ev; ev.data.u64= 0; /* Keep valgrind happy */ @@ -315,7 +349,7 @@ int io_poll_create() return kqueue(); } -int io_poll_start_read(int pollfd, int fd, void *data) +int io_poll_start_read(int pollfd, int fd, void *data,void *) { struct kevent ke; MY_EV_SET(&ke, fd, EVFILT_READ, EV_ADD|EV_ONESHOT, @@ -324,12 +358,12 @@ int io_poll_start_read(int pollfd, int fd, void *data) } -int io_poll_associate_fd(int pollfd, int fd, void *data) +int io_poll_associate_fd(int pollfd, int fd, void *data,void *) { struct kevent ke; MY_EV_SET(&ke, fd, EVFILT_READ, EV_ADD|EV_ONESHOT, 0, 0, data); - return io_poll_start_read(pollfd,fd, data); + return io_poll_start_read(pollfd,fd, data, 0); } @@ -371,14 +405,14 @@ static int io_poll_create() return port_create(); } -int io_poll_start_read(int pollfd, int fd, void *data) +int io_poll_start_read(int pollfd, 
int fd, void *data, void *) { return port_associate(pollfd, PORT_SOURCE_FD, fd, POLLIN, data); } -static int io_poll_associate_fd(int pollfd, int fd, void *data) +static int io_poll_associate_fd(int pollfd, int fd, void *data, void *) { - return io_poll_start_read(pollfd, fd, data); + return io_poll_start_read(pollfd, fd, data, 0); } int io_poll_disassociate_fd(int pollfd, int fd) @@ -410,23 +444,115 @@ static void* native_event_get_userdata(native_event *event) { return event->portev_user; } + +#elif defined(HAVE_IOCP) + +static int io_poll_create() +{ + HANDLE h= CreateIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 0); + return (int)h; +} + + +int io_poll_start_read(int pollfd, int fd, void *, void *opt) +{ + DWORD num_bytes = 0; + static char c; + + WSABUF buf; + buf.buf= &c; + buf.len= 0; + DWORD flags=0; + + if (WSARecv((SOCKET)fd, &buf, 1, &num_bytes, &flags, (OVERLAPPED *)opt, NULL) == 0) + return 0; + + if (GetLastError() == ERROR_IO_PENDING) + return 0; + + return 1; +} + + +static int io_poll_associate_fd(int pollfd, int fd, void *data, void *opt) +{ + HANDLE h= CreateIoCompletionPort((HANDLE)fd, (HANDLE)pollfd, (ULONG_PTR)data, 0); + if (!h) + return -1; + return io_poll_start_read(pollfd,fd, 0, opt); +} + + +int io_poll_disassociate_fd(int pollfd, int fd) +{ + /* Not possible to unbind/rebind file descriptor in IOCP. */ + return 0; +} + + +int io_poll_wait(int pollfd, native_event *events, int maxevents, int timeout_ms) +{ + ULONG n; + BOOL ok = GetQueuedCompletionStatusEx((HANDLE)pollfd, events, + maxevents, &n, timeout_ms, FALSE); + + return ok ? 
(int)n : -1; +} + + +static void* native_event_get_userdata(native_event *event) +{ + return (void *)event->lpCompletionKey; +} + #endif /* Dequeue element from a workqueue */ -static connection_t *queue_get(thread_group_t *thread_group) +static TP_connection_generic *queue_get(thread_group_t *thread_group) { DBUG_ENTER("queue_get"); thread_group->queue_event_count++; - connection_t *c= thread_group->queue.front(); - if (c) + TP_connection_generic *c; + for (int i=0; i < NQUEUES;i++) { - thread_group->queue.remove(c); + c= thread_group->queues[i].pop_front(); + if (c) + DBUG_RETURN(c); } - DBUG_RETURN(c); + DBUG_RETURN(0); } +static bool is_queue_empty(thread_group_t *thread_group) +{ + for (int i=0; i < NQUEUES; i++) + { + if (!thread_group->queues[i].is_empty()) + return false; + } + return true; +} + + +static void queue_init(thread_group_t *thread_group) +{ + for (int i=0; i < NQUEUES; i++) + { + thread_group->queues[i].empty(); + } +} + +static void queue_put(thread_group_t *thread_group, native_event *ev, int cnt) +{ + ulonglong now= pool_timer.current_microtime; + for(int i=0; i < cnt; i++) + { + TP_connection_generic *c = (TP_connection_generic *)native_event_get_userdata(&ev[i]); + c->dequeue_time= now; + thread_group->queues[c->priority].push_back(c); + } +} /* Handle wait timeout : @@ -450,7 +576,7 @@ static void timeout_check(pool_timer_t *timer) if (thd->net.reading_or_writing != 1) continue; - connection_t *connection= (connection_t *)thd->event_scheduler.data; + TP_connection_generic *connection= (TP_connection_generic *)thd->event_scheduler.data; if (!connection) { /* @@ -462,11 +588,7 @@ static void timeout_check(pool_timer_t *timer) if(connection->abs_wait_timeout < timer->current_microtime) { - /* Wait timeout exceeded, kill connection. 
*/ - mysql_mutex_lock(&thd->LOCK_thd_data); - thd->killed = KILL_CONNECTION; - post_kill_notification(thd); - mysql_mutex_unlock(&thd->LOCK_thd_data); + tp_timeout_handler(connection); } else { @@ -545,10 +667,23 @@ static void* timer_thread(void *param) void check_stall(thread_group_t *thread_group) { - if (mysql_mutex_trylock(&thread_group->mutex) != 0) + mysql_mutex_lock(&thread_group->mutex); + + /* + Bump priority for the low priority connections that spent too much + time in low prio queue. + */ + TP_connection_generic *c; + for (;;) { - /* Something happens. Don't disturb */ - return; + c= thread_group->queues[TP_PRIORITY_LOW].front(); + if (c && pool_timer.current_microtime - c->dequeue_time > 1000ULL * threadpool_prio_kickup_timer) + { + thread_group->queues[TP_PRIORITY_LOW].remove(c); + thread_group->queues[TP_PRIORITY_HIGH].push_back(c); + } + else + break; } /* @@ -593,7 +728,7 @@ void check_stall(thread_group_t *thread_group) do wait and indicate that via thd_wait_begin/end callbacks, thread creation will be faster. */ - if (!thread_group->queue.is_empty() && !thread_group->queue_event_count) + if (!is_queue_empty(thread_group) && !thread_group->queue_event_count) { thread_group->stalled= true; wake_or_create_thread(thread_group); @@ -636,11 +771,11 @@ static void stop_timer(pool_timer_t *timer) @return a ready connection, or NULL on shutdown */ -static connection_t * listener(worker_thread_t *current_thread, +static TP_connection_generic * listener(worker_thread_t *current_thread, thread_group_t *thread_group) { DBUG_ENTER("listener"); - connection_t *retval= NULL; + TP_connection_generic *retval= NULL; for(;;) { @@ -707,28 +842,17 @@ static connection_t * listener(worker_thread_t *current_thread, and wake a worker. NOTE: Currently nothing is done to detect or prevent long queuing times. 
- A solutionc for the future would be to give up "one active thread per + A solution for the future would be to give up "one active thread per group" principle, if events stay in the queue for too long, and just wake more workers. */ - bool listener_picks_event= thread_group->queue.is_empty(); - - /* - If listener_picks_event is set, listener thread will handle first event, - and put the rest into the queue. If listener_pick_event is not set, all - events go to the queue. - */ - for(int i=(listener_picks_event)?1:0; i < cnt ; i++) - { - connection_t *c= (connection_t *)native_event_get_userdata(&ev[i]); - thread_group->queue.push_back(c); - } - + bool listener_picks_event=is_queue_empty(thread_group); + queue_put(thread_group, ev, cnt); if (listener_picks_event) { /* Handle the first event. */ - retval= (connection_t *)native_event_get_userdata(&ev[0]); + retval= queue_get(thread_group); mysql_mutex_unlock(&thread_group->mutex); break; } @@ -914,7 +1038,7 @@ int thread_group_init(thread_group_t *thread_group, pthread_attr_t* thread_attr) thread_group->pollfd= -1; thread_group->shutdown_pipe[0]= -1; thread_group->shutdown_pipe[1]= -1; - thread_group->queue.empty(); + queue_init(thread_group); DBUG_RETURN(0); } @@ -924,9 +1048,10 @@ void thread_group_destroy(thread_group_t *thread_group) mysql_mutex_destroy(&thread_group->mutex); if (thread_group->pollfd != -1) { - close(thread_group->pollfd); + io_poll_close(thread_group->pollfd); thread_group->pollfd= -1; } +#ifndef HAVE_IOCP for(int i=0; i < 2; i++) { if(thread_group->shutdown_pipe[i] != -1) @@ -935,6 +1060,8 @@ void thread_group_destroy(thread_group_t *thread_group) thread_group->shutdown_pipe[i]= -1; } } +#endif + if (my_atomic_add32(&shutdown_group_count, -1) == 1) my_free(all_groups); } @@ -957,7 +1084,32 @@ static int wake_thread(thread_group_t *thread_group) DBUG_RETURN(1); /* no thread in waiter list => missed wakeup */ } +/* + Wake listener thread (during shutdown) + Self-pipe trick is used in most 
cases,except IOCP. +*/ +static int wake_listener(thread_group_t *thread_group) +{ +#ifndef HAVE_IOCP + if (pipe(thread_group->shutdown_pipe)) + { + return -1; + } + /* Wake listener */ + if (io_poll_associate_fd(thread_group->pollfd, + thread_group->shutdown_pipe[0], NULL, NULL)) + { + return -1; + } + char c= 0; + if (write(thread_group->shutdown_pipe[1], &c, 1) < 0) + return -1; +#else + PostQueuedCompletionStatus((HANDLE)thread_group->pollfd, 0, 0, 0); +#endif + return 0; +} /** Initiate shutdown for thread group. @@ -981,20 +1133,7 @@ static void thread_group_close(thread_group_t *thread_group) thread_group->shutdown= true; thread_group->listener= NULL; - if (pipe(thread_group->shutdown_pipe)) - { - DBUG_VOID_RETURN; - } - - /* Wake listener */ - if (io_poll_associate_fd(thread_group->pollfd, - thread_group->shutdown_pipe[0], NULL)) - { - DBUG_VOID_RETURN; - } - char c= 0; - if (write(thread_group->shutdown_pipe[1], &c, 1) < 0) - DBUG_VOID_RETURN; + wake_listener(thread_group); /* Wake all workers. */ while(wake_thread(thread_group) == 0) @@ -1015,18 +1154,16 @@ static void thread_group_close(thread_group_t *thread_group) */ -static void queue_put(thread_group_t *thread_group, connection_t *connection) +static void queue_put(thread_group_t *thread_group, TP_connection_generic *connection) { DBUG_ENTER("queue_put"); - mysql_mutex_lock(&thread_group->mutex); - thread_group->queue.push_back(connection); + connection->dequeue_time= pool_timer.current_microtime; + thread_group->queues[connection->priority].push_back(connection); if (thread_group->active_thread_count == 0) wake_or_create_thread(thread_group); - mysql_mutex_unlock(&thread_group->mutex); - DBUG_VOID_RETURN; } @@ -1061,18 +1198,19 @@ static bool too_many_threads(thread_group_t *thread_group) NULL is returned if timeout has expired,or on shutdown. 
*/ -connection_t *get_event(worker_thread_t *current_thread, +TP_connection_generic *get_event(worker_thread_t *current_thread, thread_group_t *thread_group, struct timespec *abstime) { DBUG_ENTER("get_event"); - connection_t *connection = NULL; - int err=0; + TP_connection_generic *connection = NULL; + mysql_mutex_lock(&thread_group->mutex); DBUG_ASSERT(thread_group->active_thread_count >= 0); for(;;) { + int err=0; bool oversubscribed = too_many_threads(thread_group); if (thread_group->shutdown) break; @@ -1100,22 +1238,27 @@ connection_t *get_event(worker_thread_t *current_thread, thread_group->listener= NULL; break; } - + + /* Last thing we try before going to sleep is to - pick a single event via epoll, without waiting (timeout 0) + non-blocking event poll, i.e with timeout = 0. + If this returns events, pick one */ if (!oversubscribed) { - native_event nev; - if (io_poll_wait(thread_group->pollfd,&nev,1, 0) == 1) + + native_event ev[MAX_EVENTS]; + int cnt = io_poll_wait(thread_group->pollfd, ev, MAX_EVENTS, 0); + if (cnt > 0) { - thread_group->io_event_count++; - connection = (connection_t *)native_event_get_userdata(&nev); + queue_put(thread_group, ev, cnt); + connection= queue_get(thread_group); break; } } + /* And now, finally sleep */ current_thread->woken = false; /* wake() sets this to true */ @@ -1173,9 +1316,9 @@ void wait_begin(thread_group_t *thread_group) DBUG_ASSERT(thread_group->active_thread_count >=0); DBUG_ASSERT(thread_group->connection_count > 0); - + if ((thread_group->active_thread_count == 0) && - (thread_group->queue.is_empty() || !thread_group->listener)) + (is_queue_empty(thread_group) || !thread_group->listener)) { /* Group might stall while this thread waits, thus wake @@ -1202,103 +1345,47 @@ void wait_end(thread_group_t *thread_group) } -/** - Allocate/initialize a new connection structure. 
-*/ -connection_t *alloc_connection() + +TP_connection * TP_pool_generic::new_connection(CONNECT *c) { - connection_t* connection; - DBUG_ENTER("alloc_connection"); - DBUG_EXECUTE_IF("simulate_failed_connection_1", DBUG_RETURN(0); ); - - if ((connection = (connection_t *)my_malloc(sizeof(connection_t),0))) - { - connection->waiting= false; - connection->logged_in= false; - connection->bound_to_poll_descriptor= false; - connection->abs_wait_timeout= ULONGLONG_MAX; - connection->thd= 0; - } - DBUG_RETURN(connection); + return new (std::nothrow) TP_connection_generic(c); } - - /** Add a new connection to thread pool.. */ -void tp_add_connection(CONNECT *connect) +void TP_pool_generic::add(TP_connection *c) { - connection_t *connection; DBUG_ENTER("tp_add_connection"); - connection= alloc_connection(); - if (!connection) - { - connect->close_and_delete(); - DBUG_VOID_RETURN; - } - connection->connect= connect; - - /* Assign connection to a group. */ - thread_group_t *group= - &all_groups[connect->thread_id%group_count]; - - connection->thread_group=group; - - mysql_mutex_lock(&group->mutex); - group->connection_count++; - mysql_mutex_unlock(&group->mutex); - + TP_connection_generic *connection=(TP_connection_generic *)c; + thread_group_t *thread_group= connection->thread_group; /* Add connection to the work queue.Actual logon will be done by a worker thread. */ - queue_put(group, connection); + mysql_mutex_lock(&thread_group->mutex); + queue_put(thread_group, connection); + mysql_mutex_unlock(&thread_group->mutex); DBUG_VOID_RETURN; } -/** - Terminate connection. 
-*/ - -static void connection_abort(connection_t *connection) -{ - DBUG_ENTER("connection_abort"); - thread_group_t *group= connection->thread_group; - - if (connection->thd) - { - threadpool_remove_connection(connection->thd); - } - - mysql_mutex_lock(&group->mutex); - group->connection_count--; - mysql_mutex_unlock(&group->mutex); - - my_free(connection); - DBUG_VOID_RETURN; -} - /** MySQL scheduler callback: wait begin */ -void tp_wait_begin(THD *thd, int type) +void TP_connection_generic::wait_begin(int type) { - DBUG_ENTER("tp_wait_begin"); - DBUG_ASSERT(thd); - connection_t *connection = (connection_t *)thd->event_scheduler.data; - if (connection) - { - DBUG_ASSERT(!connection->waiting); - connection->waiting= true; - wait_begin(connection->thread_group); - } + DBUG_ENTER("wait_begin"); + + DBUG_ASSERT(!waiting); + waiting++; + if (waiting == 1) + ::wait_begin(thread_group); DBUG_VOID_RETURN; } @@ -1307,18 +1394,13 @@ void tp_wait_begin(THD *thd, int type) MySQL scheduler callback: wait end */ -void tp_wait_end(THD *thd) +void TP_connection_generic::wait_end() { - DBUG_ENTER("tp_wait_end"); - DBUG_ASSERT(thd); - - connection_t *connection = (connection_t *)thd->event_scheduler.data; - if (connection) - { - DBUG_ASSERT(connection->waiting); - connection->waiting = false; - wait_end(connection->thread_group); - } + DBUG_ENTER("wait_end"); + DBUG_ASSERT(waiting); + waiting--; + if (waiting == 0) + ::wait_end(thread_group); DBUG_VOID_RETURN; } @@ -1335,12 +1417,41 @@ static void set_next_timeout_check(ulonglong abstime) DBUG_VOID_RETURN; } +TP_connection_generic::TP_connection_generic(CONNECT *c): + TP_connection(c), + thread_group(0), + next_in_queue(0), + prev_in_queue(0), + abs_wait_timeout(ULONGLONG_MAX), + bound_to_poll_descriptor(false), + waiting(false) +#ifdef HAVE_IOCP +, overlapped() +#endif +{ + /* Assign connection to a group. 
*/ + thread_group_t *group= + &all_groups[c->thread_id%group_count]; + + thread_group=group; + + mysql_mutex_lock(&group->mutex); + group->connection_count++; + mysql_mutex_unlock(&group->mutex); +} + +TP_connection_generic::~TP_connection_generic() +{ + mysql_mutex_lock(&thread_group->mutex); + thread_group->connection_count--; + mysql_mutex_unlock(&thread_group->mutex); +} /** Set wait timeout for connection. */ -static void set_wait_timeout(connection_t *c) +void TP_connection_generic::set_io_timeout(int timeout_sec) { DBUG_ENTER("set_wait_timeout"); /* @@ -1351,11 +1462,11 @@ static void set_wait_timeout(connection_t *c) one tick interval. */ - c->abs_wait_timeout= pool_timer.current_microtime + + abs_wait_timeout= pool_timer.current_microtime + 1000LL*pool_timer.tick_interval + - 1000000LL*c->thd->variables.net_wait_timeout; + 1000000LL*timeout_sec; - set_next_timeout_check(c->abs_wait_timeout); + set_next_timeout_check(abs_wait_timeout); DBUG_VOID_RETURN; } @@ -1367,7 +1478,7 @@ static void set_wait_timeout(connection_t *c) after thread_pool_size setting. */ -static int change_group(connection_t *c, +static int change_group(TP_connection_generic *c, thread_group_t *old_group, thread_group_t *new_group) { @@ -1398,10 +1509,11 @@ static int change_group(connection_t *c, } -static int start_io(connection_t *connection) +int TP_connection_generic::start_io() { - int fd = mysql_socket_getfd(connection->thd->net.vio->mysql_socket); + int fd= mysql_socket_getfd(thd->net.vio->mysql_socket); +#ifndef HAVE_IOCP /* Usually, connection will stay in the same group for the entire connection's life. However, we do allow group_count to @@ -1413,56 +1525,25 @@ static int start_io(connection_t *connection) on thread_id and current group count, and migrate if necessary. 
*/ thread_group_t *group = - &all_groups[connection->thd->thread_id%group_count]; + &all_groups[thd->thread_id%group_count]; - if (group != connection->thread_group) + if (group != thread_group) { - if (change_group(connection, connection->thread_group, group)) + if (change_group(this, thread_group, group)) return -1; } - +#endif + /* Bind to poll descriptor if not yet done. */ - if (!connection->bound_to_poll_descriptor) + if (!bound_to_poll_descriptor) { - connection->bound_to_poll_descriptor= true; - return io_poll_associate_fd(group->pollfd, fd, connection); + bound_to_poll_descriptor= true; + return io_poll_associate_fd(thread_group->pollfd, fd, this, OPTIONAL_IO_POLL_READ_PARAM); } - return io_poll_start_read(group->pollfd, fd, connection); -} - - - -static void handle_event(connection_t *connection) -{ - - DBUG_ENTER("handle_event"); - int err; - - if (!connection->logged_in) - { - connection->thd = threadpool_add_connection(connection->connect, connection); - err= (connection->thd == NULL); - connection->logged_in= true; - } - else - { - err= threadpool_process_request(connection->thd); - } - - if(err) - goto end; - - set_wait_timeout(connection); - err= start_io(connection); - -end: - if (err) - connection_abort(connection); - - DBUG_VOID_RETURN; + return io_poll_start_read(thread_group->pollfd, fd, this, OPTIONAL_IO_POLL_READ_PARAM); } @@ -1490,14 +1571,14 @@ static void *worker_main(void *param) /* Run event loop */ for(;;) { - connection_t *connection; + TP_connection_generic *connection; struct timespec ts; set_timespec(ts,threadpool_idle_timeout); connection = get_event(&this_thread, thread_group, &ts); if (!connection) break; this_thread.event_count++; - handle_event(connection); + tp_callback(connection); } /* Thread shutdown: cleanup per-worker-thread structure. 
*/ @@ -1518,30 +1599,33 @@ static void *worker_main(void *param) } -bool tp_init() +TP_pool_generic::TP_pool_generic() +{} + +int TP_pool_generic::init() { - DBUG_ENTER("tp_init"); + DBUG_ENTER("TP_pool_generic::TP_pool_generic"); threadpool_max_size= MY_MAX(threadpool_size, 128); all_groups= (thread_group_t *) my_malloc(sizeof(thread_group_t) * threadpool_max_size, MYF(MY_WME|MY_ZEROFILL)); if (!all_groups) { threadpool_max_size= 0; - DBUG_RETURN(1); + sql_print_error("Allocation failed"); + DBUG_RETURN(-1); } - threadpool_started= true; scheduler_init(); - + threadpool_started= true; for (uint i= 0; i < threadpool_max_size; i++) { thread_group_init(&all_groups[i], get_connection_attrib()); } - tp_set_threadpool_size(threadpool_size); + set_pool_size(threadpool_size); if(group_count == 0) { /* Something went wrong */ sql_print_error("Can't set threadpool size to %d",threadpool_size); - DBUG_RETURN(1); + DBUG_RETURN(-1); } PSI_register(mutex); PSI_register(cond); @@ -1552,7 +1636,7 @@ bool tp_init() DBUG_RETURN(0); } -void tp_end() +TP_pool_generic::~TP_pool_generic() { DBUG_ENTER("tp_end"); @@ -1571,13 +1655,10 @@ void tp_end() /** Ensure that poll descriptors are created when threadpool_size changes */ - -void tp_set_threadpool_size(uint size) +int TP_pool_generic::set_pool_size(uint size) { bool success= true; - if (!threadpool_started) - return; - + for(uint i=0; i< size; i++) { thread_group_t *group= &all_groups[i]; @@ -1596,20 +1677,20 @@ void tp_set_threadpool_size(uint size) if (!success) { group_count= i; - return; + return -1; } } group_count= size; + return 0; } -void tp_set_threadpool_stall_limit(uint limit) +int TP_pool_generic::set_stall_limit(uint limit) { - if (!threadpool_started) - return; mysql_mutex_lock(&(pool_timer.mutex)); pool_timer.tick_interval= limit; mysql_mutex_unlock(&(pool_timer.mutex)); mysql_cond_signal(&(pool_timer.cond)); + return 0; } @@ -1620,7 +1701,7 @@ void tp_set_threadpool_stall_limit(uint limit) Don't do any locking, it is 
not required for stats. */ -int tp_get_idle_thread_count() +int TP_pool_generic::get_idle_thread_count() { int sum=0; for (uint i= 0; i < threadpool_max_size && all_groups[i].pollfd >= 0; i++) diff --git a/sql/threadpool_win.cc b/sql/threadpool_win.cc index 9b1d8f6a7d8..dec898d92bb 100644 --- a/sql/threadpool_win.cc +++ b/sql/threadpool_win.cc @@ -64,8 +64,9 @@ static void tp_log_warning(const char *msg, const char *fct) } -PTP_POOL pool; -DWORD fls; +static PTP_POOL pool; +static TP_CALLBACK_ENVIRON callback_environ; +static DWORD fls; static bool skip_completion_port_on_success = false; @@ -85,13 +86,16 @@ static void CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, static void CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io); + +static void CALLBACK work_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work); + static void CALLBACK shm_read_callback(PTP_CALLBACK_INSTANCE instance, PVOID Context, PTP_WAIT wait,TP_WAIT_RESULT wait_result); static void CALLBACK shm_close_callback(PTP_CALLBACK_INSTANCE instance, PVOID Context, PTP_WAIT wait,TP_WAIT_RESULT wait_result); -static void check_thread_init(); +static void pre_callback(PVOID context, PTP_CALLBACK_INSTANCE instance); /* Get current time as Windows time */ static ulonglong now() @@ -101,74 +105,86 @@ static ulonglong now() return current_time; } -/* - Connection structure, encapsulates THD + structures for asynchronous - IO and pool. 
-*/ - -struct connection_t +struct TP_connection_win:public TP_connection { - THD *thd; +public: + TP_connection_win(CONNECT*); + ~TP_connection_win(); + virtual int init(); + virtual int start_io(); + virtual void set_io_timeout(int sec); + virtual void wait_begin(int type); + virtual void wait_end(); + + ulonglong timeout; + enum_vio_type vio_type; HANDLE handle; OVERLAPPED overlapped; - /* absolute time for wait timeout (as Windows time) */ - volatile ulonglong timeout; - TP_CALLBACK_ENVIRON callback_environ; + PTP_CALLBACK_INSTANCE callback_instance; PTP_IO io; PTP_TIMER timer; PTP_WAIT shm_read; - /* Callback instance, used to inform treadpool about long callbacks */ - PTP_CALLBACK_INSTANCE callback_instance; - CONNECT* connect; - bool logged_in; + PTP_WORK work; + bool long_callback; + }; - -void init_connection(connection_t *connection, CONNECT *connect) +struct TP_connection *new_TP_connection(CONNECT *connect) { - connection->logged_in = false; - connection->handle= 0; - connection->io= 0; - connection->shm_read= 0; - connection->timer= 0; - connection->logged_in = false; - connection->timeout= ULONGLONG_MAX; - connection->callback_instance= 0; - connection->thd= 0; - memset(&connection->overlapped, 0, sizeof(OVERLAPPED)); - InitializeThreadpoolEnvironment(&connection->callback_environ); - SetThreadpoolCallbackPool(&connection->callback_environ, pool); - connection->connect= connect; + TP_connection *c = new (std::nothrow) TP_connection_win(connect); + if (!c || c->init()) + { + delete c; + return 0; + } + return c; +} + +void TP_pool_win::add(TP_connection *c) +{ + SubmitThreadpoolWork(((TP_connection_win *)c)->work); } -int init_io(connection_t *connection, THD *thd) +TP_connection_win::TP_connection_win(CONNECT *c) : + TP_connection(c), + timeout(ULONGLONG_MAX), + callback_instance(0), + io(0), + shm_read(0), + timer(0), + work(0) { - connection->thd= thd; - Vio *vio = thd->net.vio; - switch(vio->type) +} + +#define CHECK_ALLOC_ERROR(op) if (!(op)) 
{tp_log_warning("Allocation failed", #op); DBUG_ASSERT(0); return -1; } + +int TP_connection_win::init() +{ + + memset(&overlapped, 0, sizeof(OVERLAPPED)); + Vio *vio = connect->vio; + switch ((vio_type = vio->type)) { - case VIO_TYPE_SSL: - case VIO_TYPE_TCPIP: - connection->handle= (HANDLE)mysql_socket_getfd(connection->thd->net.vio->mysql_socket); - break; - case VIO_TYPE_NAMEDPIPE: - connection->handle= (HANDLE)vio->hPipe; - break; - case VIO_TYPE_SHARED_MEMORY: - connection->shm_read= CreateThreadpoolWait(shm_read_callback, connection, - &connection->callback_environ); - if (!connection->shm_read) - { - tp_log_warning("Allocation failed", "CreateThreadpoolWait"); - return -1; - } - break; - default: - abort(); + case VIO_TYPE_SSL: + case VIO_TYPE_TCPIP: + handle= (HANDLE)mysql_socket_getfd(vio->mysql_socket); + break; + case VIO_TYPE_NAMEDPIPE: + handle= (HANDLE)vio->hPipe; + break; + case VIO_TYPE_SHARED_MEMORY: + handle= vio->event_server_wrote; + break; + default: + abort(); } - if (connection->handle) + if (vio_type == VIO_TYPE_SHARED_MEMORY) + { + CHECK_ALLOC_ERROR(shm_read= CreateThreadpoolWait(shm_read_callback, this, &callback_environ)); + } + else { /* Performance tweaks (s. 
MSDN documentation)*/ UCHAR flags= FILE_SKIP_SET_EVENT_ON_HANDLE; @@ -176,25 +192,13 @@ int init_io(connection_t *connection, THD *thd) { flags |= FILE_SKIP_COMPLETION_PORT_ON_SUCCESS; } - (void)SetFileCompletionNotificationModes(connection->handle, flags); - + (void)SetFileCompletionNotificationModes(handle, flags); /* Assign io completion callback */ - connection->io= CreateThreadpoolIo(connection->handle, - io_completion_callback, connection, &connection->callback_environ); - if(!connection->io) - { - tp_log_warning("Allocation failed", "CreateThreadpoolWait"); - return -1; - } - } - connection->timer= CreateThreadpoolTimer(timer_callback, connection, - &connection->callback_environ); - if (!connection->timer) - { - tp_log_warning("Allocation failed", "CreateThreadpoolWait"); - return -1; + CHECK_ALLOC_ERROR(io= CreateThreadpoolIo(handle, io_completion_callback, this, &callback_environ)); } + CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); + CHECK_ALLOC_ERROR(work= CreateThreadpoolWork(work_callback, this, &callback_environ)); return 0; } @@ -202,9 +206,8 @@ int init_io(connection_t *connection, THD *thd) /* Start asynchronous read */ -int start_io(connection_t *connection, PTP_CALLBACK_INSTANCE instance) +int TP_connection_win::start_io() { - /* Start async read */ DWORD num_bytes = 0; static char c; WSABUF buf; @@ -214,33 +217,20 @@ int start_io(connection_t *connection, PTP_CALLBACK_INSTANCE instance) DWORD last_error= 0; int retval; - Vio *vio= connection->thd->net.vio; - - if (vio->type == VIO_TYPE_SHARED_MEMORY) + if (shm_read) { - SetThreadpoolWait(connection->shm_read, vio->event_server_wrote, NULL); - return 0; + SetThreadpoolWait(shm_read, handle, NULL); + return 0; } - if (vio->type == VIO_CLOSED) - { - return -1; - } - - DBUG_ASSERT(vio->type == VIO_TYPE_TCPIP || - vio->type == VIO_TYPE_SSL || - vio->type == VIO_TYPE_NAMEDPIPE); - - OVERLAPPED *overlapped= &connection->overlapped; - PTP_IO io= connection->io; 
StartThreadpoolIo(io); - if (vio->type == VIO_TYPE_TCPIP || vio->type == VIO_TYPE_SSL) + if (vio_type == VIO_TYPE_TCPIP || vio_type == VIO_TYPE_SSL) { /* Start async io (sockets). */ - if (WSARecv(mysql_socket_getfd(vio->mysql_socket) , &buf, 1, &num_bytes, &flags, - overlapped, NULL) == 0) + if (WSARecv((SOCKET)handle , &buf, 1, &num_bytes, &flags, + &overlapped, NULL) == 0) { - retval= last_error= 0; + retval= last_error= 0; } else { @@ -251,7 +241,7 @@ int start_io(connection_t *connection, PTP_CALLBACK_INSTANCE instance) else { /* Start async io (named pipe) */ - if (ReadFile(vio->hPipe, &c, 0, &num_bytes ,overlapped)) + if (ReadFile(handle, &c, 0, &num_bytes,&overlapped)) { retval= last_error= 0; } @@ -272,7 +262,7 @@ int start_io(connection_t *connection, PTP_CALLBACK_INSTANCE instance) if(skip_completion_port_on_success) { CancelThreadpoolIo(io); - io_completion_callback(instance, connection, overlapped, last_error, + io_completion_callback(callback_instance, this, &overlapped, last_error, num_bytes, io); } return 0; @@ -288,81 +278,81 @@ int start_io(connection_t *connection, PTP_CALLBACK_INSTANCE instance) return -1; } - -int login(connection_t *connection, PTP_CALLBACK_INSTANCE instance) -{ - if ((connection->thd= threadpool_add_connection(connection->connect, connection)) - && init_io(connection, connection->thd) == 0 - && start_io(connection, instance) == 0) - { - return 0; - } - return -1; -} - /* - Recalculate wait timeout, maybe reset timer. + Recalculate wait timeout, maybe reset timer. 
*/ -void set_wait_timeout(connection_t *connection, ulonglong old_timeout) +void TP_connection_win::set_io_timeout(int timeout_sec) { - ulonglong new_timeout = now() + - 10000000LL*connection->thd->variables.net_wait_timeout; + ulonglong old_timeout= timeout; + ulonglong new_timeout = now() + 10000000LL * timeout_sec; if (new_timeout < old_timeout) { - SetThreadpoolTimer(connection->timer, (PFILETIME) &new_timeout, 0, 1000); + SetThreadpoolTimer(timer, (PFILETIME)&new_timeout, 0, 1000); } - connection->timeout = new_timeout; + /* new_timeout > old_timeout case is handled by expiring timer. */ + timeout = new_timeout; } -/* Connection destructor */ -void destroy_connection(connection_t *connection, PTP_CALLBACK_INSTANCE instance) +TP_connection_win::~TP_connection_win() { - if (instance) - DisassociateCurrentThreadFromCallback(instance); - if (connection->io) - { - WaitForThreadpoolIoCallbacks(connection->io, TRUE); - CloseThreadpoolIo(connection->io); - } + if (io) + CloseThreadpoolIo(io); - if(connection->shm_read) - { - WaitForThreadpoolWaitCallbacks(connection->shm_read, TRUE); - CloseThreadpoolWait(connection->shm_read); - } + if (shm_read) + CloseThreadpoolWait(shm_read); - if(connection->timer) - { - SetThreadpoolTimer(connection->timer, 0, 0, 0); - WaitForThreadpoolTimerCallbacks(connection->timer, TRUE); - CloseThreadpoolTimer(connection->timer); - } - - if (connection->thd) - { - threadpool_remove_connection(connection->thd); - } + if (work) + CloseThreadpoolWork(work); - DestroyThreadpoolEnvironment(&connection->callback_environ); + if (timer) + { + WaitForThreadpoolTimerCallbacks(timer, TRUE); + CloseThreadpoolTimer(timer); + } } +void TP_connection_win::wait_begin(int type) +{ + + /* + Signal to the threadpool whenever callback can run long. 
Currently, binlog + waits are a good candidate, its waits are really long + */ + if (type == THD_WAIT_BINLOG) + { + if (!long_callback) + { + CallbackMayRunLong(callback_instance); + long_callback= true; + } + } +} +void TP_connection_win::wait_end() +{ + /* Do we need to do anything ? */ +} /* This function should be called first whenever a callback is invoked in the threadpool, does my_thread_init() if not yet done */ extern ulong thread_created; -static void check_thread_init() +static void pre_callback(PVOID context, PTP_CALLBACK_INSTANCE instance) { if (FlsGetValue(fls) == NULL) { + /* Running in new worker thread*/ FlsSetValue(fls, (void *)1); statistic_increment(thread_created, &LOCK_status); InterlockedIncrement((volatile long *)&tp_stats.num_worker_threads); + my_thread_init(); } + TP_connection_win *c = (TP_connection_win *)context; + c->callback_instance = instance; + c->long_callback = false; } @@ -375,153 +365,61 @@ static VOID WINAPI thread_destructor(void *data) if(data) { InterlockedDecrement((volatile long *)&tp_stats.num_worker_threads); + my_thread_end(); } } -/* Scheduler callback : init */ -bool tp_init(void) + +static inline void tp_callback(PTP_CALLBACK_INSTANCE instance, PVOID context) { - fls= FlsAlloc(thread_destructor); - pool= CreateThreadpool(NULL); - if(!pool) - { - sql_print_error("Can't create threadpool. " - "CreateThreadpool() failed with %d. 
Likely cause is memory pressure", - GetLastError()); - exit(1); - } - - if (threadpool_max_threads) - { - SetThreadpoolThreadMaximum(pool,threadpool_max_threads); - } - - if (threadpool_min_threads) - { - if (!SetThreadpoolThreadMinimum(pool, threadpool_min_threads)) - { - tp_log_warning( "Can't set threadpool minimum threads", - "SetThreadpoolThreadMinimum"); - } - } - - /* - Control stack size (OS must be Win7 or later, plus corresponding SDK) - */ -#if _MSC_VER >=1600 - if (SetThreadpoolStackInformation) - { - TP_POOL_STACK_INFORMATION stackinfo; - stackinfo.StackCommit = 0; - stackinfo.StackReserve = (SIZE_T)my_thread_stack_size; - if (!SetThreadpoolStackInformation(pool, &stackinfo)) - { - tp_log_warning("Can't set threadpool stack size", - "SetThreadpoolStackInformation"); - } - } -#endif - - return 0; + pre_callback(context, instance); + tp_callback((TP_connection *)context); } -/** - Scheduler callback : Destroy the scheduler. -*/ -void tp_end(void) -{ - if(pool) - { - SetThreadpoolThreadMaximum(pool, 0); - CloseThreadpool(pool); - } -} - /* Handle read completion/notification. */ static VOID CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io) { - if(instance) - { - check_thread_init(); - } - - connection_t *connection = (connection_t*)context; - - if (io_result != ERROR_SUCCESS) - goto error; - - THD *thd= connection->thd; - ulonglong old_timeout = connection->timeout; - connection->timeout = ULONGLONG_MAX; - connection->callback_instance= instance; - if (threadpool_process_request(connection->thd)) - goto error; - - set_wait_timeout(connection, old_timeout); - if(start_io(connection, instance)) - goto error; - - return; - -error: - /* Some error has occurred. */ - - destroy_connection(connection, instance); - free(connection); + TP_connection_win *c= (TP_connection_win *)context; + /* + Execute high priority connections immediately. 
+ 'Yield' in case of low priority connections, i.e SubmitThreadpoolWork (with the same callback) + which makes Windows threadpool place the items at the end of its internal work queue. + */ + if (c->priority == TP_PRIORITY_HIGH) + tp_callback(instance, context); + else + SubmitThreadpoolWork(c->work); } -/* Simple callback for login */ -static void CALLBACK login_callback(PTP_CALLBACK_INSTANCE instance, - PVOID context, PTP_WORK work) -{ - if(instance) - { - check_thread_init(); - } - - connection_t *connection =(connection_t *)context; - if (login(connection, instance) != 0) - { - destroy_connection(connection, instance); - free(connection); - } -} - /* Timer callback. Invoked when connection times out (wait_timeout) */ -static VOID CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, +static VOID CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, PVOID parameter, PTP_TIMER timer) { - check_thread_init(); - - connection_t *con= (connection_t*)parameter; - ulonglong timeout= con->timeout; - - if (timeout <= now()) + TP_connection_win *c = (TP_connection_win *)parameter; + if (c->timeout <= now()) { - con->thd->killed = KILL_CONNECTION; - if(con->thd->net.vio) - vio_shutdown(con->thd->net.vio, SD_BOTH); + tp_timeout_handler(c); } - else if(timeout != ULONGLONG_MAX) + else { - /* - Reset timer. - There is a tiny possibility of a race condition, since the value of timeout - could have changed to smaller value in the thread doing io callback. + /* + Reset timer. + There is a tiny possibility of a race condition, since the value of timeout + could have changed to smaller value in the thread doing io callback. - Given the relative unimportance of the wait timeout, we accept race + Given the relative unimportance of the wait timeout, we accept race condition. 
- */ - SetThreadpoolTimer(timer, (PFILETIME)&timeout, 0, 1000); + */ + SetThreadpoolTimer(timer, (PFILETIME)&c->timeout, 0, 1000); } } @@ -530,10 +428,11 @@ static VOID CALLBACK timer_callback(PTP_CALLBACK_INSTANCE instance, Shared memory read callback. Invoked when read event is set on connection. */ + static void CALLBACK shm_read_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WAIT wait,TP_WAIT_RESULT wait_result) { - connection_t *con= (connection_t *)context; + TP_connection_win *c= (TP_connection_win *)context; /* Disarm wait. */ SetThreadpoolWait(wait, NULL, NULL); @@ -542,97 +441,106 @@ static void CALLBACK shm_read_callback(PTP_CALLBACK_INSTANCE instance, and the current state is "not set". Thus we need to reset the event again, or vio_read will hang. */ - HANDLE h = con->thd->net.vio->event_server_wrote; - SetEvent(h); - io_completion_callback(instance, context, NULL, 0, 0 , 0); + SetEvent(c->handle); + tp_callback(instance, context); } -/* - Notify the thread pool about a new connection. -*/ - -void tp_add_connection(CONNECT *connect) +static void CALLBACK work_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work) { - connection_t *con; - con= (connection_t *)malloc(sizeof(connection_t)); - DBUG_EXECUTE_IF("simulate_failed_connection_1", free(con);con= 0; ); - if (!con) + tp_callback(instance, context); +} + +TP_pool_win::TP_pool_win() +{} + +int TP_pool_win::init() +{ + fls= FlsAlloc(thread_destructor); + pool= CreateThreadpool(NULL); + + if (!pool) { - tp_log_warning("Allocation failed", "tp_add_connection"); - connect->close_and_delete(); - return; + sql_print_error("Can't create threadpool. " + "CreateThreadpool() failed with %d. 
Likely cause is memory pressure", + GetLastError()); + return -1; } - init_connection(con, connect); + InitializeThreadpoolEnvironment(&callback_environ); + SetThreadpoolCallbackPool(&callback_environ, pool); - /* Try to login asynchronously, using threads in the pool */ - PTP_WORK wrk = CreateThreadpoolWork(login_callback,con, &con->callback_environ); - if (wrk) + if (threadpool_max_threads) { - SubmitThreadpoolWork(wrk); - CloseThreadpoolWork(wrk); + SetThreadpoolThreadMaximum(pool, threadpool_max_threads); } - else + + if (threadpool_min_threads) { - /* Likely memory pressure */ - connect->close_and_delete(); + if (!SetThreadpoolThreadMinimum(pool, threadpool_min_threads)) + { + tp_log_warning("Can't set threadpool minimum threads", + "SetThreadpoolThreadMinimum"); + } } + + /* + Control stack size (OS must be Win7 or later) + */ + if (SetThreadpoolStackInformation) + { + TP_POOL_STACK_INFORMATION stackinfo; + stackinfo.StackCommit = 0; + stackinfo.StackReserve = (SIZE_T)my_thread_stack_size; + if (!SetThreadpoolStackInformation(pool, &stackinfo)) + { + tp_log_warning("Can't set threadpool stack size", + "SetThreadpoolStackInformation"); + } + } + return 0; } +/** + Scheduler callback : Destroy the scheduler. +*/ +TP_pool_win::~TP_pool_win() +{ + if (!pool) + return; + DestroyThreadpoolEnvironment(&callback_environ); + SetThreadpoolThreadMaximum(pool, 0); + CloseThreadpool(pool); + if (!tp_stats.num_worker_threads) + FlsFree(fls); +} /** Sets the number of idle threads the thread pool maintains in anticipation of new requests. */ -void tp_set_min_threads(uint val) -{ - if (pool) - SetThreadpoolThreadMinimum(pool, val); -} - -void tp_set_max_threads(uint val) -{ - if (pool) - SetThreadpoolThreadMaximum(pool, val); -} - -void tp_wait_begin(THD *thd, int type) -{ - DBUG_ASSERT(thd); - - /* - Signal to the threadpool whenever callback can run long. 
Currently, binlog - waits are a good candidate, its waits are really long - */ - if (type == THD_WAIT_BINLOG) - { - connection_t *connection= (connection_t *)thd->event_scheduler.data; - if(connection && connection->callback_instance) - { - CallbackMayRunLong(connection->callback_instance); - /* - Reset instance, to avoid calling CallbackMayRunLong twice within - the same callback (it is an error according to docs). - */ - connection->callback_instance= 0; - } - } -} - -void tp_wait_end(THD *thd) -{ - /* Do we need to do anything ? */ -} - - -/** - Number of idle threads in pool. - This info is not available in Windows implementation, - thus function always returns 0. -*/ -int tp_get_idle_thread_count() +int TP_pool_win::set_min_threads(uint val) { + SetThreadpoolThreadMinimum(pool, val); return 0; } +int TP_pool_win::set_max_threads(uint val) +{ + SetThreadpoolThreadMaximum(pool, val); + return 0; +} + + +TP_connection *TP_pool_win::new_connection(CONNECT *connect) +{ + TP_connection *c= new (std::nothrow) TP_connection_win(connect); + if (!c ) + return 0; + if (c->init()) + { + delete c; + return 0; + } + return c; +} diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index c80ef6f0993..d9b50935d27 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,4 +1,5 @@ -# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, 2016, MariaDB Corporation # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -31,347 +32,13 @@ MYSQL_CHECK_BZIP2() MYSQL_CHECK_SNAPPY() MYSQL_CHECK_NUMA() -IF(CMAKE_CROSSCOMPILING) - # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when - # cross-compiling. Not as precise, but usually good enough. 
- # This only make sense for atomic tests in this file, this trick doesn't - # work in a general case. - MACRO(CHECK_C_SOURCE SOURCE VAR) - CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}") - ENDMACRO() -ELSE() - MACRO(CHECK_C_SOURCE SOURCE VAR) - CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}") - ENDMACRO() -ENDIF() - -# OS tests -IF(UNIX) - IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) - CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) - ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1") - IF(HAVE_LIBAIO_H AND HAVE_LIBAIO) - ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) - LINK_LIBRARIES(aio) - ENDIF() - IF(HAVE_LIBNUMA) - LINK_LIBRARIES(numa) - ENDIF() - ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") - ADD_DEFINITIONS("-DUNIV_HPUX") - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") - ADD_DEFINITIONS("-DUNIV_AIX") - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - ADD_DEFINITIONS("-DUNIV_SOLARIS") - ENDIF() -ENDIF() - -IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU") -# After: WL#5825 Using C++ Standard Library with MySQL code -# we no longer use -fno-exceptions -# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") -ENDIF() - -# Enable InnoDB's UNIV_DEBUG and UNIV_SYNC_DEBUG in debug builds -SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEBUG") - -# Add -Wconversion if compiling with GCC -## As of Mar 15 2011 this flag causes 3573+ warnings. 
If you are reading this -## please fix them and enable the following code: -#IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU") -#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion") -#ENDIF() - -CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) -IF(HAVE_SCHED_GETCPU) - ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU) -ENDIF() - -IF(NOT MSVC) - # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not - # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686 - IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND - CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") - ENDIF() - CHECK_C_SOURCE( - " - int main() - { - long x; - long y; - long res; - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x, y); - if (!res || x != y) { - return(1); - } - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x + 1, y); - if (res || x != 10) { - return(1); - } - x = 10; - y = 123; - res = __sync_add_and_fetch(&x, y); - if (res != 123 + 10 || x != 123 + 10) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS - ) - CHECK_C_SOURCE( - " - int main() - { - long res; - char c; - - c = 10; - res = __sync_lock_test_and_set(&c, 123); - if (res != 10 || c != 123) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE - ) - CHECK_C_SOURCE( - "#include - int main() - { - int64_t x,y,res; - - x = 10; - y = 123; - res = __sync_sub_and_fetch(&y, x); - if (res != y || y != 113) { - return(1); - } - res = __sync_add_and_fetch(&y, x); - if (res != y || y != 123) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS_64 - ) - CHECK_C_SOURCE( - "#include - int main() - { - __sync_synchronize(); - return(0); - }" - HAVE_IB_GCC_SYNC_SYNCHRONISE - ) - CHECK_C_SOURCE( - "#include - int main() - { - __atomic_thread_fence(__ATOMIC_ACQUIRE); - __atomic_thread_fence(__ATOMIC_RELEASE); - return(0); - }" - 
HAVE_IB_GCC_ATOMIC_THREAD_FENCE - ) - CHECK_C_SOURCE( - "#include - int main() - { - unsigned char c; - - __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); - __atomic_clear(&c, __ATOMIC_RELEASE); - return(0); - }" - HAVE_IB_GCC_ATOMIC_TEST_AND_SET - ) - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_BYTE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1) -ENDIF() - -IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) -ENDIF() - -# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not -CHECK_C_SOURCE( -" -#include -#include - -int main() { - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - __sync_bool_compare_and_swap(&x1, x2, x3); - - return(0); -}" -HAVE_IB_ATOMIC_PTHREAD_T_GCC) - -IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC) - ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1) -ENDIF() - -CHECK_CXX_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS) -IF(HAVE_C99_INITIALIZERS) - ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS) -ENDIF() - -ENDIF(NOT MSVC) - -CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF) - -# Solaris atomics -IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - CHECK_FUNCTION_EXISTS(atomic_cas_ulong HAVE_ATOMIC_CAS_ULONG) - CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32) - CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64) - CHECK_FUNCTION_EXISTS(atomic_add_long_nv HAVE_ATOMIC_ADD_LONG_NV) - CHECK_FUNCTION_EXISTS(atomic_swap_uchar HAVE_ATOMIC_SWAP_UCHAR) - IF(HAVE_ATOMIC_CAS_ULONG AND - 
HAVE_ATOMIC_CAS_32 AND - HAVE_ATOMIC_CAS_64 AND - HAVE_ATOMIC_ADD_LONG_NV AND - HAVE_ATOMIC_SWAP_UCHAR) - SET(HAVE_IB_SOLARIS_ATOMICS 1) - ENDIF() - - IF(HAVE_IB_SOLARIS_ATOMICS) - ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1) - ENDIF() - - # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not - CHECK_C_SOURCE_COMPILES( - " #include - #include - - int main(int argc, char** argv) { - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - if (sizeof(pthread_t) == 4) { - - atomic_cas_32(&x1, x2, x3); - - } else if (sizeof(pthread_t) == 8) { - - atomic_cas_64(&x1, x2, x3); - - } else { - - return(1); - } - - return(0); - } - " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) - CHECK_C_SOURCE_COMPILES( - "#include - int main() { - __machine_r_barrier(); - __machine_w_barrier(); - return(0); - }" - HAVE_IB_MACHINE_BARRIER_SOLARIS) - - IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) - ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) - ENDIF() - IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) - ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) - ENDIF() -ENDIF() - - -IF(UNIX) -# this is needed to know which one of atomic_cas_32() or atomic_cas_64() -# to use in the source -SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h) -CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T) -SET(CMAKE_EXTRA_INCLUDE_FILES) -ENDIF() - -IF(SIZEOF_PTHREAD_T) - ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T}) -ENDIF() - -IF(MSVC) - ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) - ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) -ENDIF() - - -# Include directories under innobase -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include - ${CMAKE_SOURCE_DIR}/storage/innobase/handler) - -# Sun Studio bug with -xO2 -IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro" - AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2" - AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - # Sun Studio 12 crashes with -xO2 flag, but not with higher optimization - # -xO3 - 
SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc - PROPERTIES COMPILE_FLAGS -xO3) -ENDIF() - -# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows -# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297 -IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8) - SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc - PROPERTIES COMPILE_FLAGS -Od) -ENDIF() - -IF(MSVC) - # Avoid "unreferenced label" warning in generated file - GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) - SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c - PROPERTIES COMPILE_FLAGS "/wd4102") - SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c - PROPERTIES COMPILE_FLAGS "/wd4003") -ENDIF() +INCLUDE(innodb.cmake) SET(INNOBASE_SOURCES api/api0api.cc api/api0misc.cc btr/btr0btr.cc + btr/btr0bulk.cc btr/btr0cur.cc btr/btr0pcur.cc btr/btr0scrub.cc @@ -395,13 +62,16 @@ SET(INNOBASE_SOURCES dict/dict0mem.cc dict/dict0stats.cc dict/dict0stats_bg.cc - dyn/dyn0dyn.cc + dict/dict0defrag_bg.cc eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc fil/fil0pagecompress.cc fil/fil0crypt.cc fsp/fsp0fsp.cc + fsp/fsp0file.cc + fsp/fsp0space.cc + fsp/fsp0sysspace.cc fut/fut0fut.cc fut/fut0lst.cc ha/ha0ha.cc @@ -416,11 +86,17 @@ SET(INNOBASE_SOURCES fts/fts0que.cc fts/fts0sql.cc fts/fts0tlex.cc + gis/gis0geo.cc + gis/gis0rtree.cc + gis/gis0sea.cc + fts/fts0plugin.cc handler/ha_innodb.cc +# handler/ha_innopart.cc handler/handler0alter.cc handler/i_s.cc ibuf/ibuf0ibuf.cc lock/lock0iter.cc + lock/lock0prdt.cc lock/lock0lock.cc lock/lock0wait.cc log/log0log.cc @@ -428,12 +104,11 @@ SET(INNOBASE_SOURCES log/log0crypt.cc mach/mach0data.cc mem/mem0mem.cc - mem/mem0pool.cc mtr/mtr0log.cc mtr/mtr0mtr.cc os/os0file.cc os/os0proc.cc - os/os0sync.cc + os/os0event.cc os/os0thread.cc page/page0cur.cc page/page0page.cc @@ -457,6 +132,7 @@ SET(INNOBASE_SOURCES row/row0purge.cc row/row0row.cc row/row0sel.cc + row/row0trunc.cc row/row0uins.cc row/row0umod.cc 
row/row0undo.cc @@ -469,6 +145,7 @@ SET(INNOBASE_SOURCES srv/srv0start.cc sync/sync0arr.cc sync/sync0rw.cc + sync/sync0debug.cc sync/sync0sync.cc trx/trx0i_s.cc trx/trx0purge.cc @@ -479,12 +156,12 @@ SET(INNOBASE_SOURCES trx/trx0trx.cc trx/trx0undo.cc usr/usr0sess.cc - ut/ut0bh.cc ut/ut0byte.cc ut/ut0crc32.cc ut/ut0dbg.cc ut/ut0list.cc ut/ut0mem.cc + ut/ut0new.cc ut/ut0rbt.cc ut/ut0rnd.cc ut/ut0ut.cc @@ -497,27 +174,51 @@ IF(WITH_INNODB) SET(WITH_INNOBASE_STORAGE_ENGINE TRUE) ENDIF() -# On solaris, reduce symbol visibility, so loader does not mix -# the same symbols from builtin innodb and from shared one. -# Only required for old GCC (3.4.3) that does not support hidden visibility -IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_COMPILER_IS_GNUCC - AND NOT HAVE_VISIBILITY_HIDDEN) - SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports") -ELSE() - SET(LINKER_SCRIPT) -ENDIF() - UNSET(NUMA_LIBRARY) IF(HAVE_LIBNUMA) SET(NUMA_LIBRARY "numa") ENDIF() MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE - MODULE_ONLY - MODULE_OUTPUT_NAME ha_innodb +# MODULE_ONLY +# MODULE_OUTPUT_NAME ha_innodb + DEFAULT RECOMPILE_FOR_EMBEDDED LINK_LIBRARIES ${ZLIB_LIBRARY} ${CRC32_VPMSUM_LIBRARY} ${NUMA_LIBRARY} ${LINKER_SCRIPT}) +IF(WITH_INNOBASE_STORAGE_ENGINE) + ADD_DEPENDENCIES(innobase GenError) +ENDIF() + +# Avoid generating Hardware Capabilities due to crc32 instructions +IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i386") + INCLUDE(${MYSQL_CMAKE_SCRIPT_DIR}/compile_flags.cmake) + MY_CHECK_CXX_COMPILER_FLAG("-Wa,-nH" HAVE_WA_NH) + IF(HAVE_WA_NH) + ADD_COMPILE_FLAGS( + ut/ut0crc32.cc + COMPILE_FLAGS "-Wa,-nH" + ) + ENDIF() +ENDIF() + +# A GCC bug causes crash when compiling these files on ARM64 with -O1+ +# Compile them with -O0 as a workaround. 
+IF(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Bug was fixed in GCC 5.2, so workaround only needed < 5.2 + EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} -dumpversion + OUTPUT_VARIABLE GCC_VERSION) + IF(GCC_VERSION VERSION_LESS 5.2) + INCLUDE(${MYSQL_CMAKE_SCRIPT_DIR}/compile_flags.cmake) + ADD_COMPILE_FLAGS( + btr/btr0btr.cc + btr/btr0cur.cc + buf/buf0buf.cc + gis/gis0sea.cc + COMPILE_FLAGS "-O0" + ) + ENDIF() +ENDIF() diff --git a/storage/innobase/Doxyfile b/storage/innobase/Doxyfile deleted file mode 100644 index 7cf5048fa52..00000000000 --- a/storage/innobase/Doxyfile +++ /dev/null @@ -1,1419 +0,0 @@ -# Doxyfile 1.5.6 - -# Usage: SVNVERSION=-r$(svnversion) doxygen - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = "InnoDB Plugin" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
-# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = 1.0$(SVNVERSION) - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = dox - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, -# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, -# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, -# and Ukrainian. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. 
- -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = YES - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. 
The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. 
- -MULTILINE_CPP_IS_BRIEF = NO - -# If the DETAILS_AT_TOP tag is set to YES then Doxygen -# will output the detailed description near the top, like JavaDoc. -# If set to NO, the detailed description appears after the member -# documentation. - -DETAILS_AT_TOP = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 8 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = YES - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. 
- -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. -# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. 
By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. - -TYPEDEF_HIDES_STRUCT = NO - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. 
-# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. 
- -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. 
If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. 
-# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command , where is the value of -# the FILE_VERSION_FILTER tag, and is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. 
- -FILE_VERSION_FILTER = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = YES - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be abled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. 
Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = . include/univ.i - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = *.c *.ic *.h - -# The RECURSIVE tag can be used to turn specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. 
- -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. 
- -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command , where -# is the value of the INPUT_FILTER tag, and is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. 
- -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. Otherwise they will link to the documentstion. - -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. 
- -ALPHABETICAL_INDEX = NO - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. 
If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. - -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. 
- -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). - -HTML_DYNAMIC_SECTIONS = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. - -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. 
- -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to FRAME, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, -# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are -# probably better off using the HTML help feature. Other possible values -# for this tag are: HIERARCHIES, which will generate the Groups, Directories, -# and Class Hiererachy pages using a tree view instead of an ordered list; -# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which -# disables this behavior completely. For backwards compatibility with previous -# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE -# respectively. - -GENERATE_TREEVIEW = NONE - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. 
- -FORMULA_FONTSIZE = 10 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = NO - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! 
- -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. 
The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. 
- -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. 
- -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. This is useful -# if you want to understand what is going on. On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. 
Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = YES - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = YES - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()= - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. 
- -EXPAND_AS_DEFINED = UT_LIST_BASE_NODE_T UT_LIST_NODE_T - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. 
- -EXTERNAL_GROUPS = NO - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). - -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = YES - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. 
This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. You need need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = NO - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. - -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. 
- -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = NO - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = NO - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. 
Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that doxygen if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lay further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 3 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is enabled by default, which results in a transparent -# background. Warning: Depending on the platform used, enabling this option -# may lead to badly anti-aliased labels on the edges of a graph (i.e. they -# become hard to read). 
- -DOT_TRANSPARENT = YES - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc index 739ea9f7572..1b99dcf1564 100644 --- a/storage/innobase/api/api0api.cc +++ b/storage/innobase/api/api0api.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2008, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, 2016, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,15 +24,7 @@ InnoDB Native API 3/20/2011 Jimmy Yang extracted from Embedded InnoDB *******************************************************/ -#include "univ.i" - -#include -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif +#include "ha_prototypes.h" #include "api0api.h" #include "api0misc.h" @@ -50,11 +42,9 @@ InnoDB Native API #include "row0sel.h" #include "lock0lock.h" #include "rem0cmp.h" -#include "ut0dbg.h" #include "dict0priv.h" -#include "ut0ut.h" -#include "ha_prototypes.h" #include "trx0roll.h" +#include "row0trunc.h" /** configure variable for binlog option with InnoDB APIs */ my_bool ib_binlog_enabled = FALSE; @@ -212,15 +202,15 @@ operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */ #define INNOBASE_WAKE_INTERVAL 32 /*****************************************************************//** -Check whether the Innodb persistent cursor is positioned. -@return IB_TRUE if positioned */ +Check whether the InnoDB persistent cursor is positioned. +@return IB_TRUE if positioned */ UNIV_INLINE ib_bool_t ib_btr_cursor_is_positioned( /*========================*/ btr_pcur_t* pcur) /*!< in: InnoDB persistent cursor */ { - return(pcur->old_stored == BTR_PCUR_OLD_STORED + return(pcur->old_stored && (pcur->pos_state == BTR_PCUR_IS_POSITIONED || pcur->pos_state == BTR_PCUR_WAS_POSITIONED)); } @@ -228,7 +218,7 @@ ib_btr_cursor_is_positioned( /********************************************************************//** Open a table using the table id, if found then increment table ref count. -@return table instance if found */ +@return table instance if found */ static dict_table_t* ib_open_table_by_id( @@ -260,9 +250,9 @@ ib_open_table_by_id( /********************************************************************//** Open a table using the table name, if found then increment table ref count. 
-@return table instance if found */ -UNIV_INTERN -void* +@return table instance if found */ +static +dict_table_t* ib_open_table_by_name( /*==================*/ const char* name) /*!< in: table name to lookup */ @@ -281,7 +271,7 @@ ib_open_table_by_name( /********************************************************************//** Find table using table name. -@return table instance if found */ +@return table instance if found */ static dict_table_t* ib_lookup_table_by_name( @@ -358,7 +348,7 @@ ib_read_tuple( *len = offset_size; } ptr = *rec_buf; - } else { + } else { /* Make a copy of the rec. */ ptr = mem_heap_alloc(tuple->heap, offset_size); } @@ -392,13 +382,12 @@ ib_read_tuple( /* Fetch and copy any externally stored column. */ if (rec_offs_nth_extern(offsets, i)) { - ulint zip_size; - - zip_size = dict_table_zip_size(index->table); + const page_size_t page_size( + dict_table_page_size(index->table)); data = btr_rec_copy_externally_stored_field( - copy, offsets, zip_size, i, &len, - tuple->heap, NULL); + copy, offsets, page_size, i, &len, + tuple->heap); ut_a(len != UNIV_SQL_NULL); } @@ -409,7 +398,7 @@ ib_read_tuple( /*****************************************************************//** Create an InnoDB key tuple. -@return tuple instance created, or NULL */ +@return tuple instance created, or NULL */ static ib_tpl_t ib_key_tuple_new_low( @@ -462,7 +451,7 @@ ib_key_tuple_new_low( /*****************************************************************//** Create an InnoDB key tuple. -@return tuple instance created, or NULL */ +@return tuple instance created, or NULL */ static ib_tpl_t ib_key_tuple_new( @@ -483,7 +472,7 @@ ib_key_tuple_new( /*****************************************************************//** Create an InnoDB row tuple. 
-@return tuple instance, or NULL */ +@return tuple instance, or NULL */ static ib_tpl_t ib_row_tuple_new_low( @@ -515,7 +504,7 @@ ib_row_tuple_new_low( /*****************************************************************//** Create an InnoDB row tuple. -@return tuple instance, or NULL */ +@return tuple instance, or NULL */ static ib_tpl_t ib_row_tuple_new( @@ -536,8 +525,7 @@ ib_row_tuple_new( /*****************************************************************//** Begin a transaction. -@return innobase txn handle */ -UNIV_INTERN +@return innobase txn handle */ ib_err_t ib_trx_start( /*=========*/ @@ -558,7 +546,7 @@ ib_trx_start( trx->api_auto_commit = auto_commit; trx->read_write = read_write; - trx_start_if_not_started(trx); + trx_start_if_not_started(trx, read_write); trx->isolation_level = ib_trx_level; @@ -572,8 +560,7 @@ ib_trx_start( /*****************************************************************//** Begin a transaction. This will allocate a new transaction handle. put the transaction in the active state. -@return innobase txn handle */ -UNIV_INTERN +@return innobase txn handle */ ib_trx_t ib_trx_begin( /*=========*/ @@ -595,11 +582,9 @@ ib_trx_begin( return(static_cast(trx)); } - /*****************************************************************//** Check if transaction is read_only @return transaction read_only status */ -UNIV_INTERN ib_u32_t ib_trx_read_only( /*=============*/ @@ -609,25 +594,9 @@ ib_trx_read_only( return(trx->read_only); } - -/*****************************************************************//** -Get the transaction's state. -@return transaction state */ -UNIV_INTERN -ib_trx_state_t -ib_trx_state( -/*=========*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - trx_t* trx = (trx_t*) ib_trx; - - return((ib_trx_state_t) trx->state); -} - /*****************************************************************//** Get a trx start time. 
-@return trx start_time */ -UNIV_INTERN +@return trx start_time */ ib_u64_t ib_trx_get_start_time( /*==================*/ @@ -638,8 +607,7 @@ ib_trx_get_start_time( } /*****************************************************************//** Release the resources of the transaction. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_trx_release( /*===========*/ @@ -656,17 +624,17 @@ ib_trx_release( /*****************************************************************//** Commit a transaction. This function will also release the schema latches too. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_trx_commit( /*==========*/ ib_trx_t ib_trx) /*!< in: trx handle */ { ib_err_t err = DB_SUCCESS; - trx_t* trx = (trx_t*) ib_trx; + trx_t* trx = reinterpret_cast(ib_trx); + + if (!trx_is_started(trx)) { - if (trx->state == TRX_STATE_NOT_STARTED) { return(err); } @@ -678,8 +646,7 @@ ib_trx_commit( /*****************************************************************//** Rollback a transaction. This function will also release the schema latches too. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_trx_rollback( /*============*/ @@ -696,7 +663,7 @@ ib_trx_rollback( return(err); } -#ifdef __WIN__ +#ifdef _WIN32 /*****************************************************************//** Convert a string to lower case. */ static @@ -710,7 +677,7 @@ ib_to_lower_case( ++ptr; } } -#endif /* __WIN__ */ +#endif /* _WIN32 */ /*****************************************************************//** Normalizes a table name string. 
A normalized name consists of the @@ -762,7 +729,7 @@ ib_normalize_table_name( ut_strlen(name) + 1 - (db_name - name)); norm_name[table_name - db_name - 1] = '/'; -#ifdef __WIN__ +#ifdef _WIN32 ib_to_lower_case(norm_name); #endif } else { @@ -770,57 +737,9 @@ ib_normalize_table_name( } } -/*****************************************************************//** -Check whether the table name conforms to our requirements. Currently -we only do a simple check for the presence of a '/'. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_table_name_check( -/*================*/ - const char* name) /*!< in: table name to check */ -{ - const char* slash = NULL; - ulint len = ut_strlen(name); - - if (len < 2 - || *name == '/' - || name[len - 1] == '/' - || (name[0] == '.' && name[1] == '/') - || (name[0] == '.' && name[1] == '.' && name[2] == '/')) { - - return(DB_DATA_MISMATCH); - } - - for ( ; *name; ++name) { -#ifdef __WIN__ - /* Check for reserved characters in DOS filenames. */ - switch (*name) { - case ':': - case '|': - case '"': - case '*': - case '<': - case '>': - return(DB_DATA_MISMATCH); - } -#endif /* __WIN__ */ - if (*name == '/') { - if (slash) { - return(DB_DATA_MISMATCH); - } - slash = name; - } - } - - return(slash ? DB_SUCCESS : DB_DATA_MISMATCH); -} - - - /*****************************************************************//** Get a table id. The caller must have acquired the dictionary mutex. -@return DB_SUCCESS if found */ +@return DB_SUCCESS if found */ static ib_err_t ib_table_get_id_low( @@ -846,7 +765,7 @@ ib_table_get_id_low( /*****************************************************************//** Create an internal cursor instance. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ static ib_err_t ib_create_cursor( @@ -918,7 +837,7 @@ ib_create_cursor( /*****************************************************************//** Create an internal cursor instance, and set prebuilt->index to index with supplied index_id. 
-@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ static ib_err_t ib_create_cursor_with_index_id( @@ -943,8 +862,7 @@ ib_create_cursor_with_index_id( /*****************************************************************//** Open an InnoDB table and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_open_table_using_id( /*==========================*/ @@ -955,12 +873,10 @@ ib_cursor_open_table_using_id( { ib_err_t err; dict_table_t* table; + const ib_bool_t locked + = ib_trx && ib_schema_lock_is_exclusive(ib_trx); - if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) { - table = ib_open_table_by_id(table_id, FALSE); - } else { - table = ib_open_table_by_id(table_id, TRUE); - } + table = ib_open_table_by_id(table_id, locked); if (table == NULL) { @@ -973,59 +889,9 @@ ib_cursor_open_table_using_id( return(err); } -/*****************************************************************//** -Open an InnoDB index and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_open_index_using_id( -/*==========================*/ - ib_id_u64_t index_id, /*!< in: index id of index to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr) /*!< out: InnoDB cursor */ -{ - ib_err_t err; - dict_table_t* table; - ulint table_id = (ulint)( index_id >> 32); - - if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) { - table = ib_open_table_by_id(table_id, FALSE); - } else { - table = ib_open_table_by_id(table_id, TRUE); - } - - if (table == NULL) { - - return(DB_TABLE_NOT_FOUND); - } - - /* We only return the lower 32 bits of the dulint. 
*/ - err = ib_create_cursor_with_index_id( - ib_crsr, table, index_id, (trx_t*) ib_trx); - - if (ib_crsr != NULL) { - const ib_cursor_t* cursor; - - cursor = *(ib_cursor_t**) ib_crsr; - - if (cursor->prebuilt->index == NULL) { - ib_err_t crsr_err; - - crsr_err = ib_cursor_close(*ib_crsr); - ut_a(crsr_err == DB_SUCCESS); - - *ib_crsr = NULL; - } - } - - return(err); -} - /*****************************************************************//** Open an InnoDB secondary index cursor and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_open_index_using_name( /*============================*/ @@ -1092,8 +958,7 @@ ib_cursor_open_index_using_name( /*****************************************************************//** Open an InnoDB table and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_open_table( /*=================*/ @@ -1106,22 +971,22 @@ ib_cursor_open_table( dict_table_t* table; char* normalized_name; - normalized_name = static_cast(mem_alloc(ut_strlen(name) + 1)); + normalized_name = static_cast(ut_malloc_nokey(ut_strlen(name) + + 1)); ib_normalize_table_name(normalized_name, name); if (ib_trx != NULL) { - if (!ib_schema_lock_is_exclusive(ib_trx)) { - table = (dict_table_t*)ib_open_table_by_name( - normalized_name); + if (!ib_schema_lock_is_exclusive(ib_trx)) { + table = ib_open_table_by_name(normalized_name); } else { /* NOTE: We do not acquire MySQL metadata lock */ table = ib_lookup_table_by_name(normalized_name); } } else { - table = (dict_table_t*)ib_open_table_by_name(normalized_name); + table = ib_open_table_by_name(normalized_name); } - mem_free(normalized_name); + ut_free(normalized_name); normalized_name = NULL; /* It can happen that another thread has created the table but @@ -1141,6 +1006,16 @@ ib_cursor_open_table( return(err); } +/** Check the table whether it contains virtual columns. 
+@param[in] crsr InnoDB Cursor +@return true if table contains virtual column else false. */ +ib_bool_t +ib_is_virtual_table( + ib_crsr_t crsr) +{ + return(crsr->prebuilt->table->n_v_cols > 0); +} + /********************************************************************//** Free a context struct for a table handle. */ static @@ -1156,23 +1031,9 @@ ib_qry_proc_free( memset(q_proc, 0x0, sizeof(*q_proc)); } -/*****************************************************************//** -set a cursor trx to NULL */ -UNIV_INTERN -void -ib_cursor_clear_trx( -/*================*/ - ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - cursor->prebuilt->trx = NULL; -} - /*****************************************************************//** Reset the cursor. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_reset( /*============*/ @@ -1198,7 +1059,7 @@ ib_cursor_reset( /*****************************************************************//** update the cursor with new transactions and also reset the cursor -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_new_trx( /*==============*/ @@ -1217,16 +1078,16 @@ ib_cursor_new_trx( trx_assign_read_view(prebuilt->trx); - ib_qry_proc_free(&cursor->q_proc); + ib_qry_proc_free(&cursor->q_proc); - mem_heap_empty(cursor->query_heap); + mem_heap_empty(cursor->query_heap); return(err); } /*****************************************************************//** Commit the transaction in a cursor -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_commit_trx( /*=================*/ @@ -1247,8 +1108,7 @@ ib_cursor_commit_trx( /*****************************************************************//** Close an InnoDB table and free the cursor. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_close( /*============*/ @@ -1285,8 +1145,7 @@ ib_cursor_close( /*****************************************************************//** Close the table, decrement n_ref_count count. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_close_table( /*==================*/ @@ -1303,7 +1162,7 @@ ib_cursor_close_table( } /**********************************************************************//** Run the insert query and do error handling. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ UNIV_INLINE ib_err_t ib_insert_row_with_lock_retry( @@ -1344,7 +1203,7 @@ ib_insert_row_with_lock_retry( /*****************************************************************//** Write a row. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ static ib_err_t ib_execute_insert_query_graph( @@ -1397,7 +1256,7 @@ ib_insert_query_graph_create( ib_qry_node_t* node = &q_proc->node; trx_t* trx = cursor->prebuilt->trx; - ut_a(trx->state != TRX_STATE_NOT_STARTED); + ut_a(trx_is_started(trx)); if (node->ins == NULL) { dtuple_t* row; @@ -1413,12 +1272,14 @@ ib_insert_query_graph_create( row = dtuple_create(heap, dict_table_get_n_cols(table)); dict_table_copy_types(row, table); + ut_ad(!dict_table_have_virtual_index(table)); + ins_node_set_new_row(node->ins, row); grph->ins = static_cast( que_node_get_parent( pars_complete_graph_for_exec(node->ins, trx, - heap))); + heap, NULL))); grph->ins->state = QUE_FORK_ACTIVE; } @@ -1426,8 +1287,7 @@ ib_insert_query_graph_create( /*****************************************************************//** Insert a row to a table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_insert_row( /*=================*/ @@ -1507,7 +1367,7 @@ ib_cursor_insert_row( /*********************************************************************//** Gets pointer to a prebuilt update vector used in updates. -@return update vector */ +@return update vector */ UNIV_INLINE upd_t* ib_update_vector_create( @@ -1521,16 +1381,19 @@ ib_update_vector_create( ib_qry_grph_t* grph = &q_proc->grph; ib_qry_node_t* node = &q_proc->node; - ut_a(trx->state != TRX_STATE_NOT_STARTED); + ut_a(trx_is_started(trx)); if (node->upd == NULL) { node->upd = static_cast( row_create_update_node_for_mysql(table, heap)); } + ut_ad(!dict_table_have_virtual_index(table)); + grph->upd = static_cast( que_node_get_parent( - pars_complete_graph_for_exec(node->upd, trx, heap))); + pars_complete_graph_for_exec(node->upd, trx, + heap, NULL))); grph->upd->state = QUE_FORK_ACTIVE; @@ -1572,7 +1435,7 @@ ib_update_col( /**********************************************************************//** Checks which fields have changed in a row and stores the new data to an update vector. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ static ib_err_t ib_calc_diff( @@ -1639,7 +1502,7 @@ ib_calc_diff( /**********************************************************************//** Run the update query and do error handling. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ UNIV_INLINE ib_err_t ib_update_row_with_lock_retry( @@ -1687,7 +1550,7 @@ ib_update_row_with_lock_retry( /*********************************************************************//** Does an update or delete of a row. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ UNIV_INLINE ib_err_t ib_execute_update_query_graph( @@ -1704,7 +1567,7 @@ ib_execute_update_query_graph( ib_qry_proc_t* q_proc = &cursor->q_proc; /* The transaction must be running. 
*/ - ut_a(trx->state != TRX_STATE_NOT_STARTED); + ut_a(trx_is_started(trx)); node = q_proc->node.upd; @@ -1755,8 +1618,7 @@ ib_execute_update_query_graph( /*****************************************************************//** Update a row in a table. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_update_row( /*=================*/ @@ -1773,9 +1635,9 @@ ib_cursor_update_row( const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl; if (dict_index_is_clust(prebuilt->index)) { - pcur = &cursor->prebuilt->pcur; + pcur = cursor->prebuilt->pcur; } else if (prebuilt->need_to_access_clustered) { - pcur = &cursor->prebuilt->clust_pcur; + pcur = cursor->prebuilt->clust_pcur; } else { return(DB_ERROR); } @@ -1801,7 +1663,7 @@ ib_cursor_update_row( /**********************************************************************//** Build the update query graph to delete a row from an index. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ static ib_err_t ib_delete_row( @@ -1868,8 +1730,7 @@ ib_delete_row( /*****************************************************************//** Delete a row in a table. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_delete_row( /*=================*/ @@ -1886,12 +1747,12 @@ ib_cursor_delete_row( /* Check whether this is a secondary index cursor */ if (index != prebuilt->index) { if (prebuilt->need_to_access_clustered) { - pcur = &prebuilt->clust_pcur; + pcur = prebuilt->clust_pcur; } else { return(DB_ERROR); } } else { - pcur = &prebuilt->pcur; + pcur = prebuilt->pcur; } if (ib_btr_cursor_is_positioned(pcur)) { @@ -1943,8 +1804,7 @@ ib_cursor_delete_row( /*****************************************************************//** Read current row. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_read_row( /*===============*/ @@ -1957,7 +1817,7 @@ ib_cursor_read_row( ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED); + ut_a(trx_is_started(cursor->prebuilt->trx)); /* When searching with IB_EXACT_MATCH set, row_search_for_mysql() will not position the persistent cursor but will copy the record @@ -1971,9 +1831,9 @@ ib_cursor_read_row( if (prebuilt->need_to_access_clustered && tuple->type == TPL_TYPE_ROW) { - pcur = &prebuilt->clust_pcur; + pcur = prebuilt->clust_pcur; } else { - pcur = &prebuilt->pcur; + pcur = prebuilt->pcur; } if (pcur == NULL) { @@ -2015,7 +1875,7 @@ ib_cursor_read_row( /*****************************************************************//** Move cursor to the first record in the table. -@return DB_SUCCESS or err code */ +@return DB_SUCCESS or err code */ UNIV_INLINE ib_err_t ib_cursor_position( @@ -2027,24 +1887,23 @@ ib_cursor_position( row_prebuilt_t* prebuilt = cursor->prebuilt; unsigned char* buf; - buf = static_cast(mem_alloc(UNIV_PAGE_SIZE)); + buf = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); /* We want to position at one of the ends, row_search_for_mysql() uses the search_tuple fields to work out what to do. */ dtuple_set_n_fields(prebuilt->search_tuple, 0); err = static_cast(row_search_for_mysql( - buf, mode, prebuilt, 0, 0)); + buf, static_cast(mode), prebuilt, 0, 0)); - mem_free(buf); + ut_free(buf); return(err); } /*****************************************************************//** Move cursor to the first record in the table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_first( /*============*/ @@ -2055,24 +1914,9 @@ ib_cursor_first( return(ib_cursor_position(cursor, IB_CUR_G)); } -/*****************************************************************//** -Move cursor to the last record in the table. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_last( -/*===========*/ - ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - return(ib_cursor_position(cursor, IB_CUR_L)); -} - /*****************************************************************//** Move cursor to the next user record in the table. @return DB_SUCCESS or err code */ -UNIV_INTERN ib_err_t ib_cursor_next( /*===========*/ @@ -2094,8 +1938,7 @@ ib_cursor_next( /*****************************************************************//** Search for key. -@return DB_SUCCESS or err code */ -UNIV_INTERN +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_moveto( /*=============*/ @@ -2133,19 +1976,19 @@ ib_cursor_moveto( prebuilt->innodb_api_rec = NULL; - buf = static_cast(mem_alloc(UNIV_PAGE_SIZE)); + buf = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); err = static_cast(row_search_for_mysql( - buf, ib_srch_mode, prebuilt, cursor->match_mode, 0)); + buf, static_cast(ib_srch_mode), prebuilt, + cursor->match_mode, 0)); - mem_free(buf); + ut_free(buf); return(err); } /*****************************************************************//** Set the cursor search mode. */ -UNIV_INTERN void ib_cursor_set_match_mode( /*=====================*/ @@ -2159,7 +2002,7 @@ ib_cursor_set_match_mode( /*****************************************************************//** Get the dfield instance for the column in the tuple. 
-@return dfield instance in tuple */ +@return dfield instance in tuple */ UNIV_INLINE dfield_t* ib_col_get_dfield( @@ -2176,7 +2019,7 @@ ib_col_get_dfield( /*****************************************************************//** Predicate to check whether a column type contains variable length data. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ UNIV_INLINE ib_err_t ib_col_is_capped( @@ -2189,14 +2032,14 @@ ib_col_is_capped( || dtype_get_mtype(dtype) == DATA_MYSQL || dtype_get_mtype(dtype) == DATA_VARMYSQL || dtype_get_mtype(dtype) == DATA_FIXBINARY - || dtype_get_mtype(dtype) == DATA_BINARY) + || dtype_get_mtype(dtype) == DATA_BINARY + || dtype_get_mtype(dtype) == DATA_POINT) && dtype_get_len(dtype) > 0)); } /*****************************************************************//** Set a column of the tuple. Make a copy using the tuple's heap. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_col_set_value( /*=============*/ @@ -2287,24 +2130,19 @@ ib_col_set_value( ut_error; break; - case DATA_CHAR: { - ulint pad_char = ULINT_UNDEFINED; - - pad_char = dtype_get_pad_char( - dtype_get_mtype(dtype), dtype_get_prtype(dtype)); - - ut_a(pad_char != ULINT_UNDEFINED); - - memset((byte*) dst + len, - static_cast(pad_char), - static_cast(col_len - len)); - + case DATA_CHAR: memcpy(dst, src, len); - - len = static_cast(col_len); + memset((byte*) dst + len, 0x20, col_len - len); + len = col_len; break; - } + + case DATA_POINT: + memcpy(dst, src, len); + break; + case DATA_BLOB: + case DATA_VAR_POINT: + case DATA_GEOMETRY: case DATA_BINARY: case DATA_DECIMAL: case DATA_VARCHAR: @@ -2403,8 +2241,7 @@ ib_col_set_value( /*****************************************************************//** Get the size of the data available in a column of the tuple. 
-@return bytes avail or IB_SQL_NULL */ -UNIV_INTERN +@return bytes avail or IB_SQL_NULL */ ib_ulint_t ib_col_get_len( /*===========*/ @@ -2425,7 +2262,7 @@ ib_col_get_len( /*****************************************************************//** Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ +@return bytes copied or IB_SQL_NULL */ UNIV_INLINE ib_ulint_t ib_col_copy_value_low( @@ -2452,7 +2289,7 @@ ib_col_copy_value_low( switch (dtype_get_mtype(dfield_get_type(dfield))) { case DATA_INT: { ibool usign; - ullint ret; + uintmax_t ret; ut_a(data_len == len); @@ -2521,8 +2358,7 @@ ib_col_copy_value_low( /*****************************************************************//** Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ -UNIV_INTERN +@return bytes copied or IB_SQL_NULL */ ib_ulint_t ib_col_copy_value( /*==============*/ @@ -2536,7 +2372,7 @@ ib_col_copy_value( /*****************************************************************//** Get the InnoDB column attribute from the internal column precise type. -@return precise type in api format */ +@return precise type in api format */ UNIV_INLINE ib_col_attr_t ib_col_get_attr( @@ -2558,8 +2394,7 @@ ib_col_get_attr( /*****************************************************************//** Get a column name from the tuple. -@return name of the column */ -UNIV_INTERN +@return name of the column */ const char* ib_col_get_name( /*============*/ @@ -2579,8 +2414,7 @@ ib_col_get_name( /*****************************************************************//** Get an index field name from the cursor. -@return name of the field */ -UNIV_INTERN +@return name of the field */ const char* ib_get_idx_field_name( /*==================*/ @@ -2604,7 +2438,7 @@ ib_get_idx_field_name( /*****************************************************************//** Get a column type, length and attributes from the tuple. 
-@return len of column data */ +@return len of column data */ UNIV_INLINE ib_ulint_t ib_col_get_meta_low( @@ -2667,8 +2501,7 @@ ib_tuple_check_int( /*************************************************************//** Read a signed int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i8( /*=============*/ @@ -2689,8 +2522,7 @@ ib_tuple_read_i8( /*************************************************************//** Read an unsigned int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u8( /*=============*/ @@ -2711,8 +2543,7 @@ ib_tuple_read_u8( /*************************************************************//** Read a signed int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i16( /*==============*/ @@ -2733,8 +2564,7 @@ ib_tuple_read_i16( /*************************************************************//** Read an unsigned int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u16( /*==============*/ @@ -2755,8 +2585,7 @@ ib_tuple_read_u16( /*************************************************************//** Read a signed int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i32( /*==============*/ @@ -2777,8 +2606,7 @@ ib_tuple_read_i32( /*************************************************************//** Read an unsigned int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u32( /*==============*/ @@ -2799,8 +2627,7 @@ ib_tuple_read_u32( /*************************************************************//** Read a signed int 64 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i64( /*==============*/ @@ -2821,8 +2648,7 @@ ib_tuple_read_i64( /*************************************************************//** Read an unsigned int 64 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u64( /*==============*/ @@ -2843,8 +2669,7 @@ ib_tuple_read_u64( /*****************************************************************//** Get a column value pointer from the tuple. -@return NULL or pointer to buffer */ -UNIV_INTERN +@return NULL or pointer to buffer */ const void* ib_col_get_value( /*=============*/ @@ -2866,8 +2691,7 @@ ib_col_get_value( /*****************************************************************//** Get a column type, length and attributes from the tuple. -@return len of column data */ -UNIV_INTERN +@return len of column data */ ib_ulint_t ib_col_get_meta( /*============*/ @@ -2880,8 +2704,7 @@ ib_col_get_meta( /*****************************************************************//** "Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple. -@return new tuple, or NULL */ -UNIV_INTERN +@return new tuple, or NULL */ ib_tpl_t ib_tuple_clear( /*============*/ @@ -2909,8 +2732,7 @@ ib_tuple_clear( Create a new cluster key search tuple and copy the contents of the secondary index key tuple columns that refer to the cluster index record to the cluster key. It does a deep copy of the column data. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_tuple_get_cluster_key( /*=====================*/ @@ -2981,65 +2803,9 @@ ib_tuple_get_cluster_key( return(err); } -/*****************************************************************//** -Copy the contents of source tuple to destination tuple. The tuples -must be of the same type and belong to the same table/index. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_tuple_copy( -/*==========*/ - ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */ - const ib_tpl_t ib_src_tpl) /*!< in: source tuple */ -{ - ulint i; - ulint n_fields; - ib_err_t err = DB_SUCCESS; - const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl; - ib_tuple_t* dst_tuple = (ib_tuple_t*) ib_dst_tpl; - - /* Make sure src and dst are not the same. */ - ut_a(src_tuple != dst_tuple); - - /* Make sure they are the same type and refer to the same index. */ - if (src_tuple->type != dst_tuple->type - || src_tuple->index != dst_tuple->index) { - - return(DB_DATA_MISMATCH); - } - - n_fields = dtuple_get_n_fields(src_tuple->ptr); - ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr)); - - /* Do a deep copy of the data fields. */ - for (i = 0; i < n_fields; ++i) { - dfield_t* src_field; - dfield_t* dst_field; - - src_field = dtuple_get_nth_field(src_tuple->ptr, i); - dst_field = dtuple_get_nth_field(dst_tuple->ptr, i); - - if (!dfield_is_null(src_field)) { - UNIV_MEM_ASSERT_RW(src_field->data, src_field->len); - - dst_field->data = mem_heap_dup( - dst_tuple->heap, - src_field->data, - src_field->len); - - dst_field->len = src_field->len; - } else { - dfield_set_null(dst_field); - } - } - - return(err); -} - /*****************************************************************//** Create an InnoDB tuple used for index/table search. -@return own: Tuple for current index */ -UNIV_INTERN +@return own: Tuple for current index */ ib_tpl_t ib_sec_search_tuple_create( /*=======================*/ @@ -3055,8 +2821,7 @@ ib_sec_search_tuple_create( /*****************************************************************//** Create an InnoDB tuple used for index/table search. 
-@return own: Tuple for current index */ -UNIV_INTERN +@return own: Tuple for current index */ ib_tpl_t ib_sec_read_tuple_create( /*=====================*/ @@ -3072,8 +2837,7 @@ ib_sec_read_tuple_create( /*****************************************************************//** Create an InnoDB tuple used for table key operations. -@return own: Tuple for current table */ -UNIV_INTERN +@return own: Tuple for current table */ ib_tpl_t ib_clust_search_tuple_create( /*=========================*/ @@ -3091,8 +2855,7 @@ ib_clust_search_tuple_create( /*****************************************************************//** Create an InnoDB tuple for table row operations. -@return own: Tuple for current table */ -UNIV_INTERN +@return own: Tuple for current table */ ib_tpl_t ib_clust_read_tuple_create( /*=======================*/ @@ -3110,8 +2873,7 @@ ib_clust_read_tuple_create( /*****************************************************************//** Return the number of user columns in the tuple definition. -@return number of user columns */ -UNIV_INTERN +@return number of user columns */ ib_ulint_t ib_tuple_get_n_user_cols( /*=====================*/ @@ -3130,8 +2892,7 @@ ib_tuple_get_n_user_cols( /*****************************************************************//** Return the number of columns in the tuple definition. -@return number of columns */ -UNIV_INTERN +@return number of columns */ ib_ulint_t ib_tuple_get_n_cols( /*================*/ @@ -3144,7 +2905,6 @@ ib_tuple_get_n_cols( /*****************************************************************//** Destroy an InnoDB tuple. */ -UNIV_INTERN void ib_tuple_delete( /*============*/ @@ -3161,8 +2921,7 @@ ib_tuple_delete( /*****************************************************************//** Get a table id. This function will acquire the dictionary mutex. 
-@return DB_SUCCESS if found */ -UNIV_INTERN +@return DB_SUCCESS if found */ ib_err_t ib_table_get_id( /*============*/ @@ -3180,63 +2939,9 @@ ib_table_get_id( return(err); } -/*****************************************************************//** -Get an index id. -@return DB_SUCCESS if found */ -UNIV_INTERN -ib_err_t -ib_index_get_id( -/*============*/ - const char* table_name, /*!< in: find index for this table */ - const char* index_name, /*!< in: index to find */ - ib_id_u64_t* index_id) /*!< out: index id if found */ -{ - dict_table_t* table; - char* normalized_name; - ib_err_t err = DB_TABLE_NOT_FOUND; - - *index_id = 0; - - normalized_name = static_cast( - mem_alloc(ut_strlen(table_name) + 1)); - ib_normalize_table_name(normalized_name, table_name); - - table = ib_lookup_table_by_name(normalized_name); - - mem_free(normalized_name); - normalized_name = NULL; - - if (table != NULL) { - dict_index_t* index; - - index = dict_table_get_index_on_name(table, index_name); - - if (index != NULL) { - /* We only support 32 bit table and index ids. Because - we need to pack the table id into the index id. */ - - *index_id = (table->id); - *index_id <<= 32; - *index_id |= (index->id); - - err = DB_SUCCESS; - } - } - - return(err); -} - -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' -#else -#define SRV_PATH_SEPARATOR '/' -#endif - - /*****************************************************************//** Check if cursor is positioned. -@return IB_TRUE if positioned */ -UNIV_INTERN +@return IB_TRUE if positioned */ ib_bool_t ib_cursor_is_positioned( /*====================*/ @@ -3245,14 +2950,13 @@ ib_cursor_is_positioned( const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr; row_prebuilt_t* prebuilt = cursor->prebuilt; - return(ib_btr_cursor_is_positioned(&prebuilt->pcur)); + return(ib_btr_cursor_is_positioned(prebuilt->pcur)); } /*****************************************************************//** Checks if the data dictionary is latched in exclusive mode. 
-@return TRUE if exclusive latch */ -UNIV_INTERN +@return TRUE if exclusive latch */ ib_bool_t ib_schema_lock_is_exclusive( /*========================*/ @@ -3265,8 +2969,7 @@ ib_schema_lock_is_exclusive( /*****************************************************************//** Checks if the data dictionary is latched in shared mode. -@return TRUE if shared latch */ -UNIV_INTERN +@return TRUE if shared latch */ ib_bool_t ib_schema_lock_is_shared( /*=====================*/ @@ -3279,8 +2982,7 @@ ib_schema_lock_is_shared( /*****************************************************************//** Set the Lock an InnoDB cursor/table. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_lock( /*===========*/ @@ -3298,8 +3000,7 @@ ib_cursor_lock( /*****************************************************************//** Set the Lock an InnoDB table using the table id. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_table_lock( /*==========*/ @@ -3314,7 +3015,7 @@ ib_table_lock( ib_qry_proc_t q_proc; trx_t* trx = (trx_t*) ib_trx; - ut_a(trx->state != TRX_STATE_NOT_STARTED); + ut_ad(trx_is_started(trx)); table = ib_open_table_by_id(table_id, FALSE); @@ -3323,12 +3024,13 @@ ib_table_lock( } ut_a(ib_lck_mode <= static_cast(LOCK_NUM)); + ut_ad(!dict_table_have_virtual_index(table)); heap = mem_heap_create(128); q_proc.node.sel = sel_node_create(heap); - thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap); + thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap, NULL); q_proc.grph.sel = static_cast(que_node_get_parent(thr)); q_proc.grph.sel->state = QUE_FORK_ACTIVE; @@ -3348,8 +3050,7 @@ ib_table_lock( /*****************************************************************//** Unlock an InnoDB table. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_unlock( /*=============*/ @@ -3370,8 +3071,7 @@ ib_cursor_unlock( /*****************************************************************//** Set the Lock mode of the cursor. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_set_lock_mode( /*====================*/ @@ -3391,8 +3091,8 @@ ib_cursor_set_lock_mode( } if (err == DB_SUCCESS) { - prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode; - ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED); + prebuilt->select_lock_type = (lock_mode) ib_lck_mode; + ut_a(trx_is_started(prebuilt->trx)); } return(err); @@ -3400,7 +3100,6 @@ ib_cursor_set_lock_mode( /*****************************************************************//** Set need to access clustered index record. */ -UNIV_INTERN void ib_cursor_set_cluster_access( /*=========================*/ @@ -3412,129 +3111,8 @@ ib_cursor_set_cluster_access( prebuilt->need_to_access_clustered = TRUE; } -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i8_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. 
-@return DB_SUCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i16_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i32_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i64_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u8_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. 
Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tupe to write to */ - int col_no, /*!< in: column number */ - ib_u16_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u32_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u64_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - /*****************************************************************//** Inform the cursor that it's the start of an SQL statement. */ -UNIV_INTERN void ib_cursor_stmt_begin( /*=================*/ @@ -3547,8 +3125,7 @@ ib_cursor_stmt_begin( /*****************************************************************//** Write a double value to a column. 
-@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_write_double( /*==================*/ @@ -3571,8 +3148,7 @@ ib_tuple_write_double( /*************************************************************//** Read a double column value from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_double( /*=================*/ @@ -3598,8 +3174,7 @@ ib_tuple_read_double( /*****************************************************************//** Write a float value to a column. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_write_float( /*=================*/ @@ -3622,8 +3197,7 @@ ib_tuple_write_float( /*************************************************************//** Read a float value from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_float( /*================*/ @@ -3651,7 +3225,6 @@ ib_tuple_read_float( Truncate a table. The cursor handle will be closed and set to NULL on success. @return DB_SUCCESS or error code */ -UNIV_INTERN ib_err_t ib_cursor_truncate( /*===============*/ @@ -3704,7 +3277,6 @@ ib_cursor_truncate( /*****************************************************************//** Truncate a table. @return DB_SUCCESS or error code */ -UNIV_INTERN ib_err_t ib_table_truncate( /*==============*/ @@ -3755,8 +3327,7 @@ ib_table_truncate( } if (trunc_err == DB_SUCCESS) { - ut_a(ib_trx_state(ib_trx) == static_cast( - TRX_STATE_NOT_STARTED)); + ut_a(!trx_is_started(static_cast(ib_trx))); } else { err = ib_trx_rollback(ib_trx); ut_a(err == DB_SUCCESS); @@ -3777,36 +3348,19 @@ ib_table_truncate( return(trunc_err); } -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ -UNIV_INTERN -ib_err_t -ib_close_thd( -/*=========*/ - void* thd) /*!< in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - innobase_close_thd(static_cast(thd)); - - return(DB_SUCCESS); -} - /*****************************************************************//** Return isolation configuration set by "innodb_api_trx_level" @return trx isolation level*/ -UNIV_INTERN -ib_trx_state_t +ib_trx_level_t ib_cfg_trx_level() /*==============*/ { - return(static_cast(ib_trx_level_setting)); + return(static_cast(ib_trx_level_setting)); } /*****************************************************************//** Return configure value for background commit interval (in seconds) @return background commit interval (in seconds) */ -UNIV_INTERN ib_ulint_t ib_cfg_bk_commit_interval() /*=======================*/ @@ -3817,7 +3371,6 @@ ib_cfg_bk_commit_interval() /*****************************************************************//** Get generic configure status @return configure status*/ -UNIV_INTERN int ib_cfg_get_cfg() /*============*/ @@ -3837,11 +3390,22 @@ ib_cfg_get_cfg() return(cfg_status); } +/*****************************************************************//** +Wrapper of ut_strerr() which converts an InnoDB error number to a +human readable text message. +@return string, describing the error */ +const char* +ib_ut_strerr( +/*=========*/ + ib_err_t num) /*!< in: error number */ +{ + return(ut_strerr(num)); +} + /*****************************************************************//** Increase/decrease the memcached sync count of table to sync memcached DML with SQL DDLs. 
@return DB_SUCCESS or error number */ -UNIV_INTERN ib_err_t ib_cursor_set_memcached_sync( /*=========================*/ @@ -3861,21 +3425,9 @@ ib_cursor_set_memcached_sync( } if (flag) { -#ifdef HAVE_ATOMIC_BUILTINS os_atomic_increment_lint(&table->memcached_sync_count, 1); -#else - dict_mutex_enter_for_mysql(); - ++table->memcached_sync_count; - dict_mutex_exit_for_mysql(); -#endif } else { -#ifdef HAVE_ATOMIC_BUILTINS os_atomic_decrement_lint(&table->memcached_sync_count, 1); -#else - dict_mutex_enter_for_mysql(); - --table->memcached_sync_count; - dict_mutex_exit_for_mysql(); -#endif ut_a(table->memcached_sync_count >= 0); } } else { diff --git a/storage/innobase/api/api0misc.cc b/storage/innobase/api/api0misc.cc index a980d32c33f..3864e4b9a7f 100644 --- a/storage/innobase/api/api0misc.cc +++ b/storage/innobase/api/api0misc.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,12 +24,7 @@ InnoDB Native API 3/20/2011 Jimmy Yang extracted from Embedded InnoDB *******************************************************/ -#include -#include - -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ +#include "ha_prototypes.h" #include "api0misc.h" #include "trx0roll.h" @@ -39,15 +34,10 @@ InnoDB Native API #include "pars0pars.h" #include "row0sel.h" #include "lock0lock.h" -#include "ha_prototypes.h" -#include -#include -#include /*********************************************************************//** Sets a lock on a table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN +@return error code or DB_SUCCESS */ dberr_t ib_trx_lock_table_with_retry( /*=========================*/ @@ -55,153 +45,92 @@ ib_trx_lock_table_with_retry( dict_table_t* table, /*!< in: table to lock */ enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ { - que_thr_t* thr; - dberr_t err; - mem_heap_t* heap; - sel_node_t* node; - - heap = mem_heap_create(512); - trx->op_info = "setting table lock"; - node = sel_node_create(heap); - thr = pars_complete_graph_for_exec(node, trx, heap); - thr->graph->state = QUE_FORK_ACTIVE; - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(static_cast( - que_node_get_parent(thr))); - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - err = lock_table(0, table, mode, thr); - - trx->error_state = err; - - if (UNIV_LIKELY(err == DB_SUCCESS)) { - que_thr_stop_for_mysql_no_error(thr, trx); - } else { - que_thr_stop_for_mysql(thr); - - if (err != DB_QUE_THR_SUSPENDED) { - ibool was_lock_wait; - - was_lock_wait = ib_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - } else { - que_thr_t* run_thr; - que_node_t* parent; - - parent = que_node_get_parent(thr); - run_thr = que_fork_start_command( - static_cast(parent)); - - ut_a(run_thr == thr); - - /* There was a lock wait but the thread was not - in a ready to run or running state. */ - trx->error_state = DB_LOCK_WAIT; - - goto run_again; - } - } - - que_graph_free(thr->graph); - trx->op_info = ""; - - return(err); + return(lock_table_for_trx(table, trx, mode)); } /****************************************************************//** Handles user errors and lock waits detected by the database engine. 
@return TRUE if it was a lock wait and we should continue running the query thread */ -UNIV_INTERN ibool ib_handle_errors( /*=============*/ - dberr_t* new_err,/*!< out: possible new error encountered in - lock wait, or if no new error, the value - of trx->error_state at the entry of this - function */ - trx_t* trx, /*!< in: transaction */ - que_thr_t* thr, /*!< in: query thread */ - trx_savept_t* savept) /*!< in: savepoint or NULL */ + dberr_t* new_err,/*!< out: possible new error encountered in + lock wait, or if no new error, the value + of trx->error_state at the entry of this + function */ + trx_t* trx, /*!< in: transaction */ + que_thr_t* thr, /*!< in: query thread */ + trx_savept_t* savept) /*!< in: savepoint or NULL */ { - dberr_t err; + dberr_t err; handle_new_error: - err = trx->error_state; + err = trx->error_state; - ut_a(err != DB_SUCCESS); + ut_a(err != DB_SUCCESS); - trx->error_state = DB_SUCCESS; + trx->error_state = DB_SUCCESS; - switch (err) { - case DB_LOCK_WAIT_TIMEOUT: + switch (err) { + case DB_LOCK_WAIT_TIMEOUT: trx_rollback_for_mysql(trx); break; - /* fall through */ - case DB_DUPLICATE_KEY: - case DB_FOREIGN_DUPLICATE_KEY: - case DB_TOO_BIG_RECORD: - case DB_ROW_IS_REFERENCED: - case DB_NO_REFERENCED_ROW: - case DB_CANNOT_ADD_CONSTRAINT: - case DB_TOO_MANY_CONCURRENT_TRXS: - case DB_OUT_OF_FILE_SPACE: - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ + /* fall through */ + case DB_DUPLICATE_KEY: + case DB_FOREIGN_DUPLICATE_KEY: + case DB_TOO_BIG_RECORD: + case DB_ROW_IS_REFERENCED: + case DB_NO_REFERENCED_ROW: + case DB_CANNOT_ADD_CONSTRAINT: + case DB_TOO_MANY_CONCURRENT_TRXS: + case DB_OUT_OF_FILE_SPACE: + if (savept) { + /* Roll back the latest, possibly incomplete + insertion or update */ trx_rollback_to_savepoint(trx, savept); - } - break; - case DB_LOCK_WAIT: + } + break; + case DB_LOCK_WAIT: lock_wait_suspend_thread(thr); - if (trx->error_state != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); + if 
(trx->error_state != DB_SUCCESS) { + que_thr_stop_for_mysql(thr); - goto handle_new_error; - } + goto handle_new_error; + } - *new_err = err; + *new_err = err; - return(TRUE); /* Operation needs to be retried. */ + return(TRUE); /* Operation needs to be retried. */ - case DB_DEADLOCK: - case DB_LOCK_TABLE_FULL: - /* Roll back the whole transaction; this resolution was added - to version 3.23.43 */ + case DB_DEADLOCK: + case DB_LOCK_TABLE_FULL: + /* Roll back the whole transaction; this resolution was added + to version 3.23.43 */ - trx_rollback_for_mysql(trx); - break; + trx_rollback_for_mysql(trx); + break; - case DB_MUST_GET_MORE_FILE_SPACE: + case DB_MUST_GET_MORE_FILE_SPACE: - exit(1); + ut_error; - case DB_CORRUPTION: + case DB_CORRUPTION: case DB_FOREIGN_EXCEED_MAX_CASCADE: - break; - default: - ut_error; - } + break; + default: + ut_error; + } - if (trx->error_state != DB_SUCCESS) { - *new_err = trx->error_state; - } else { - *new_err = err; - } + if (trx->error_state != DB_SUCCESS) { + *new_err = trx->error_state; + } else { + *new_err = err; + } - trx->error_state = DB_SUCCESS; + trx->error_state = DB_SUCCESS; - return(FALSE); + return(FALSE); } diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 33ca57c9654..255788229e4 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. 
-Copyright (c) 2014, 2015, MariaDB Corporation +Copyright (c) 2014, 2016, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,14 +26,16 @@ Created 6/2/1994 Heikki Tuuri *******************************************************/ #include "btr0btr.h" +#include "ha_prototypes.h" #ifdef UNIV_NONINL #include "btr0btr.ic" #endif -#include "fsp0fsp.h" +#include "fsp0sysspace.h" #include "page0page.h" #include "page0zip.h" +#include "gis0rtree.h" #ifndef UNIV_HOTBACKUP #include "btr0cur.h" @@ -45,13 +47,15 @@ Created 6/2/1994 Heikki Tuuri #include "ibuf0ibuf.h" #include "trx0trx.h" #include "srv0mon.h" +#include "gis0geo.h" +#include "ut0new.h" +#include "dict0boot.h" /**************************************************************//** Checks if the page in the cursor can be merged with given page. If necessary, re-organize the merge_page. -@return TRUE if possible to merge. */ -UNIV_INTERN -ibool +@return true if possible to merge. */ +bool btr_can_merge_with_page( /*====================*/ btr_cur_t* cursor, /*!< in: cursor on the page to merge */ @@ -63,581 +67,19 @@ btr_can_merge_with_page( /**************************************************************//** Report that an index page is corrupted. 
*/ -UNIV_INTERN void btr_corruption_report( /*==================*/ const buf_block_t* block, /*!< in: corrupted block */ const dict_index_t* index) /*!< in: index tree */ { - fprintf(stderr, "InnoDB: flag mismatch in space %u page %u" - " index %s of table %s\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), - index->name, index->table_name); - if (block->page.zip.data) { - buf_page_print(block->page.zip.data, - buf_block_get_zip_size(block), - BUF_PAGE_PRINT_NO_CRASH); - } - buf_page_print(buf_block_get_frame(block), 0, 0); + ib::error() + << "Flag mismatch in page " << block->page.id + << " index " << index->name + << " of table " << index->table->name; } #ifndef UNIV_HOTBACKUP -#ifdef UNIV_BLOB_DEBUG -# include "srv0srv.h" -# include "ut0rbt.h" - -/** TRUE when messages about index->blobs modification are enabled. */ -static ibool btr_blob_dbg_msg; - -/** Issue a message about an operation on index->blobs. -@param op operation -@param b the entry being subjected to the operation -@param ctx the context of the operation */ -#define btr_blob_dbg_msg_issue(op, b, ctx) \ - fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \ - (b)->ref_page_no, (b)->ref_heap_no, \ - (b)->ref_field_no, (b)->blob_page_no, ctx, \ - (b)->owner, (b)->always_owner, (b)->del) - -/** Insert to index->blobs a reference to an off-page column. -@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_insert( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ -{ - if (btr_blob_dbg_msg) { - btr_blob_dbg_msg_issue("insert", b, ctx); - } - mutex_enter(&index->blobs_mutex); - rbt_insert(index->blobs, b, b); - mutex_exit(&index->blobs_mutex); -} - -/** Remove from index->blobs a reference to an off-page column. 
-@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_delete( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ -{ - if (btr_blob_dbg_msg) { - btr_blob_dbg_msg_issue("delete", b, ctx); - } - mutex_enter(&index->blobs_mutex); - ut_a(rbt_delete(index->blobs, b)); - mutex_exit(&index->blobs_mutex); -} - -/**************************************************************//** -Comparator for items (btr_blob_dbg_t) in index->blobs. -The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no). -@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */ -static -int -btr_blob_dbg_cmp( -/*=============*/ - const void* a, /*!< in: first btr_blob_dbg_t to compare */ - const void* b) /*!< in: second btr_blob_dbg_t to compare */ -{ - const btr_blob_dbg_t* aa = static_cast(a); - const btr_blob_dbg_t* bb = static_cast(b); - - ut_ad(aa != NULL); - ut_ad(bb != NULL); - - if (aa->ref_page_no != bb->ref_page_no) { - return(aa->ref_page_no < bb->ref_page_no ? -1 : 1); - } - if (aa->ref_heap_no != bb->ref_heap_no) { - return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1); - } - if (aa->ref_field_no != bb->ref_field_no) { - return(aa->ref_field_no < bb->ref_field_no ? -1 : 1); - } - return(0); -} - -/**************************************************************//** -Add a reference to an off-page column to the index->blobs map. 
*/ -UNIV_INTERN -void -btr_blob_dbg_add_blob( -/*==================*/ - const rec_t* rec, /*!< in: clustered index record */ - ulint field_no, /*!< in: off-page column number */ - ulint page_no, /*!< in: start page of the column */ - dict_index_t* index, /*!< in/out: index tree */ - const char* ctx) /*!< in: context (for logging) */ -{ - btr_blob_dbg_t b; - const page_t* page = page_align(rec); - - ut_a(index->blobs); - - b.blob_page_no = page_no; - b.ref_page_no = page_get_page_no(page); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = field_no; - ut_a(b.ref_field_no >= index->n_uniq); - b.always_owner = b.owner = TRUE; - b.del = FALSE; - ut_a(!rec_get_deleted_flag(rec, page_is_comp(page))); - btr_blob_dbg_rbt_insert(index, &b, ctx); -} - -/**************************************************************//** -Add to index->blobs any references to off-page columns from a record. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add_rec( -/*=================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint count = 0; - ulint i; - btr_blob_dbg_t b; - ibool del; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!rec_offs_any_extern(offsets)) { - return(0); - } - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field_ref = rec_get_nth_field( - rec, offsets, i, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - if (!memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)) { - /* the column has not been stored yet */ - continue; - } - - b.ref_field_no = i; - 
b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - ut_a(b.ref_field_no >= index->n_uniq); - b.always_owner = b.owner - = !(field_ref[BTR_EXTERN_LEN] - & BTR_EXTERN_OWNER_FLAG); - b.del = del; - - btr_blob_dbg_rbt_insert(index, &b, ctx); - count++; - } - } - - return(count); -} - -/**************************************************************//** -Display the references to off-page columns. -This function is to be called from a debugger, -for example when a breakpoint on ut_dbg_assertion_failed is hit. */ -UNIV_INTERN -void -btr_blob_dbg_print( -/*===============*/ - const dict_index_t* index) /*!< in: index tree */ -{ - const ib_rbt_node_t* node; - - if (!index->blobs) { - return; - } - - /* We intentionally do not acquire index->blobs_mutex here. - This function is to be called from a debugger, and the caller - should make sure that the index->blobs_mutex is held. */ - - for (node = rbt_first(index->blobs); - node != NULL; node = rbt_next(index->blobs, node)) { - const btr_blob_dbg_t* b - = rbt_value(btr_blob_dbg_t, node); - fprintf(stderr, "%u:%u:%u->%u%s%s%s\n", - b->ref_page_no, b->ref_heap_no, b->ref_field_no, - b->blob_page_no, - b->owner ? "" : "(disowned)", - b->always_owner ? "" : "(has disowned)", - b->del ? "(deleted)" : ""); - } -} - -/**************************************************************//** -Remove from index->blobs any references to off-page columns from a record. 
-@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove_rec( -/*====================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint i; - ulint count = 0; - btr_blob_dbg_t b; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!rec_offs_any_extern(offsets)) { - return(0); - } - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field_ref = rec_get_nth_field( - rec, offsets, i, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - b.ref_field_no = i; - b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - - switch (b.blob_page_no) { - case 0: - /* The column has not been stored yet. - The BLOB pointer must be all zero. - There cannot be a BLOB starting at - page 0, because page 0 is reserved for - the tablespace header. */ - ut_a(!memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)); - /* fall through */ - case FIL_NULL: - /* the column has been freed already */ - continue; - } - - btr_blob_dbg_rbt_delete(index, &b, ctx); - count++; - } - } - - return(count); -} - -/**************************************************************//** -Check that there are no references to off-page columns from or to -the given page. Invoked when freeing or clearing a page. 
-@return TRUE when no orphan references exist */ -UNIV_INTERN -ibool -btr_blob_dbg_is_empty( -/*==================*/ - dict_index_t* index, /*!< in: index */ - ulint page_no) /*!< in: page number */ -{ - const ib_rbt_node_t* node; - ibool success = TRUE; - - if (!index->blobs) { - return(success); - } - - mutex_enter(&index->blobs_mutex); - - for (node = rbt_first(index->blobs); - node != NULL; node = rbt_next(index->blobs, node)) { - const btr_blob_dbg_t* b - = rbt_value(btr_blob_dbg_t, node); - - if (b->ref_page_no != page_no && b->blob_page_no != page_no) { - continue; - } - - fprintf(stderr, - "InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n", - b->owner ? "" : "(disowned)", - b->always_owner ? "" : "(has disowned)", - b->del ? "(deleted)" : "", - b->ref_page_no, b->ref_heap_no, b->ref_field_no, - b->blob_page_no); - - if (b->blob_page_no != page_no || b->owner || !b->del) { - success = FALSE; - } - } - - mutex_exit(&index->blobs_mutex); - return(success); -} - -/**************************************************************//** -Count and process all references to off-page columns on a page. 
-@return number of references processed */ -UNIV_INTERN -ulint -btr_blob_dbg_op( -/*============*/ - const page_t* page, /*!< in: B-tree leaf page */ - const rec_t* rec, /*!< in: record to start from - (NULL to process the whole page) */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx, /*!< in: context (for logging) */ - const btr_blob_dbg_op_f op) /*!< in: operation on records */ -{ - ulint count = 0; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_a(!rec || page_align(rec) == page); - - if (!index->blobs || !page_is_leaf(page) - || !dict_index_is_clust(index)) { - return(0); - } - - if (rec == NULL) { - rec = page_get_infimum_rec(page); - } - - do { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - count += op(rec, index, offsets, ctx); - rec = page_rec_get_next_const(rec); - } while (!page_rec_is_supremum(rec)); - - if (heap) { - mem_heap_free(heap); - } - - return(count); -} - -/**************************************************************//** -Count and add to index->blobs any references to off-page columns -from records on a page. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add( -/*=============*/ - const page_t* page, /*!< in: rewritten page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - btr_blob_dbg_assert_empty(index, page_get_page_no(page)); - - return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec)); -} - -/**************************************************************//** -Count and remove from index->blobs any references to off-page columns -from records on a page. -Used when reorganizing a page, before copying the records. 
-@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove( -/*================*/ - const page_t* page, /*!< in: b-tree page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint count; - - count = btr_blob_dbg_op(page, NULL, index, ctx, - btr_blob_dbg_remove_rec); - - /* Check that no references exist. */ - btr_blob_dbg_assert_empty(index, page_get_page_no(page)); - - return(count); -} - -/**************************************************************//** -Restore in index->blobs any references to off-page columns -Used when page reorganize fails due to compressed page overflow. */ -UNIV_INTERN -void -btr_blob_dbg_restore( -/*=================*/ - const page_t* npage, /*!< in: page that failed to compress */ - const page_t* page, /*!< in: copy of original page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint removed; - ulint added; - - ut_a(page_get_page_no(npage) == page_get_page_no(page)); - ut_a(page_get_space_id(npage) == page_get_space_id(page)); - - removed = btr_blob_dbg_remove(npage, index, ctx); - added = btr_blob_dbg_add(page, index, ctx); - ut_a(added == removed); -} - -/**************************************************************//** -Modify the 'deleted' flag of a record. 
*/ -UNIV_INTERN -void -btr_blob_dbg_set_deleted_flag( -/*==========================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ibool del) /*!< in: TRUE=deleted, FALSE=exists */ -{ - const ib_rbt_node_t* node; - btr_blob_dbg_t b; - btr_blob_dbg_t* c; - ulint i; - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_a(dict_index_is_clust(index)); - ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */ - - if (!rec_offs_any_extern(offsets) || !index->blobs) { - - return; - } - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field_ref = rec_get_nth_field( - rec, offsets, i, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - b.ref_field_no = i; - b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - - switch (b.blob_page_no) { - case 0: - ut_a(memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)); - /* page number 0 is for the - page allocation bitmap */ - case FIL_NULL: - /* the column has been freed already */ - ut_error; - } - - mutex_enter(&index->blobs_mutex); - node = rbt_lookup(index->blobs, &b); - ut_a(node); - - c = rbt_value(btr_blob_dbg_t, node); - /* The flag should be modified. */ - c->del = del; - if (btr_blob_dbg_msg) { - b = *c; - mutex_exit(&index->blobs_mutex); - btr_blob_dbg_msg_issue("del_mk", &b, ""); - } else { - mutex_exit(&index->blobs_mutex); - } - } - } -} - -/**************************************************************//** -Change the ownership of an off-page column. 
*/ -UNIV_INTERN -void -btr_blob_dbg_owner( -/*===============*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ulint i, /*!< in: ith field in rec */ - ibool own) /*!< in: TRUE=owned, FALSE=disowned */ -{ - const ib_rbt_node_t* node; - btr_blob_dbg_t b; - const byte* field_ref; - ulint len; - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_a(rec_offs_nth_extern(offsets, i)); - - field_ref = rec_get_nth_field(rec, offsets, i, &len); - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = i; - b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG); - b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO); - - ut_a(b.owner == own); - - mutex_enter(&index->blobs_mutex); - node = rbt_lookup(index->blobs, &b); - /* row_ins_clust_index_entry_by_modify() invokes - btr_cur_unmark_extern_fields() also for the newly inserted - references, which are all zero bytes until the columns are stored. - The node lookup must fail if and only if that is the case. */ - ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE) - == !node); - - if (node) { - btr_blob_dbg_t* c = rbt_value(btr_blob_dbg_t, node); - /* Some code sets ownership from TRUE to TRUE. - We do not allow changing ownership from FALSE to FALSE. */ - ut_a(own || c->owner); - - c->owner = own; - if (!own) { - c->always_owner = FALSE; - } - } - - mutex_exit(&index->blobs_mutex); -} -#endif /* UNIV_BLOB_DEBUG */ - /* Latching strategy of the InnoDB B-tree -------------------------------------- @@ -696,7 +138,7 @@ we allocate pages for the non-leaf levels of the tree. #ifdef UNIV_BTR_DEBUG /**************************************************************//** Checks a file segment header within a B-tree root page. 
-@return TRUE if valid */ +@return TRUE if valid */ static ibool btr_root_fseg_validate( @@ -715,8 +157,7 @@ btr_root_fseg_validate( /**************************************************************//** Gets the root node of a tree and x- or s-latches it. -@return root page, x- or s-latched */ -static +@return root page, x- or s-latched */ buf_block_t* btr_root_block_get( /*===============*/ @@ -725,16 +166,13 @@ btr_root_block_get( or RW_X_LATCH */ mtr_t* mtr) /*!< in: mtr */ { - ulint space; - ulint zip_size; - ulint root_page_no; - buf_block_t* block; + const ulint space = dict_index_get_space(index); + const page_id_t page_id(space, dict_index_get_page(index)); + const page_size_t page_size(dict_table_page_size(index->table)); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - root_page_no = dict_index_get_page(index); + buf_block_t* block = btr_block_get(page_id, page_size, mode, + index, mtr); - block = btr_block_get(space, zip_size, root_page_no, mode, (dict_index_t*)index, mtr); if (!block) { index->table->is_encrypted = TRUE; @@ -765,17 +203,19 @@ btr_root_block_get( } /**************************************************************//** -Gets the root node of a tree and x-latches it. -@return root page, x-latched */ -UNIV_INTERN +Gets the root node of a tree and sx-latches it for segment access. +@return root page, sx-latched */ page_t* btr_root_get( /*=========*/ const dict_index_t* index, /*!< in: index tree */ mtr_t* mtr) /*!< in: mtr */ { - buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - mtr); + /* Intended to be used for segment list access. + SX lock doesn't block reading user data by other threads. + And block the segment list access by others.*/ + buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, + mtr); if (root && root->page.encrypted == true) { root = NULL; @@ -788,8 +228,7 @@ btr_root_get( Gets the height of the B-tree (the level of the root, when the leaf level is assumed to be 0). 
The caller must hold an S or X latch on the index. -@return tree height (level of the root) */ -UNIV_INTERN +@return tree height (level of the root) */ ulint btr_height_get( /*===========*/ @@ -799,22 +238,23 @@ btr_height_get( ulint height=0; buf_block_t* root_block; - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK) - || mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); + ut_ad(srv_read_only_mode + || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK + | MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); - /* S latches the page */ - root_block = btr_root_block_get(index, RW_S_LATCH, mtr); + /* S latches the page */ + root_block = btr_root_block_get(index, RW_S_LATCH, mtr); if (root_block) { height = btr_page_get_level(buf_block_get_frame(root_block), mtr); /* Release the S latch on the root page. */ - mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX); -#ifdef UNIV_SYNC_DEBUG - sync_thread_reset_level(&root_block->lock); -#endif /* UNIV_SYNC_DEBUG */ + mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX); + + ut_d(sync_check_unlock(&root_block->lock)); } return(height); @@ -823,7 +263,7 @@ btr_height_get( /**************************************************************//** Checks a file segment header within a B-tree root page and updates the segment header space id. -@return TRUE if valid */ +@return TRUE if valid */ static bool btr_root_fseg_adjust_on_import( @@ -856,41 +296,34 @@ btr_root_fseg_adjust_on_import( /**************************************************************//** Checks and adjusts the root node of a tree during IMPORT TABLESPACE. 
@return error code, or DB_SUCCESS */ -UNIV_INTERN dberr_t btr_root_adjust_on_import( /*======================*/ const dict_index_t* index) /*!< in: index tree */ { - dberr_t err; - mtr_t mtr; - page_t* page; - buf_block_t* block; - page_zip_des_t* page_zip; - dict_table_t* table = index->table; - ulint space_id = dict_index_get_space(index); - ulint zip_size = dict_table_zip_size(table); - ulint root_page_no = dict_index_get_page(index); + dberr_t err; + mtr_t mtr; + page_t* page; + buf_block_t* block; + page_zip_des_t* page_zip; + dict_table_t* table = index->table; + const ulint space_id = dict_index_get_space(index); + const page_id_t page_id(space_id, dict_index_get_page(index)); + const page_size_t page_size(dict_table_page_size(table)); + + DBUG_EXECUTE_IF("ib_import_trigger_corruption_3", + return(DB_CORRUPTION);); mtr_start(&mtr); mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - DBUG_EXECUTE_IF("ib_import_trigger_corruption_3", - return(DB_CORRUPTION);); - - block = btr_block_get( - space_id, zip_size, root_page_no, RW_X_LATCH, (dict_index_t*)index, &mtr); + block = btr_block_get(page_id, page_size, RW_X_LATCH, index, &mtr); page = buf_block_get_frame(block); page_zip = buf_block_get_page_zip(block); - /* Check that this is a B-tree page and both the PREV and NEXT - pointers are FIL_NULL, because the root page does not have any - siblings. */ - if (fil_page_get_type(page) != FIL_PAGE_INDEX - || fil_page_get_prev(page) != FIL_NULL - || fil_page_get_next(page) != FIL_NULL) { + if (!page_is_root(page)) { err = DB_CORRUPTION; @@ -903,18 +336,15 @@ btr_root_adjust_on_import( if (page_is_compact_format != dict_table_is_comp(table)) { err = DB_CORRUPTION; } else { - /* Check that the table flags and the tablespace flags match. 
*/ - ulint flags = fil_space_get_flags(table->space); - - if (flags - && flags != dict_tf_to_fsp_flags(table->flags)) { - - err = DB_CORRUPTION; - } else { - err = DB_SUCCESS; - } + ulint flags = dict_tf_to_fsp_flags( + table->flags, + false, + dict_table_is_encrypted(table)); + ulint fsp_flags = fil_space_get_flags(table->space); + err = fsp_flags_are_equal(flags, fsp_flags) + ? DB_SUCCESS : DB_CORRUPTION; } } else { err = DB_SUCCESS; @@ -937,124 +367,9 @@ btr_root_adjust_on_import( return(err); } -/*************************************************************//** -Gets pointer to the previous user record in the tree. It is assumed that -the caller has appropriate latches on the page and its neighbor. -@return previous user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_prev_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the previous page */ -{ - page_t* page; - page_t* prev_page; - ulint prev_page_no; - - if (!page_rec_is_infimum(rec)) { - - rec_t* prev_rec = page_rec_get_prev(rec); - - if (!page_rec_is_infimum(prev_rec)) { - - return(prev_rec); - } - } - - page = page_align(rec); - prev_page_no = btr_page_get_prev(page, mtr); - - if (prev_page_no != FIL_NULL) { - - ulint space; - ulint zip_size; - buf_block_t* prev_block; - - space = page_get_space_id(page); - zip_size = fil_space_get_zip_size(space); - - prev_block = buf_page_get_with_no_latch(space, zip_size, - prev_page_no, mtr); - prev_page = buf_block_get_frame(prev_block); - /* The caller must already have a latch to the brother */ - ut_ad(mtr_memo_contains(mtr, prev_block, - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, prev_block, - MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_page) == page_is_comp(page)); - ut_a(btr_page_get_next(prev_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - 
return(page_rec_get_prev(page_get_supremum_rec(prev_page))); - } - - return(NULL); -} - -/*************************************************************//** -Gets pointer to the next user record in the tree. It is assumed that the -caller has appropriate latches on the page and its neighbor. -@return next user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_next_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the next page */ -{ - page_t* page; - page_t* next_page; - ulint next_page_no; - - if (!page_rec_is_supremum(rec)) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (!page_rec_is_supremum(next_rec)) { - - return(next_rec); - } - } - - page = page_align(rec); - next_page_no = btr_page_get_next(page, mtr); - - if (next_page_no != FIL_NULL) { - ulint space; - ulint zip_size; - buf_block_t* next_block; - - space = page_get_space_id(page); - zip_size = fil_space_get_zip_size(space); - - next_block = buf_page_get_with_no_latch(space, zip_size, - next_page_no, mtr); - next_page = buf_block_get_frame(next_block); - /* The caller must already have a latch to the brother */ - ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, next_block, - MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_page) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - return(page_rec_get_next(page_get_infimum_rec(next_page))); - } - - return(NULL); -} - /**************************************************************//** Creates a new index page (not the root, and also not used in page reorganization). @see btr_page_empty(). 
*/ -static void btr_page_create( /*============*/ @@ -1066,18 +381,21 @@ btr_page_create( { page_t* page = buf_block_get_frame(block); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); if (page_zip) { - page_create_zip(block, index, level, 0, mtr); + page_create_zip(block, index, level, 0, NULL, mtr); } else { - page_create(block, mtr, dict_table_is_comp(index->table)); + page_create(block, mtr, dict_table_is_comp(index->table), + dict_index_is_spatial(index)); /* Set the level of the new index page */ btr_page_set_level(page, NULL, level, mtr); } - block->check_index_page_at_flush = TRUE; + /* For Spatial Index, initialize the Split Sequence Number */ + if (dict_index_is_spatial(index)) { + page_set_ssn_id(block, page_zip, 0, mtr); + } btr_page_set_index_id(page, page_zip, index->id, mtr); } @@ -1085,7 +403,7 @@ btr_page_create( /**************************************************************//** Allocates a new file page to be used in an ibuf tree. Takes the page from the free list of the tree, which must contain pages! -@return new allocated block, x-latched */ +@return new allocated block, x-latched */ static buf_block_t* btr_page_alloc_for_ibuf( @@ -1104,9 +422,11 @@ btr_page_alloc_for_ibuf( + PAGE_BTR_IBUF_FREE_LIST, mtr); ut_a(node_addr.page != FIL_NULL); - new_block = buf_page_get(dict_index_get_space(index), - dict_table_zip_size(index->table), - node_addr.page, RW_X_LATCH, mtr); + new_block = buf_page_get( + page_id_t(dict_index_get_space(index), node_addr.page), + dict_table_page_size(index->table), + RW_X_LATCH, mtr); + new_page = buf_block_get_frame(new_block); buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW); @@ -1190,7 +510,6 @@ that the caller has made the reservation for free extents! 
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) @retval block (not allocated or initialized) otherwise */ -UNIV_INTERN buf_block_t* btr_page_alloc( /*===========*/ @@ -1225,8 +544,7 @@ btr_page_alloc( /**************************************************************//** Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -UNIV_INTERN +@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ ulint btr_get_size( /*=========*/ @@ -1235,16 +553,46 @@ btr_get_size( mtr_t* mtr) /*!< in/out: mini-transaction where index is s-latched */ { - ulint used; - if (flag == BTR_N_LEAF_PAGES) { - btr_get_size_and_reserved(index, flag, &used, mtr); - return used; - } else if (flag == BTR_TOTAL_SIZE) { - return btr_get_size_and_reserved(index, flag, &used, mtr); - } else { - ut_error; + fseg_header_t* seg_header; + page_t* root; + ulint n=0; + ulint dummy; + + ut_ad(srv_read_only_mode + || mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK) + || dict_table_is_intrinsic(index->table)); + + if (index->page == FIL_NULL + || dict_index_is_online_ddl(index) + || !index->is_committed()) { + return(ULINT_UNDEFINED); } - return (ULINT_UNDEFINED); + + root = btr_root_get(index, mtr); + + if (root) { + if (flag == BTR_N_LEAF_PAGES) { + seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; + + fseg_n_reserved_pages(seg_header, &n, mtr); + + } else if (flag == BTR_TOTAL_SIZE) { + seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; + + n = fseg_n_reserved_pages(seg_header, &dummy, mtr); + + seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; + + n += fseg_n_reserved_pages(seg_header, &dummy, mtr); + } else { + ut_error; + } + } else { + n = ULINT_UNDEFINED; + } + + return(n); } /**************************************************************//** @@ -1271,8 +619,9 @@ btr_get_size_and_reserved( ut_a(flag == 
BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); - if (index->page == FIL_NULL || dict_index_is_online_ddl(index) - || *index->name == TEMP_INDEX_PREFIX) { + if (index->page == FIL_NULL + || dict_index_is_online_ddl(index) + || !index->is_committed()) { return(ULINT_UNDEFINED); } @@ -1310,7 +659,7 @@ btr_page_free_for_ibuf( { page_t* root; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); root = btr_root_get(index, mtr); flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, @@ -1323,27 +672,24 @@ btr_page_free_for_ibuf( /**************************************************************//** Frees a file page used in an index tree. Can be used also to (BLOB) -external storage pages, because the page level 0 can be given as an -argument. */ -UNIV_INTERN +external storage pages. */ void btr_page_free_low( /*==============*/ dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ - ulint level, /*!< in: page level */ + ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */ bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ { fseg_header_t* seg_header; page_t* root; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); /* The page gets invalid for optimistic searches: increment the frame modify clock */ buf_block_modify_clock_inc(block); - btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); if (blob) { ut_a(level == 0); @@ -1424,12 +770,19 @@ btr_page_free_low( root = btr_root_get(index, mtr); - if (level == 0) { + if (level == 0 || level == ULINT_UNDEFINED) { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; } else { seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; } +#ifdef UNIV_GIS_DEBUG + if (dict_index_is_spatial(index)) { + fprintf(stderr, "GIS_DIAG: Freed %ld\n", + (long) block->page.id.page_no()); + } +#endif + if 
(scrub) { /** * Reset page type so that scrub thread won't try to scrub it @@ -1439,13 +792,14 @@ btr_page_free_low( } fseg_free_page(seg_header, - buf_block_get_space(block), - buf_block_get_page_no(block), mtr); + block->page.id.space(), + block->page.id.page_no(), + level != ULINT_UNDEFINED, mtr); /* The page was marked free in the allocation bitmap, but it should remain buffer-fixed until mtr_commit(mtr) or until it is explicitly freed from the mini-transaction. */ - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); /* TODO: Discard any operations on the page from the redo log and remove the block from the flush list and the buffer pool. This would free up buffer pool earlier and reduce writes to @@ -1455,7 +809,6 @@ btr_page_free_low( /**************************************************************//** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. */ -UNIV_INTERN void btr_page_free( /*==========*/ @@ -1466,7 +819,8 @@ btr_page_free( const page_t* page = buf_block_get_frame(block); ulint level = btr_page_get_level(page, mtr); - ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); + ut_ad(fil_page_index_page_check(block->frame)); + ut_ad(level != ULINT_UNDEFINED); btr_page_free_low(index, block, level, false, mtr); } @@ -1506,8 +860,8 @@ btr_node_ptr_set_child_page_no( } /************************************************************//** -Returns the child page of a node pointer and x-latches it. -@return child page, x-latched */ +Returns the child page of a node pointer and sx-latches it. 
+@return child page, sx-latched */ static buf_block_t* btr_node_ptr_get_child( @@ -1517,21 +871,20 @@ btr_node_ptr_get_child( const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ mtr_t* mtr) /*!< in: mtr */ { - ulint page_no; - ulint space; - ut_ad(rec_offs_validate(node_ptr, index, offsets)); - space = page_get_space_id(page_align(node_ptr)); - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - return(btr_block_get(space, dict_table_zip_size(index->table), - page_no, RW_X_LATCH, index, mtr)); + const page_id_t page_id( + page_get_space_id(page_align(node_ptr)), + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + return(btr_block_get(page_id, dict_table_page_size(index->table), + RW_SX_LATCH, index, mtr)); } /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds -an x-latch on the tree. -@return rec_get_offsets() of the node pointer record */ +an sx-latch on the tree. 
+@return rec_get_offsets() of the node pointer record */ static ulint* btr_page_get_father_node_ptr_func( @@ -1541,6 +894,8 @@ btr_page_get_father_node_ptr_func( btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ + ulint latch_mode,/*!< in: BTR_CONT_MODIFY_TREE + or BTR_CONT_SEARCH_TREE */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ @@ -1552,11 +907,18 @@ btr_page_get_father_node_ptr_func( ulint page_no; dict_index_t* index; - page_no = buf_block_get_page_no(btr_cur_get_block(cursor)); - index = btr_cur_get_index(cursor); + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE + || latch_mode == BTR_CONT_SEARCH_TREE); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); + page_no = btr_cur_get_block(cursor)->page.id.page_no(); + index = btr_cur_get_index(cursor); + ut_ad(!dict_index_is_spatial(index)); + + ut_ad(srv_read_only_mode + || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); ut_ad(dict_index_get_page(index) != page_no); @@ -1564,36 +926,47 @@ btr_page_get_father_node_ptr_func( user_rec = btr_cur_get_rec(cursor); ut_a(page_rec_is_user_rec(user_rec)); - tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level); - btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, cursor, 0, - file, line, mtr); + tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level); + dberr_t err = DB_SUCCESS; + + if (dict_table_is_intrinsic(index->table)) { + err = btr_cur_search_to_nth_level_with_no_latch( + index, level + 1, tuple, PAGE_CUR_LE, cursor, + file, line, mtr); + } else { + err = btr_cur_search_to_nth_level( + index, level + 1, tuple, + PAGE_CUR_LE, latch_mode, cursor, 0, + file, line, mtr); + } + + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " 
btr_page_get_father_node_ptr_func " + << " level: " << level + 1 + << " called from file: " + << file << " line: " << line + << " table: " << index->table->name + << " index: " << index->name(); + } node_ptr = btr_cur_get_rec(cursor); - ut_ad(!page_rec_is_comp(node_ptr) - || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); + offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap); if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { rec_t* print_rec; - fputs("InnoDB: Dump of the child page:\n", stderr); - buf_page_print(page_align(user_rec), 0, - BUF_PAGE_PRINT_NO_CRASH); - fputs("InnoDB: Dump of the parent page:\n", stderr); - buf_page_print(page_align(node_ptr), 0, - BUF_PAGE_PRINT_NO_CRASH); - fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, TRUE, index->table_name); - fputs(", index ", stderr); - ut_print_name(stderr, NULL, FALSE, index->name); - fprintf(stderr, ",\n" - "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) - btr_node_ptr_get_child_page_no(node_ptr, offsets), - (ulong) page_no); + ib::error() + << "Corruption of an index tree: table " + << index->table->name + << " index " << index->name + << ", father ptr page no " + << btr_node_ptr_get_child_page_no(node_ptr, offsets) + << ", child page no " << page_no; + print_rec = page_rec_get_next( page_get_infimum_rec(page_align(user_rec))); offsets = rec_get_offsets(print_rec, index, @@ -1603,27 +976,28 @@ btr_page_get_father_node_ptr_func( ULINT_UNDEFINED, &heap); page_rec_print(node_ptr, offsets); - fputs("InnoDB: You should dump + drop + reimport the table" - " to fix the\n" - "InnoDB: corruption. If the crash happens at " - "the database startup, see\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n" - "InnoDB: forcing recovery. " - "Then dump + drop + reimport.\n", stderr); - - ut_error; + ib::fatal() + << "You should dump + drop + reimport the table to" + << " fix the corruption. 
If the crash happens at" + << " database startup. " << FORCE_RECOVERY_MSG + << " Then dump + drop + reimport."; } return(offsets); } #define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr) + btr_page_get_father_node_ptr_func( \ + of,heap,cur,BTR_CONT_MODIFY_TREE,__FILE__,__LINE__,mtr) + +#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \ + btr_page_get_father_node_ptr_func( \ + of,heap,cur,BTR_CONT_SEARCH_TREE,__FILE__,__LINE__,mtr) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an x-latch on the tree. -@return rec_get_offsets() of the node pointer record */ +@return rec_get_offsets() of the node pointer record */ static ulint* btr_page_get_father_block( @@ -1667,26 +1041,123 @@ btr_page_get_father( mem_heap_free(heap); } -/************************************************************//** -Creates the root node for a new index tree. -@return page number of the created root, FIL_NULL if did not succeed */ -UNIV_INTERN +/** Free a B-tree root page. btr_free_but_not_root() must already +have been called. +In a persistent tablespace, the caller must invoke fsp_init_file_page() +before mtr.commit(). +@param[in,out] block index root page +@param[in,out] mtr mini-transaction */ +static +void +btr_free_root( + buf_block_t* block, + mtr_t* mtr) +{ + fseg_header_t* header; + + ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr->is_named_space(block->page.id.space())); + + btr_search_drop_page_hash_index(block); + + header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP; +#ifdef UNIV_BTR_DEBUG + ut_a(btr_root_fseg_validate(header, block->page.id.space())); +#endif /* UNIV_BTR_DEBUG */ + + while (!fseg_free_step(header, true, mtr)) { + /* Free the entire segment in small steps. 
*/ + } +} + +/** PAGE_INDEX_ID value for freed index B-trees */ +static const index_id_t BTR_FREED_INDEX_ID = 0; + +/** Invalidate an index root page so that btr_free_root_check() +will not find it. +@param[in,out] block index root page +@param[in,out] mtr mini-transaction */ +static +void +btr_free_root_invalidate( + buf_block_t* block, + mtr_t* mtr) +{ + ut_ad(page_is_root(block->frame)); + + btr_page_set_index_id( + buf_block_get_frame(block), + buf_block_get_page_zip(block), + BTR_FREED_INDEX_ID, mtr); +} + +/** Prepare to free a B-tree. +@param[in] page_id page id +@param[in] page_size page size +@param[in] index_id PAGE_INDEX_ID contents +@param[in,out] mtr mini-transaction +@return root block, to invoke btr_free_but_not_root() and btr_free_root() +@retval NULL if the page is no longer a matching B-tree page */ +static MY_ATTRIBUTE((warn_unused_result)) +buf_block_t* +btr_free_root_check( + const page_id_t& page_id, + const page_size_t& page_size, + index_id_t index_id, + mtr_t* mtr) +{ + ut_ad(page_id.space() != srv_tmp_space.space_id()); + ut_ad(index_id != BTR_FREED_INDEX_ID); + + buf_block_t* block = buf_page_get( + page_id, page_size, RW_X_LATCH, mtr); + + if (block) { + buf_block_dbg_add_level(block, SYNC_TREE_NODE); + + if (fil_page_index_page_check(block->frame) + && index_id == btr_page_get_index_id(block->frame)) { + /* This should be a root page. + It should not be possible to reassign the same + index_id for some other index in the tablespace. */ + ut_ad(page_is_root(block->frame)); + } else { + block = NULL; + } + } + + return(block); +} + +/** Create the root node for a new index tree. 
+@param[in] type type of the index +@param[in] space space where created +@param[in] page_size page size +@param[in] index_id index id +@param[in] index index, or NULL when applying TRUNCATE +log record during recovery +@param[in] btr_redo_create_info used for applying TRUNCATE log +@param[in] mtr mini-transaction handle +record during recovery +@return page number of the created root, FIL_NULL if did not succeed */ ulint btr_create( -/*=======*/ - ulint type, /*!< in: type of the index */ - ulint space, /*!< in: space where created */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - index_id_t index_id,/*!< in: index id */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr) /*!< in: mini-transaction handle */ + ulint type, + ulint space, + const page_size_t& page_size, + index_id_t index_id, + dict_index_t* index, + const btr_create_t* btr_redo_create_info, + mtr_t* mtr) { - ulint page_no; - buf_block_t* block; - buf_frame_t* frame; - page_t* page; - page_zip_des_t* page_zip; + ulint page_no; + buf_block_t* block; + buf_frame_t* frame; + page_t* page; + page_zip_des_t* page_zip; + + ut_ad(mtr->is_named_space(space)); + ut_ad(index_id != BTR_FREED_INDEX_ID); /* Create the two new segments (one, in the case of an ibuf tree) for the index tree; the segment headers are put on the allocated root page @@ -1699,10 +1170,14 @@ btr_create( space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); + if (ibuf_hdr_block == NULL) { + return(FIL_NULL); + } + buf_block_dbg_add_level( ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW); - ut_ad(buf_block_get_page_no(ibuf_hdr_block) + ut_ad(ibuf_hdr_block->page.id.page_no() == IBUF_HEADER_PAGE_NO); /* Allocate then the next page to the segment: it will be the tree root page */ @@ -1712,16 +1187,8 @@ btr_create( + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO, FSP_UP, mtr); - ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO); + ut_ad(block->page.id.page_no() == 
IBUF_TREE_ROOT_PAGE_NO); } else { -#ifdef UNIV_BLOB_DEBUG - if ((type & DICT_CLUSTERED) && !index->blobs) { - mutex_create(PFS_NOT_INSTRUMENTED, - &index->blobs_mutex, SYNC_ANY_LATCH); - index->blobs = rbt_create(sizeof(btr_blob_dbg_t), - btr_blob_dbg_cmp); - } -#endif /* UNIV_BLOB_DEBUG */ block = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); } @@ -1731,7 +1198,7 @@ btr_create( return(FIL_NULL); } - page_no = buf_block_get_page_no(block); + page_no = block->page.id.page_no(); frame = buf_block_get_frame(block); if (type & DICT_IBUF) { @@ -1750,7 +1217,10 @@ btr_create( PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { /* Not enough space for new segment, free root segment before return. */ - btr_free_root(space, zip_size, page_no, mtr); + btr_free_root(block, mtr); + if (!dict_table_is_temporary(index->table)) { + btr_free_root_invalidate(block, mtr); + } return(FIL_NULL); } @@ -1764,16 +1234,48 @@ btr_create( page_zip = buf_block_get_page_zip(block); if (page_zip) { - page = page_create_zip(block, index, 0, 0, mtr); + if (index != NULL) { + page = page_create_zip(block, index, 0, 0, NULL, mtr); + } else { + /* Create a compressed index page when applying + TRUNCATE log record during recovery */ + ut_ad(btr_redo_create_info != NULL); + + redo_page_compress_t page_comp_info; + + page_comp_info.type = type; + + page_comp_info.index_id = index_id; + + page_comp_info.n_fields = + btr_redo_create_info->n_fields; + + page_comp_info.field_len = + btr_redo_create_info->field_len; + + page_comp_info.fields = btr_redo_create_info->fields; + + page_comp_info.trx_id_pos = + btr_redo_create_info->trx_id_pos; + + page = page_create_zip(block, NULL, 0, 0, + &page_comp_info, mtr); + } } else { - page = page_create(block, mtr, - dict_table_is_comp(index->table)); + if (index != NULL) { + page = page_create(block, mtr, + dict_table_is_comp(index->table), + dict_index_is_spatial(index)); + } else { + ut_ad(btr_redo_create_info != NULL); + page = page_create( + block, mtr, 
btr_redo_create_info->format_flags, + type == DICT_SPATIAL); + } /* Set the level of the new index page */ btr_page_set_level(page, NULL, 0, mtr); } - block->check_index_page_at_flush = TRUE; - /* Set the index id of the page */ btr_page_set_index_id(page, page_zip, index_id, mtr); @@ -1783,9 +1285,16 @@ btr_create( /* We reset the free bits for the page to allow creation of several trees in the same mtr, otherwise the latch on a bitmap page would - prevent it because of the latching order */ + prevent it because of the latching order. + + index will be NULL if we are recreating the table during recovery + on behalf of TRUNCATE. + + Note: Insert Buffering is disabled for temporary tables given that + most temporary tables are smaller in size and short-lived. */ + if (!(type & DICT_CLUSTERED) + && (index == NULL || !dict_table_is_temporary(index->table))) { - if (!(type & DICT_CLUSTERED)) { ibuf_reset_free_bits(block); } @@ -1798,39 +1307,39 @@ btr_create( return(page_no); } -/************************************************************//** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ -UNIV_INTERN +/** Free a B-tree except the root page. The root page MUST be freed after +this by calling btr_free_root. 
+@param[in,out] block root page +@param[in] log_mode mtr logging mode */ +static void btr_free_but_not_root( -/*==================*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no) /*!< in: root page number */ + buf_block_t* block, + mtr_log_t log_mode) { ibool finished; - page_t* root; mtr_t mtr; + ut_ad(page_is_root(block->frame)); leaf_loop: mtr_start(&mtr); + mtr_set_log_mode(&mtr, log_mode); + mtr.set_named_space(block->page.id.space()); + + page_t* root = block->frame; - root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, &mtr); #ifdef UNIV_BTR_DEBUG ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, space)); + + root, block->page.id.space())); ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); + + root, block->page.id.space())); #endif /* UNIV_BTR_DEBUG */ /* NOTE: page hash indexes are dropped when a page is freed inside fsp0fsp. */ finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, - &mtr); + true, &mtr); mtr_commit(&mtr); if (!finished) { @@ -1839,16 +1348,18 @@ leaf_loop: } top_loop: mtr_start(&mtr); + mtr_set_log_mode(&mtr, log_mode); + mtr.set_named_space(block->page.id.space()); + + root = block->frame; - root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, &mtr); #ifdef UNIV_BTR_DEBUG ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); + + root, block->page.id.space())); #endif /* UNIV_BTR_DEBUG */ finished = fseg_free_step_not_header( - root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr); + root + PAGE_HEADER + PAGE_BTR_SEG_TOP, true, &mtr); mtr_commit(&mtr); if (!finished) { @@ -1857,34 +1368,51 @@ top_loop: } } -/************************************************************//** -Frees the B-tree root page. Other tree MUST already have been freed. */ -UNIV_INTERN +/** Free a persistent index tree if it exists. 
+@param[in] page_id root page id +@param[in] page_size page size +@param[in] index_id PAGE_INDEX_ID contents +@param[in,out] mtr mini-transaction */ void -btr_free_root( -/*==========*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no, /*!< in: root page number */ - mtr_t* mtr) /*!< in/out: mini-transaction */ +btr_free_if_exists( + const page_id_t& page_id, + const page_size_t& page_size, + index_id_t index_id, + mtr_t* mtr) { - buf_block_t* block; - fseg_header_t* header; + buf_block_t* root = btr_free_root_check( + page_id, page_size, index_id, mtr); - block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, mtr); - - btr_search_drop_page_hash_index(block); - - header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP; -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(header, space)); -#endif /* UNIV_BTR_DEBUG */ - - while (!fseg_free_step(header, mtr)) { - /* Free the entire segment in small steps. */ + if (root == NULL) { + return; } + + btr_free_but_not_root(root, mtr->get_log_mode()); + mtr->set_named_space(page_id.space()); + btr_free_root(root, mtr); + btr_free_root_invalidate(root, mtr); +} + +/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE. +@param[in] page_id root page id +@param[in] page_size page size */ +void +btr_free( + const page_id_t& page_id, + const page_size_t& page_size) +{ + mtr_t mtr; + mtr.start(); + mtr.set_log_mode(MTR_LOG_NO_REDO); + + buf_block_t* block = buf_page_get( + page_id, page_size, RW_X_LATCH, &mtr); + + ut_ad(page_is_root(block->frame)); + + btr_free_but_not_root(block, MTR_LOG_NO_REDO); + btr_free_root(block, &mtr); + mtr.commit(); } #endif /* !UNIV_HOTBACKUP */ @@ -1899,7 +1427,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. 
@retval true if the operation was successful @retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN bool btr_page_reorganize_low( /*====================*/ @@ -1922,7 +1449,6 @@ btr_page_reorganize_low( page_zip_des_t* page_zip = buf_block_get_page_zip(block); buf_block_t* temp_block; page_t* temp_page; - ulint log_mode; ulint data_size1; ulint data_size2; ulint max_ins_size1; @@ -1930,8 +1456,9 @@ btr_page_reorganize_low( bool success = false; ulint pos; bool log_compressed; + bool is_spatial; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); btr_assert_not_corrupted(block, index); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); @@ -1940,7 +1467,7 @@ btr_page_reorganize_low( max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); /* Turn logging off */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); + mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP temp_block = buf_block_alloc(buf_pool); @@ -1952,6 +1479,11 @@ btr_page_reorganize_low( MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS); + /* This function can be called by log redo with a "dummy" index. + So we would trust more on the original page's type */ + is_spatial = (fil_page_get_type(page) == FIL_PAGE_RTREE + || dict_index_is_spatial(index)); + /* Copy the old page to temporary space */ buf_frame_copy(temp_page, page); @@ -1959,10 +1491,7 @@ btr_page_reorganize_low( if (!recovery) { btr_search_drop_page_hash_index(block); } - - block->check_index_page_at_flush = TRUE; #endif /* !UNIV_HOTBACKUP */ - btr_blob_dbg_remove(page, index, "btr_page_reorganize"); /* Save the cursor position. */ pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor)); @@ -1970,7 +1499,7 @@ btr_page_reorganize_low( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) 
is preserved intact */ - page_create(block, mtr, dict_table_is_comp(index->table)); + page_create(block, mtr, dict_table_is_comp(index->table), is_spatial); /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ @@ -1979,7 +1508,13 @@ btr_page_reorganize_low( page_get_infimum_rec(temp_page), index, mtr); - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { + /* Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it not required + for MVCC. */ + if (dict_index_is_sec_or_ibuf(index) + && page_is_leaf(page) + && !dict_table_is_temporary(index->table)) { /* Copy max trx id to recreated page */ trx_id_t max_trx_id = page_get_max_trx_id(temp_page); page_set_max_trx_id(block, NULL, max_trx_id, mtr); @@ -1998,12 +1533,9 @@ btr_page_reorganize_low( } if (page_zip - && !page_zip_compress(page_zip, page, index, z_level, mtr)) { + && !page_zip_compress(page_zip, page, index, z_level, NULL, mtr)) { /* Restore the old page and exit. */ - btr_blob_dbg_restore(page, temp_page, index, - "btr_page_reorganize_compress_fail"); - #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG /* Check that the bytes that we skip are identical. */ ut_a(!memcmp(page, temp_page, PAGE_HEADER)); @@ -2028,7 +1560,8 @@ btr_page_reorganize_low( } #ifndef UNIV_HOTBACKUP - if (!recovery) { + /* No locks are acquried for intrinsic tables. 
*/ + if (!recovery && !dict_table_is_locking_disabled(index->table)) { /* Update the record lock bitmaps */ lock_move_reorganize_page(block, temp_block); } @@ -2038,19 +1571,13 @@ btr_page_reorganize_low( max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH); + ib::error() + << "Page old data size " << data_size1 + << " new data size " << data_size2 + << ", page old max ins size " << max_ins_size1 + << " new max ins size " << max_ins_size2; - fprintf(stderr, - "InnoDB: Error: page old data size %lu" - " new data size %lu\n" - "InnoDB: Error: page old max ins size %lu" - " new max ins size %lu\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned long) data_size1, (unsigned long) data_size2, - (unsigned long) max_ins_size1, - (unsigned long) max_ins_size2); + ib::error() << BUG_REPORT_MSG; ut_ad(0); } else { success = true; @@ -2076,8 +1603,8 @@ func_exit: #ifndef UNIV_HOTBACKUP if (success) { - byte type; - byte* log_ptr; + mlog_id_t type; + byte* log_ptr; /* Write the log record */ if (page_zip) { @@ -2119,7 +1646,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. @retval true if the operation was successful @retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN bool btr_page_reorganize_block( /*======================*/ @@ -2152,7 +1678,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. @retval true if the operation was successful @retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN bool btr_page_reorganize( /*================*/ @@ -2167,8 +1692,7 @@ btr_page_reorganize( /***********************************************************//** Parses a redo log record of reorganizing a page. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_parse_page_reorganize( /*======================*/ @@ -2183,6 +1707,7 @@ btr_parse_page_reorganize( ut_ad(ptr != NULL); ut_ad(end_ptr != NULL); + ut_ad(index != NULL); /* If dealing with a compressed page the record has the compression level used during original compression written in @@ -2222,26 +1747,24 @@ btr_page_empty( { page_t* page = buf_block_get_frame(block); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(page_zip == buf_block_get_page_zip(block)); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ btr_search_drop_page_hash_index(block); - btr_blob_dbg_remove(page, index, "btr_page_empty"); /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ if (page_zip) { - page_create_zip(block, index, level, 0, mtr); + page_create_zip(block, index, level, 0, NULL, mtr); } else { - page_create(block, mtr, dict_table_is_comp(index->table)); + page_create(block, mtr, dict_table_is_comp(index->table), + dict_index_is_spatial(index)); btr_page_set_level(page, NULL, level, mtr); } - - block->check_index_page_at_flush = TRUE; } /*************************************************************//** @@ -2250,8 +1773,7 @@ the tuple. It is assumed that mtr contains an x-latch on the tree. NOTE that the operation of this function must always succeed, we cannot reverse it: therefore enough free disk space must be guaranteed to be available before this function is called. 
-@return inserted record or NULL if run out of space */ -UNIV_INTERN +@return inserted record */ rec_t* btr_root_raise_and_insert( /*======================*/ @@ -2300,9 +1822,12 @@ btr_root_raise_and_insert( ut_a(dict_index_get_page(index) == page_get_page_no(root)); #endif /* UNIV_BTR_DEBUG */ - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_is_block_fix( + mtr, root_block, MTR_MEMO_PAGE_X_FIX, index->table)); /* Allocate a new page to the tree. Root splitting is done by first moving the root records to the new page, emptying the root, putting @@ -2314,7 +1839,7 @@ btr_root_raise_and_insert( if (new_block == NULL && os_has_said_disk_full) { return(NULL); - } + } new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); @@ -2346,8 +1871,15 @@ btr_root_raise_and_insert( /* Update the lock table and possible hash index. 
*/ - lock_move_rec_list_end(new_block, root_block, - page_get_infimum_rec(root)); + if (!dict_table_is_locking_disabled(index->table)) { + lock_move_rec_list_end(new_block, root_block, + page_get_infimum_rec(root)); + } + + /* Move any existing predicate locks */ + if (dict_index_is_spatial(index)) { + lock_prdt_rec_move(new_block, root_block); + } btr_search_move_or_delete_hash_entries(new_block, root_block, index); @@ -2358,7 +1890,9 @@ btr_root_raise_and_insert( information of the record to be inserted on the infimum of the root page: we cannot discard the lock structs on the root page */ - lock_update_root_raise(new_block, root_block); + if (!dict_table_is_locking_disabled(index->table)) { + lock_update_root_raise(new_block, root_block); + } /* Create a memory heap where the node pointer is stored */ if (!*heap) { @@ -2366,13 +1900,20 @@ btr_root_raise_and_insert( } rec = page_rec_get_next(page_get_infimum_rec(new_page)); - new_page_no = buf_block_get_page_no(new_block); + new_page_no = new_block->page.id.page_no(); /* Build the node pointer (= node key and page address) for the child */ + if (dict_index_is_spatial(index)) { + rtr_mbr_t new_mbr; - node_ptr = dict_index_build_node_ptr( - index, rec, new_page_no, *heap, level); + rtr_page_cal_mbr(index, new_block, &new_mbr, *heap); + node_ptr = rtr_index_build_node_ptr( + index, &new_mbr, rec, new_page_no, *heap, level); + } else { + node_ptr = dict_index_build_node_ptr( + index, rec, new_page_no, *heap, level); + } /* The node pointer must be marked as the predefined minimum record, as there is no lower alphabetical limit to records in the leftmost node of a level: */ @@ -2406,33 +1947,34 @@ btr_root_raise_and_insert( /* We play safe and reset the free bits for the new page */ -#if 0 - fprintf(stderr, "Root raise new page no %lu\n", new_page_no); -#endif - - if (!dict_index_is_clust(index)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table)) { ibuf_reset_free_bits(new_block); } if 
(tuple != NULL) { /* Reposition the cursor to the child node */ - page_cur_search(new_block, index, tuple, - PAGE_CUR_LE, page_cursor); + page_cur_search(new_block, index, tuple, page_cursor); } else { /* Set cursor to first record on child node */ page_cur_set_before_first(new_block, page_cursor); } /* Split the child and insert tuple */ - return(btr_page_split_and_insert(flags, cursor, offsets, heap, - tuple, n_ext, mtr)); + if (dict_index_is_spatial(index)) { + /* Split rtree page and insert tuple */ + return(rtr_page_split_and_insert(flags, cursor, offsets, heap, + tuple, n_ext, mtr)); + } else { + return(btr_page_split_and_insert(flags, cursor, offsets, heap, + tuple, n_ext, mtr)); + } } /*************************************************************//** Decides if the page should be split at the convergence point of inserts converging to the left. -@return TRUE if split recommended */ -UNIV_INTERN +@return TRUE if split recommended */ ibool btr_page_get_split_rec_to_left( /*===========================*/ @@ -2476,8 +2018,7 @@ btr_page_get_split_rec_to_left( /*************************************************************//** Decides if the page should be split at the convergence point of inserts converging to the right. -@return TRUE if split recommended */ -UNIV_INTERN +@return TRUE if split recommended */ ibool btr_page_get_split_rec_to_right( /*============================*/ @@ -2653,7 +2194,7 @@ func_exit: /*************************************************************//** Returns TRUE if the insert fits on the appropriate half-page with the chosen split_rec. -@return true if fits */ +@return true if fits */ static MY_ATTRIBUTE((nonnull(1,3,4,6), warn_unused_result)) bool btr_page_insert_fits( @@ -2746,7 +2287,6 @@ btr_page_insert_fits( /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. 
*/ -UNIV_INTERN void btr_insert_on_non_leaf_level_func( /*==============================*/ @@ -2762,14 +2302,48 @@ btr_insert_on_non_leaf_level_func( btr_cur_t cursor; dberr_t err; rec_t* rec; - ulint* offsets = NULL; mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + rtr_info_t rtr_info; ut_ad(level > 0); - btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, - &cursor, 0, file, line, mtr); + if (!dict_index_is_spatial(index)) { + dberr_t err = DB_SUCCESS; + if (dict_table_is_intrinsic(index->table)) { + err = btr_cur_search_to_nth_level_with_no_latch( + index, level, tuple, PAGE_CUR_LE, &cursor, + __FILE__, __LINE__, mtr); + } else { + err = btr_cur_search_to_nth_level( + index, level, tuple, PAGE_CUR_LE, + BTR_CONT_MODIFY_TREE, + &cursor, 0, file, line, mtr); + } + + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " btr_page_get_father_node_ptr_func " + << " level: " << level + << " called from file: " + << file << " line: " << line + << " table: " << index->table->name + << " index: " << index->name; + } + } else { + /* For spatial index, initialize structures to track + its parents etc. 
*/ + rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); + + rtr_info_update_btr(&cursor, &rtr_info); + + btr_cur_search_to_nth_level(index, level, tuple, + PAGE_CUR_RTREE_INSERT, + BTR_CONT_MODIFY_TREE, + &cursor, 0, file, line, mtr); + } ut_ad(cursor.flag == BTR_CUR_BINARY); @@ -2791,7 +2365,16 @@ btr_insert_on_non_leaf_level_func( &dummy_big_rec, 0, NULL, mtr); ut_a(err == DB_SUCCESS); } - mem_heap_free(heap); + + if (heap != NULL) { + mem_heap_free(heap); + } + + if (dict_index_is_spatial(index)) { + ut_ad(cursor.rtr_info); + + rtr_clean_rtr_info(&rtr_info, true); + } } /**************************************************************//** @@ -2811,8 +2394,6 @@ btr_attach_half_pages( ulint direction, /*!< in: FSP_UP or FSP_DOWN */ mtr_t* mtr) /*!< in: mtr */ { - ulint space; - ulint zip_size; ulint prev_page_no; ulint next_page_no; ulint level; @@ -2825,9 +2406,12 @@ btr_attach_half_pages( page_zip_des_t* upper_page_zip; dtuple_t* node_ptr_upper; mem_heap_t* heap; + buf_block_t* prev_block = NULL; + buf_block_t* next_block = NULL; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); + ut_ad(mtr_is_block_fix( + mtr, new_block, MTR_MEMO_PAGE_X_FIX, index->table)); /* Create a memory heap where the data tuple is stored */ heap = mem_heap_create(1024); @@ -2839,10 +2423,10 @@ btr_attach_half_pages( ulint* offsets; lower_page = buf_block_get_frame(new_block); - lower_page_no = buf_block_get_page_no(new_block); + lower_page_no = new_block->page.id.page_no(); lower_page_zip = buf_block_get_page_zip(new_block); upper_page = buf_block_get_frame(block); - upper_page_no = buf_block_get_page_no(block); + upper_page_no = block->page.id.page_no(); upper_page_zip = buf_block_get_page_zip(block); /* Look up the index for the node pointer to page */ @@ -2859,13 +2443,31 @@ btr_attach_half_pages( mem_heap_empty(heap); } else { 
lower_page = buf_block_get_frame(block); - lower_page_no = buf_block_get_page_no(block); + lower_page_no = block->page.id.page_no(); lower_page_zip = buf_block_get_page_zip(block); upper_page = buf_block_get_frame(new_block); - upper_page_no = buf_block_get_page_no(new_block); + upper_page_no = new_block->page.id.page_no(); upper_page_zip = buf_block_get_page_zip(new_block); } + /* Get the previous and next pages of page */ + prev_page_no = btr_page_get_prev(page, mtr); + next_page_no = btr_page_get_next(page, mtr); + + const ulint space = block->page.id.space(); + + /* for consistency, both blocks should be locked, before change */ + if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { + prev_block = btr_block_get( + page_id_t(space, prev_page_no), block->page.size, + RW_X_LATCH, index, mtr); + } + if (next_page_no != FIL_NULL && direction != FSP_DOWN) { + next_block = btr_block_get( + page_id_t(space, next_page_no), block->page.size, + RW_X_LATCH, index, mtr); + } + /* Get the level of the split pages */ level = btr_page_get_level(buf_block_get_frame(block), mtr); ut_ad(level @@ -2886,22 +2488,13 @@ btr_attach_half_pages( /* Free the memory heap */ mem_heap_free(heap); - /* Get the previous and next pages of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_block_get_space(block); - zip_size = buf_block_get_zip_size(block); - /* Update page links of the level */ - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block = btr_block_get( - space, zip_size, prev_page_no, RW_X_LATCH, index, mtr); + if (prev_block) { #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(prev_block->frame) == page_is_comp(page)); ut_a(btr_page_get_next(prev_block->frame, mtr) - == buf_block_get_page_no(block)); + == block->page.id.page_no()); #endif /* UNIV_BTR_DEBUG */ btr_page_set_next(buf_block_get_frame(prev_block), @@ -2909,9 +2502,7 @@ btr_attach_half_pages( lower_page_no, mtr); } - if (next_page_no != FIL_NULL) { - 
buf_block_t* next_block = btr_block_get( - space, zip_size, next_page_no, RW_X_LATCH, index, mtr); + if (next_block) { #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(next_block->frame) == page_is_comp(page)); ut_a(btr_page_get_prev(next_block->frame, mtr) @@ -2923,11 +2514,24 @@ btr_attach_half_pages( upper_page_no, mtr); } - btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr); - btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr); + if (direction == FSP_DOWN) { + /* lower_page is new */ + btr_page_set_prev(lower_page, lower_page_zip, + prev_page_no, mtr); + } else { + ut_ad(btr_page_get_prev(lower_page, mtr) == prev_page_no); + } + btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr); btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr); - btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr); + + if (direction != FSP_DOWN) { + /* upper_page is new */ + btr_page_set_next(upper_page, upper_page_zip, + next_page_no, mtr); + } else { + ut_ad(btr_page_get_next(upper_page, mtr) == next_page_no); + } } /*************************************************************//** @@ -2989,9 +2593,12 @@ btr_insert_into_right_sibling( page_t* page = buf_block_get_frame(block); ulint next_page_no = btr_page_get_next(page, mtr); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(dict_table_is_intrinsic(cursor->index->table) + || mtr_memo_contains_flagged( + mtr, dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + ut_ad(mtr_is_block_fix( + mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table)); ut_ad(heap); if (next_page_no == FIL_NULL || !page_rec_is_supremum( @@ -3005,12 +2612,13 @@ btr_insert_into_right_sibling( page_t* next_page; btr_cur_t next_father_cursor; rec_t* rec = NULL; - ulint zip_size = buf_block_get_zip_size(block); ulint max_size; + const ulint space = block->page.id.space(); + 
next_block = btr_block_get( - buf_block_get_space(block), zip_size, - next_page_no, RW_X_LATCH, cursor->index, mtr); + page_id_t(space, next_page_no), block->page.size, + RW_X_LATCH, cursor->index, mtr); next_page = buf_block_get_frame(next_block); bool is_leaf = page_is_leaf(next_page); @@ -3025,15 +2633,19 @@ btr_insert_into_right_sibling( max_size = page_get_max_insert_size_after_reorganize(next_page, 1); /* Extends gap lock for the next page */ - lock_update_split_left(next_block, block); + if (!dict_table_is_locking_disabled(cursor->index->table)) { + lock_update_split_left(next_block, block); + } rec = page_cur_tuple_insert( &next_page_cursor, tuple, cursor->index, offsets, &heap, n_ext, mtr); if (rec == NULL) { - if (zip_size && is_leaf - && !dict_index_is_clust(cursor->index)) { + if (is_leaf + && next_block->page.size.is_compressed() + && !dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table)) { /* Reset the IBUF_BITMAP_FREE bits, because page_cur_tuple_insert() will have attempted page reorganize before failing. */ @@ -3056,7 +2668,7 @@ btr_insert_into_right_sibling( compressed = btr_cur_pessimistic_delete( &err, TRUE, &next_father_cursor, - BTR_CREATE_FLAG, RB_NONE, mtr); + BTR_CREATE_FLAG, false, mtr); ut_a(err == DB_SUCCESS); @@ -3065,7 +2677,7 @@ btr_insert_into_right_sibling( } dtuple_t* node_ptr = dict_index_build_node_ptr( - cursor->index, rec, buf_block_get_page_no(next_block), + cursor->index, rec, next_block->page.id.page_no(), heap, level); btr_insert_on_non_leaf_level( @@ -3073,11 +2685,13 @@ btr_insert_into_right_sibling( ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); - if (is_leaf && !dict_index_is_clust(cursor->index)) { + if (is_leaf + && !dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table)) { /* Update the free bits of the B-tree page in the insert buffer bitmap. 
*/ - if (zip_size) { + if (next_block->page.size.is_compressed()) { ibuf_update_free_bits_zip(next_block, mtr); } else { ibuf_update_free_bits_if_full( @@ -3096,12 +2710,10 @@ released within this function! NOTE that the operation of this function must always succeed, we cannot reverse it: therefore enough free disk space (2 pages) must be guaranteed to be available before this function is called. - NOTE: jonaso added support for calling function with tuple == NULL which cause it to only split a page. @return inserted record or NULL if run out of space */ -UNIV_INTERN rec_t* btr_page_split_and_insert( /*======================*/ @@ -3137,6 +2749,15 @@ btr_page_split_and_insert( ulint n_iterations = 0; rec_t* rec; ulint n_uniq; + dict_index_t* index; + + index = btr_cur_get_index(cursor); + + if (dict_index_is_spatial(index)) { + /* Split rtree page and update parent */ + return(rtr_page_split_and_insert(flags, cursor, offsets, heap, + tuple, n_ext, mtr)); + } if (!*heap) { *heap = mem_heap_create(1024); @@ -3146,20 +2767,23 @@ func_start: mem_heap_empty(*heap); *offsets = NULL; - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); + ut_ad(mtr_memo_contains_flagged(mtr, + dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(cursor->index->table)); ut_ad(!dict_index_is_online_ddl(cursor->index) || (flags & BTR_CREATE_FLAG) || dict_index_is_clust(cursor->index)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own_flagged(dict_index_get_lock(cursor->index), + RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX) + || dict_table_is_intrinsic(cursor->index->table)); block = btr_cur_get_block(cursor); page = buf_block_get_frame(block); page_zip = buf_block_get_page_zip(block); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix( + mtr, block, MTR_MEMO_PAGE_X_FIX, 
cursor->index->table)); ut_ad(!page_is_empty(page)); /* try to insert to the next page if possible before split */ @@ -3170,7 +2794,7 @@ func_start: return(rec); } - page_no = buf_block_get_page_no(block); + page_no = block->page.id.page_no(); /* 1. Decide the split record; split_rec == NULL means that the tuple to be inserted should be the first record on the upper @@ -3216,7 +2840,7 @@ func_start: DBUG_EXECUTE_IF("disk_is_full", os_has_said_disk_full = true; - return(NULL);); + return(NULL);); /* 2. Allocate a new page to the index */ new_block = btr_page_alloc(cursor->index, hint_page_no, direction, @@ -3224,7 +2848,7 @@ func_start: if (new_block == NULL && os_has_said_disk_full) { return(NULL); - } + } new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); @@ -3269,8 +2893,9 @@ func_start: insert_empty: ut_ad(!split_rec); ut_ad(!insert_left); - buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index, - tuple, n_ext)); + buf = UT_NEW_ARRAY_NOKEY( + byte, + rec_get_converted_size(cursor->index, tuple, n_ext)); first_rec = rec_convert_dtuple_to_rec(buf, cursor->index, tuple, n_ext); @@ -3295,7 +2920,7 @@ insert_empty: offsets, tuple, n_ext, heap); } else { if (!insert_left) { - mem_free(buf); + UT_DELETE_ARRAY(buf); buf = NULL; } @@ -3304,11 +2929,18 @@ insert_empty: offsets, tuple, n_ext, heap); } - if (insert_will_fit && page_is_leaf(page) + if (!srv_read_only_mode + && !dict_table_is_intrinsic(cursor->index->table) + && insert_will_fit + && page_is_leaf(page) && !dict_index_is_online_ddl(cursor->index)) { - mtr_memo_release(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); + mtr->memo_release( + dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK); + + /* NOTE: We cannot release root block latch here, because it + has segment header and already modified in most of cases.*/ } /* 5. 
Move then the records to the new page */ @@ -3337,9 +2969,12 @@ insert_empty: /* Update the lock table and possible hash index. */ - lock_move_rec_list_start( - new_block, block, move_limit, - new_page + PAGE_NEW_INFIMUM); + if (!dict_table_is_locking_disabled( + cursor->index->table)) { + lock_move_rec_list_start( + new_block, block, move_limit, + new_page + PAGE_NEW_INFIMUM); + } btr_search_move_or_delete_hash_entries( new_block, block, cursor->index); @@ -3353,7 +2988,9 @@ insert_empty: left_block = new_block; right_block = block; - lock_update_split_left(right_block, left_block); + if (!dict_table_is_locking_disabled(cursor->index->table)) { + lock_update_split_left(right_block, left_block); + } } else { /* fputs("Split right\n", stderr); */ @@ -3377,8 +3014,13 @@ insert_empty: cursor->index, mtr); /* Update the lock table and possible hash index. */ + if (!dict_table_is_locking_disabled( + cursor->index->table)) { + lock_move_rec_list_end( + new_block, block, move_limit); + } - lock_move_rec_list_end(new_block, block, move_limit); + ut_ad(!dict_index_is_spatial(index)); btr_search_move_or_delete_hash_entries( new_block, block, cursor->index); @@ -3394,7 +3036,9 @@ insert_empty: left_block = block; right_block = new_block; - lock_update_split_right(right_block, left_block); + if (!dict_table_is_locking_disabled(cursor->index->table)) { + lock_update_split_right(right_block, left_block); + } } #ifdef UNIV_ZIP_DEBUG @@ -3424,8 +3068,7 @@ insert_empty: /* 7. 
Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); - page_cur_search(insert_block, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); + page_cur_search(insert_block, cursor->index, tuple, page_cursor); rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, offsets, heap, n_ext, mtr); @@ -3466,14 +3109,13 @@ insert_empty: /* The insert did not fit on the page: loop back to the start of the function for a new split */ insert_failed: - /* We play safe and reset the free bits */ - if (!dict_index_is_clust(cursor->index)) { + /* We play safe and reset the free bits for new_page */ + if (!dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table)) { ibuf_reset_free_bits(new_block); ibuf_reset_free_bits(block); } - /* fprintf(stderr, "Split second round %lu\n", - page_get_page_no(page)); */ n_iterations++; ut_ad(n_iterations < 2 || buf_block_get_page_zip(insert_block)); @@ -3486,17 +3128,14 @@ func_exit: /* Insert fit on the page: update the free bits for the left and right pages in the same mtr */ - if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) { + if (!dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table) + && page_is_leaf(page)) { + ibuf_update_free_bits_for_two_pages_low( - buf_block_get_zip_size(left_block), left_block, right_block, mtr); } -#if 0 - fprintf(stderr, "Split and insert done %lu %lu\n", - buf_block_get_page_no(left_block), - buf_block_get_page_no(right_block)); -#endif MONITOR_INC(MONITOR_INDEX_SPLIT); ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); @@ -3509,37 +3148,36 @@ func_exit: return(rec); } -/*************************************************************//** -Removes a page from the level list of pages. */ -UNIV_INTERN +/** Removes a page from the level list of pages. 
+@param[in] space space where removed +@param[in] page_size page size +@param[in,out] page page to remove +@param[in] index index tree +@param[in,out] mtr mini-transaction */ void btr_level_list_remove_func( -/*=======================*/ - ulint space, /*!< in: space where removed */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - page_t* page, /*!< in/out: page to remove */ - dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint space, + const page_size_t& page_size, + page_t* page, + dict_index_t* index, + mtr_t* mtr) { - ulint prev_page_no; - ulint next_page_no; - ut_ad(page != NULL); ut_ad(mtr != NULL); - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_page_fix(mtr, page, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(space == page_get_space_id(page)); /* Get the previous and next page numbers of page */ - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); + const ulint prev_page_no = btr_page_get_prev(page, mtr); + const ulint next_page_no = btr_page_get_next(page, mtr); /* Update page links of the level */ if (prev_page_no != FIL_NULL) { buf_block_t* prev_block - = btr_block_get(space, zip_size, prev_page_no, - RW_X_LATCH, index, mtr); + = btr_block_get(page_id_t(space, prev_page_no), + page_size, RW_X_LATCH, index, mtr); + page_t* prev_page = buf_block_get_frame(prev_block); #ifdef UNIV_BTR_DEBUG @@ -3555,8 +3193,10 @@ btr_level_list_remove_func( if (next_page_no != FIL_NULL) { buf_block_t* next_block - = btr_block_get(space, zip_size, next_page_no, - RW_X_LATCH, index, mtr); + = btr_block_get( + page_id_t(space, next_page_no), page_size, + RW_X_LATCH, index, mtr); + page_t* next_page = buf_block_get_frame(next_block); #ifdef UNIV_BTR_DEBUG @@ -3578,9 +3218,10 @@ UNIV_INLINE void btr_set_min_rec_mark_log( /*=====================*/ - rec_t* rec, /*!< in: record */ - byte type, /*!< in: MLOG_COMP_REC_MIN_MARK 
or MLOG_REC_MIN_MARK */ - mtr_t* mtr) /*!< in: mtr */ + rec_t* rec, /*!< in: record */ + mlog_id_t type, /*!< in: MLOG_COMP_REC_MIN_MARK or + MLOG_REC_MIN_MARK */ + mtr_t* mtr) /*!< in: mtr */ { mlog_write_initial_log_record(rec, type, mtr); @@ -3594,8 +3235,7 @@ btr_set_min_rec_mark_log( /****************************************************************//** Parses the redo log record for setting an index record as the predefined minimum record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_parse_set_min_rec_mark( /*=======================*/ @@ -3625,7 +3265,6 @@ btr_parse_set_min_rec_mark( /****************************************************************//** Sets a record as the predefined minimum record. */ -UNIV_INTERN void btr_set_min_rec_mark( /*=================*/ @@ -3652,7 +3291,6 @@ btr_set_min_rec_mark( #ifndef UNIV_HOTBACKUP /*************************************************************//** Deletes on the upper level the node pointer to a page. 
*/ -UNIV_INTERN void btr_node_ptr_delete( /*================*/ @@ -3664,13 +3302,13 @@ btr_node_ptr_delete( ibool compressed; dberr_t err; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); /* Delete node pointer on father page */ btr_page_get_father(index, block, mtr, &cursor); compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, - BTR_CREATE_FLAG, RB_NONE, mtr); + BTR_CREATE_FLAG, false, mtr); ut_a(err == DB_SUCCESS); if (!compressed) { @@ -3707,7 +3345,7 @@ btr_lift_page_up( ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); page_level = btr_page_get_level(page, mtr); root_page_no = dict_index_get_page(index); @@ -3720,8 +3358,15 @@ btr_lift_page_up( * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields)); buf_block_t* b; - offsets = btr_page_get_father_block(offsets, heap, index, - block, mtr, &cursor); + if (dict_index_is_spatial(index)) { + offsets = rtr_page_get_father_block( + NULL, heap, index, block, mtr, + NULL, &cursor); + } else { + offsets = btr_page_get_father_block(offsets, heap, + index, block, + mtr, &cursor); + } father_block = btr_cur_get_block(&cursor); father_page_zip = buf_block_get_page_zip(father_block); father_page = buf_block_get_frame(father_block); @@ -3734,12 +3379,20 @@ btr_lift_page_up( the first level, the tree is in an inconsistent state and can not be searched. 
*/ for (b = father_block; - buf_block_get_page_no(b) != root_page_no; ) { + b->page.id.page_no() != root_page_no; ) { ut_a(n_blocks < BTR_MAX_LEVELS); - offsets = btr_page_get_father_block(offsets, heap, - index, b, - mtr, &cursor); + if (dict_index_is_spatial(index)) { + offsets = rtr_page_get_father_block( + NULL, heap, index, b, mtr, + NULL, &cursor); + } else { + offsets = btr_page_get_father_block(offsets, + heap, + index, b, + mtr, + &cursor); + } blocks[n_blocks++] = b = btr_cur_get_block(&cursor); } @@ -3760,7 +3413,8 @@ btr_lift_page_up( ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix( + mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); father_block = blocks[0]; father_page_zip = buf_block_get_page_zip(father_block); @@ -3795,15 +3449,30 @@ btr_lift_page_up( /* Update the lock table and possible hash index. */ - lock_move_rec_list_end(father_block, block, - page_get_infimum_rec(page)); + if (!dict_table_is_locking_disabled(index->table)) { + lock_move_rec_list_end(father_block, block, + page_get_infimum_rec(page)); + } + + /* Also update the predicate locks */ + if (dict_index_is_spatial(index)) { + lock_prdt_rec_move(father_block, block); + } btr_search_move_or_delete_hash_entries(father_block, block, index); } - btr_blob_dbg_remove(page, index, "btr_lift_page_up"); - lock_update_copy_and_discard(father_block, block); + if (!dict_table_is_locking_disabled(index->table)) { + /* Free predicate page locks on the block */ + if (dict_index_is_spatial(index)) { + lock_mutex_enter(); + lock_prdt_page_free_from_discard( + block, lock_sys->prdt_page_hash); + lock_mutex_exit(); + } + lock_update_copy_and_discard(father_block, block); + } /* Go upward to root page, decrementing levels by one. */ for (i = lift_father_up ? 
1 : 0; i < n_blocks; i++, page_level++) { @@ -3818,11 +3487,16 @@ btr_lift_page_up( #endif /* UNIV_ZIP_DEBUG */ } + if (dict_index_is_spatial(index)) { + rtr_check_discard_page(index, NULL, block); + } + /* Free the file page */ btr_page_free(index, block, mtr); /* We play it safe and reset the free bits for the father */ - if (!dict_index_is_clust(index)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table)) { ibuf_reset_free_bits(father_block); } ut_ad(page_validate(father_page, index)); @@ -3840,8 +3514,7 @@ level lifts the records of the page to the father page, thus reducing the tree height. It is assumed that mtr holds an x-latch on the tree and on the page. If cursor is on the leaf level, mtr must also hold x-latches to the brothers, if they exist. -@return TRUE on success */ -UNIV_INTERN +@return TRUE on success */ ibool btr_compress( /*=========*/ @@ -3855,7 +3528,6 @@ btr_compress( { dict_index_t* index; ulint space; - ulint zip_size; ulint left_page_no; ulint right_page_no; buf_block_t* merge_block; @@ -3868,6 +3540,10 @@ btr_compress( mem_heap_t* heap; ulint* offsets; ulint nth_rec = 0; /* remove bogus warning */ + bool mbr_changed = false; +#ifdef UNIV_DEBUG + bool leftmost_child; +#endif DBUG_ENTER("btr_compress"); block = btr_cur_get_block(cursor); @@ -3876,11 +3552,22 @@ btr_compress( btr_assert_not_corrupted(block, index); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); +#ifdef UNIV_DEBUG + if (dict_index_is_spatial(index)) { + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK)); + } else { + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + } +#endif /* UNIV_DEBUG */ + + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); space = dict_index_get_space(index); - zip_size = 
dict_table_zip_size(index->table); + + const page_size_t page_size(dict_table_page_size(index->table)); MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS); @@ -3896,8 +3583,27 @@ btr_compress( #endif /* UNIV_DEBUG */ heap = mem_heap_create(100); - offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, - &father_cursor); + + if (dict_index_is_spatial(index)) { + offsets = rtr_page_get_father_block( + NULL, heap, index, block, mtr, cursor, &father_cursor); + ut_ad(cursor->page_cur.block->page.id.page_no() + == block->page.id.page_no()); + rec_t* my_rec = father_cursor.page_cur.rec; + + ulint page_no = btr_node_ptr_get_child_page_no(my_rec, offsets); + + if (page_no != block->page.id.page_no()) { + ib::info() << "father positioned on page " + << page_no << " instead of " + << block->page.id.page_no(); + offsets = btr_page_get_father_block( + NULL, heap, index, block, mtr, &father_cursor); + } + } else { + offsets = btr_page_get_father_block( + NULL, heap, index, block, mtr, &father_cursor); + } if (adjust) { nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); @@ -3912,6 +3618,13 @@ btr_compress( goto func_exit; } + ut_d(leftmost_child = + left_page_no != FIL_NULL + && (page_rec_get_next( + page_get_infimum_rec( + btr_cur_get_page(&father_cursor))) + == btr_cur_get_rec(&father_cursor))); + /* Decide the page to which we try to merge and which will inherit the locks */ @@ -3919,10 +3632,13 @@ btr_compress( &merge_block, mtr); DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;); - - if(!is_left +retry: + if (!is_left && !btr_can_merge_with_page(cursor, right_page_no, &merge_block, mtr)) { + if (!merge_block) { + merge_page = NULL; + } goto err_exit; } @@ -3930,14 +3646,26 @@ btr_compress( #ifdef UNIV_BTR_DEBUG if (is_left) { - ut_a(btr_page_get_next(merge_page, mtr) - == buf_block_get_page_no(block)); + ut_a(btr_page_get_next(merge_page, mtr) + == block->page.id.page_no()); } else { - ut_a(btr_page_get_prev(merge_page, mtr) - == 
buf_block_get_page_no(block)); + ut_a(btr_page_get_prev(merge_page, mtr) + == block->page.id.page_no()); } #endif /* UNIV_BTR_DEBUG */ +#ifdef UNIV_GIS_DEBUG + if (dict_index_is_spatial(index)) { + if (is_left) { + fprintf(stderr, "GIS_DIAG: merge left %ld to %ld \n", + (long) block->page.id.page_no(), left_page_no); + } else { + fprintf(stderr, "GIS_DIAG: merge right %ld to %ld\n", + (long) block->page.id.page_no(), right_page_no); + } + } +#endif /* UNIV_GIS_DEBUG */ + ut_ad(page_validate(merge_page, index)); merge_page_zip = buf_block_get_page_zip(merge_block); @@ -3953,6 +3681,38 @@ btr_compress( /* Move records to the merge page */ if (is_left) { + btr_cur_t cursor2; + rtr_mbr_t new_mbr; + ulint* offsets2 = NULL; + + /* For rtree, we need to update father's mbr. */ + if (dict_index_is_spatial(index)) { + /* We only support merge pages with the same parent + page */ + if (!rtr_check_same_block( + index, &cursor2, + btr_cur_get_block(&father_cursor), + merge_block, heap)) { + is_left = false; + goto retry; + } + + /* Set rtr_info for cursor2, since it is + necessary in recursive page merge. 
*/ + cursor2.rtr_info = cursor->rtr_info; + cursor2.tree_height = cursor->tree_height; + + offsets2 = rec_get_offsets( + btr_cur_get_rec(&cursor2), index, + NULL, ULINT_UNDEFINED, &heap); + + /* Check if parent entry needs to be updated */ + mbr_changed = rtr_merge_mbr_changed( + &cursor2, &father_cursor, + offsets2, offsets, &new_mbr, + merge_block, block, index); + } + rec_t* orig_pred = page_copy_rec_list_start( merge_block, block, page_get_supremum_rec(page), index, mtr); @@ -3964,10 +3724,53 @@ btr_compress( btr_search_drop_page_hash_index(block); /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); + btr_level_list_remove(space, page_size, page, index, mtr); - btr_node_ptr_delete(index, block, mtr); - lock_update_merge_left(merge_block, orig_pred, block); + if (dict_index_is_spatial(index)) { + rec_t* my_rec = father_cursor.page_cur.rec; + + ulint page_no = btr_node_ptr_get_child_page_no( + my_rec, offsets); + + if (page_no != block->page.id.page_no()) { + + ib::fatal() << "father positioned on " + << page_no << " instead of " + << block->page.id.page_no(); + + ut_ad(0); + } + + if (mbr_changed) { +#ifdef UNIV_DEBUG + bool success = rtr_update_mbr_field( + &cursor2, offsets2, &father_cursor, + merge_page, &new_mbr, NULL, mtr); + + ut_ad(success); +#else + rtr_update_mbr_field( + &cursor2, offsets2, &father_cursor, + merge_page, &new_mbr, NULL, mtr); +#endif + } else { + rtr_node_ptr_delete( + index, &father_cursor, block, mtr); + } + + /* No GAP lock needs to be worrying about */ + lock_mutex_enter(); + lock_prdt_page_free_from_discard( + block, lock_sys->prdt_page_hash); + lock_rec_free_all_from_discard_page(block); + lock_mutex_exit(); + } else { + btr_node_ptr_delete(index, block, mtr); + if (!dict_table_is_locking_disabled(index->table)) { + lock_update_merge_left( + merge_block, orig_pred, block); + } + } if (adjust) { nth_rec += page_rec_get_n_recs_before(orig_pred); @@ -3983,7 +3786,27 @@ btr_compress( 
byte fil_page_prev[4]; #endif /* UNIV_BTR_DEBUG */ - btr_page_get_father(index, merge_block, mtr, &cursor2); + if (dict_index_is_spatial(index)) { + cursor2.rtr_info = NULL; + + /* For spatial index, we disallow merge of blocks + with different parents, since the merge would need + to update entry (for MBR and Primary key) in the + parent of block being merged */ + if (!rtr_check_same_block( + index, &cursor2, + btr_cur_get_block(&father_cursor), + merge_block, heap)) { + goto err_exit; + } + + /* Set rtr_info for cursor2, since it is + necessary in recursive page merge. */ + cursor2.rtr_info = cursor->rtr_info; + cursor2.tree_height = cursor->tree_height; + } else { + btr_page_get_father(index, merge_block, mtr, &cursor2); + } if (merge_page_zip && left_page_no == FIL_NULL) { @@ -4033,7 +3856,11 @@ btr_compress( #endif /* UNIV_BTR_DEBUG */ /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); + btr_level_list_remove(space, page_size, (page_t*)page, index, mtr); + + ut_ad(btr_node_ptr_get_child_page_no( + btr_cur_get_rec(&father_cursor), offsets) + == block->page.id.page_no()); /* Replace the address of the old child node (= page) with the address of the merge page to the right */ @@ -4042,21 +3869,82 @@ btr_compress( btr_cur_get_page_zip(&father_cursor), offsets, right_page_no, mtr); - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2, - BTR_CREATE_FLAG, - RB_NONE, mtr); - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&cursor2, FALSE, mtr); +#ifdef UNIV_DEBUG + if (!page_is_leaf(page) && left_page_no == FIL_NULL) { + ut_ad(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( + page_rec_get_next(page_get_infimum_rec( + buf_block_get_frame(merge_block))), + page_is_comp(page))); } +#endif /* UNIV_DEBUG */ - lock_update_merge_right(merge_block, orig_succ, block); + /* For rtree, we need to update father's mbr. 
*/ + if (dict_index_is_spatial(index)) { + ulint* offsets2; + ulint rec_info; + + offsets2 = rec_get_offsets( + btr_cur_get_rec(&cursor2), + index, NULL, ULINT_UNDEFINED, &heap); + + ut_ad(btr_node_ptr_get_child_page_no( + btr_cur_get_rec(&cursor2), offsets2) + == right_page_no); + + rec_info = rec_get_info_bits( + btr_cur_get_rec(&father_cursor), + rec_offs_comp(offsets)); + if (rec_info & REC_INFO_MIN_REC_FLAG) { + /* When the father node ptr is minimal rec, + we will keep it and delete the node ptr of + merge page. */ + rtr_merge_and_update_mbr(&father_cursor, + &cursor2, + offsets, offsets2, + merge_page, + merge_block, + block, index, mtr); + } else { + /* Otherwise, we will keep the node ptr of + merge page and delete the father node ptr. + This is for keeping the rec order in upper + level. */ + rtr_merge_and_update_mbr(&cursor2, + &father_cursor, + offsets2, offsets, + merge_page, + merge_block, + block, index, mtr); + } + lock_mutex_enter(); + lock_prdt_page_free_from_discard( + block, lock_sys->prdt_page_hash); + lock_rec_free_all_from_discard_page(block); + lock_mutex_exit(); + } else { + + compressed = btr_cur_pessimistic_delete(&err, TRUE, + &cursor2, + BTR_CREATE_FLAG, + false, mtr); + ut_a(err == DB_SUCCESS); + + if (!compressed) { + btr_cur_compress_if_useful(&cursor2, + FALSE, + mtr); + } + + if (!dict_table_is_locking_disabled(index->table)) { + lock_update_merge_right( + merge_block, orig_succ, block); + } + } } - btr_blob_dbg_remove(page, index, "btr_compress"); - - if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(merge_page)) { /* Update the free bits of the B-tree page in the insert buffer bitmap. This has to be done in a separate mini-transaction that is committed before the @@ -4079,7 +3967,7 @@ btr_compress( committed mini-transaction, because in crash recovery, the free bits could momentarily be set too high. 
*/ - if (zip_size) { + if (page_size.is_compressed()) { /* Because the free bits may be incremented and we cannot update the insert buffer bitmap in the same mini-transaction, the only safe @@ -4103,10 +3991,25 @@ btr_compress( index)); #endif /* UNIV_ZIP_DEBUG */ + if (dict_index_is_spatial(index)) { +#ifdef UNIV_GIS_DEBUG + fprintf(stderr, "GIS_DIAG: compressed away %ld\n", + (long) block->page.id.page_no()); + fprintf(stderr, "GIS_DIAG: merged to %ld\n", + (long) merge_block->page.id.page_no()); +#endif + + rtr_check_discard_page(index, NULL, block); + } + /* Free the file page */ btr_page_free(index, block, mtr); - ut_ad(btr_check_node_ptr(index, merge_block, mtr)); + /* btr_check_node_ptr() needs parent block latched. + If the merge_block's parent block is not same, + we cannot use btr_check_node_ptr() */ + ut_ad(leftmost_child + || btr_check_node_ptr(index, merge_block, mtr)); func_exit: mem_heap_free(heap); @@ -4124,10 +4027,11 @@ func_exit: err_exit: /* We play it safe and reset the free bits. */ - if (zip_size + if (page_size.is_compressed() && merge_page && page_is_leaf(merge_page) && !dict_index_is_clust(index)) { + ibuf_reset_free_bits(merge_block); } @@ -4154,7 +4058,7 @@ btr_discard_only_page_on_level( /* Save the PAGE_MAX_TRX_ID from the leaf page. 
*/ max_trx_id = page_get_max_trx_id(buf_block_get_frame(block)); - while (buf_block_get_page_no(block) != dict_index_get_page(index)) { + while (block->page.id.page_no() != dict_index_get_page(index)) { btr_cur_t cursor; buf_block_t* father; const page_t* page = buf_block_get_frame(block); @@ -4164,13 +4068,23 @@ btr_discard_only_page_on_level( ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); ut_a(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix( + mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); btr_search_drop_page_hash_index(block); - btr_page_get_father(index, block, mtr, &cursor); + if (dict_index_is_spatial(index)) { + /* Check any concurrent search having this page */ + rtr_check_discard_page(index, NULL, block); + rtr_page_get_father(index, block, mtr, NULL, &cursor); + } else { + btr_page_get_father(index, block, mtr, &cursor); + } father = btr_cur_get_block(&cursor); - lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block); + if (!dict_table_is_locking_disabled(index->table)) { + lock_update_discard( + father, PAGE_HEAP_NO_SUPREMUM, block); + } /* Free the file page */ btr_page_free(index, block, mtr); @@ -4196,7 +4110,8 @@ btr_discard_only_page_on_level( btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr); ut_ad(page_is_leaf(buf_block_get_frame(block))); - if (!dict_index_is_clust(index)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table)) { /* We play it safe and reset the free bits for the root */ ibuf_reset_free_bits(block); @@ -4211,7 +4126,6 @@ btr_discard_only_page_on_level( Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot be used for the root page, which is allowed to be empty. 
*/ -UNIV_INTERN void btr_discard_page( /*=============*/ @@ -4220,8 +4134,6 @@ btr_discard_page( mtr_t* mtr) /*!< in: mtr */ { dict_index_t* index; - ulint space; - ulint zip_size; ulint left_page_no; ulint right_page_no; buf_block_t* merge_block; @@ -4229,40 +4141,69 @@ btr_discard_page( buf_block_t* block; page_t* page; rec_t* node_ptr; +#ifdef UNIV_DEBUG + btr_cur_t parent_cursor; + bool parent_is_different = false; +#endif block = btr_cur_get_block(cursor); index = btr_cur_get_index(cursor); - ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); + ut_ad(dict_index_get_page(index) != block->page.id.page_no()); + + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); + + const ulint space = dict_index_get_space(index); MONITOR_INC(MONITOR_INDEX_DISCARD); +#ifdef UNIV_DEBUG + if (dict_index_is_spatial(index)) { + rtr_page_get_father(index, block, mtr, cursor, &parent_cursor); + } else { + btr_page_get_father(index, block, mtr, &parent_cursor); + } +#endif + /* Decide the page which will inherit the locks */ left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr); right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr); + const page_size_t page_size(dict_table_page_size(index->table)); + if (left_page_no != FIL_NULL) { - merge_block = btr_block_get(space, zip_size, left_page_no, - RW_X_LATCH, index, mtr); + merge_block = btr_block_get( + page_id_t(space, left_page_no), page_size, + RW_X_LATCH, index, mtr); + merge_page = buf_block_get_frame(merge_block); #ifdef UNIV_BTR_DEBUG ut_a(btr_page_get_next(merge_page, mtr) - == 
buf_block_get_page_no(block)); + == block->page.id.page_no()); #endif /* UNIV_BTR_DEBUG */ + ut_d(parent_is_different = + (page_rec_get_next( + page_get_infimum_rec( + btr_cur_get_page( + &parent_cursor))) + == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { - merge_block = btr_block_get(space, zip_size, right_page_no, - RW_X_LATCH, index, mtr); + merge_block = btr_block_get( + page_id_t(space, right_page_no), page_size, + RW_X_LATCH, index, mtr); + merge_page = buf_block_get_frame(merge_block); #ifdef UNIV_BTR_DEBUG ut_a(btr_page_get_prev(merge_page, mtr) - == buf_block_get_page_no(block)); + == block->page.id.page_no()); #endif /* UNIV_BTR_DEBUG */ + ut_d(parent_is_different = page_rec_is_supremum( + page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); } else { btr_discard_only_page_on_level(index, block, mtr); @@ -4289,10 +4230,21 @@ btr_discard_page( btr_set_min_rec_mark(node_ptr, mtr); } - btr_node_ptr_delete(index, block, mtr); + if (dict_index_is_spatial(index)) { + btr_cur_t father_cursor; + + /* Since rtr_node_ptr_delete doesn't contain get father + node ptr, so, we need to get father node ptr first and then + delete it. 
*/ + rtr_page_get_father(index, block, mtr, cursor, &father_cursor); + rtr_node_ptr_delete(index, &father_cursor, block, mtr); + } else { + btr_node_ptr_delete(index, block, mtr); + } /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); + btr_level_list_remove(space, page_size, page, index, mtr); + #ifdef UNIV_ZIP_DEBUG { page_zip_des_t* merge_page_zip @@ -4302,27 +4254,34 @@ btr_discard_page( } #endif /* UNIV_ZIP_DEBUG */ - if (left_page_no != FIL_NULL) { - lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM, - block); - } else { - lock_update_discard(merge_block, - lock_get_min_heap_no(merge_block), - block); + if (!dict_table_is_locking_disabled(index->table)) { + if (left_page_no != FIL_NULL) { + lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM, + block); + } else { + lock_update_discard(merge_block, + lock_get_min_heap_no(merge_block), + block); + } } - btr_blob_dbg_remove(page, index, "btr_discard_page"); + if (dict_index_is_spatial(index)) { + rtr_check_discard_page(index, cursor, block); + } /* Free the file page */ btr_page_free(index, block, mtr); - ut_ad(btr_check_node_ptr(index, merge_block, mtr)); + /* btr_check_node_ptr() needs parent block latched. + If the merge_block's parent block is not same, + we cannot use btr_check_node_ptr() */ + ut_ad(parent_is_different + || btr_check_node_ptr(index, merge_block, mtr)); } #ifdef UNIV_BTR_PRINT /*************************************************************//** Prints size info of a B-tree. 
*/ -UNIV_INTERN void btr_print_size( /*===========*/ @@ -4348,7 +4307,7 @@ btr_print_size( fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr); fseg_print(seg, &mtr); - if (!dict_index_is_univ(index)) { + if (!dict_index_is_ibuf(index)) { seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; @@ -4379,10 +4338,10 @@ btr_print_recursive( ulint i = 0; mtr_t mtr2; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", - (ulong) btr_page_get_level(page, mtr), - (ulong) buf_block_get_page_no(block)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_SX_FIX, index->table)); + + ib::info() << "NODE ON LEVEL " << btr_page_get_level(page, mtr) + << " page " << block->page.id; page_print(block, index, width, width); @@ -4423,7 +4382,6 @@ btr_print_recursive( /**************************************************************//** Prints directories and other info of all nodes in the tree. */ -UNIV_INTERN void btr_print_index( /*============*/ @@ -4443,7 +4401,7 @@ btr_print_index( mtr_start(&mtr); - root = btr_root_block_get(index, RW_X_LATCH, &mtr); + root = btr_root_block_get(index, RW_SX_LATCH, &mtr); btr_print_recursive(index, root, width, &heap, &offsets, &mtr); if (heap) { @@ -4452,15 +4410,14 @@ btr_print_index( mtr_commit(&mtr); - btr_validate_index(index, 0); + ut_ad(btr_validate_index(index, 0, false)); } #endif /* UNIV_BTR_PRINT */ #ifdef UNIV_DEBUG /************************************************************//** Checks that the node pointer to a page is appropriate. 
-@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool btr_check_node_ptr( /*===============*/ @@ -4474,15 +4431,22 @@ btr_check_node_ptr( btr_cur_t cursor; page_t* page = buf_block_get_frame(block); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - if (dict_index_get_page(index) == buf_block_get_page_no(block)) { + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); + + if (dict_index_get_page(index) == block->page.id.page_no()) { return(TRUE); } heap = mem_heap_create(256); - offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, - &cursor); + + if (dict_index_is_spatial(index)) { + offsets = rtr_page_get_father_block(NULL, heap, index, block, mtr, + NULL, &cursor); + } else { + offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, + &cursor); + } if (page_is_leaf(page)) { @@ -4493,7 +4457,16 @@ btr_check_node_ptr( index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap, btr_page_get_level(page, mtr)); - ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets)); + /* For spatial index, the MBR in the parent rec could be different + with that of first rec of child, their relationship should be + "WITHIN" relationship */ + if (dict_index_is_spatial(index)) { + ut_a(!cmp_dtuple_rec_with_gis( + tuple, btr_cur_get_rec(&cursor), + offsets, PAGE_CUR_WITHIN)); + } else { + ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets)); + } func_exit: mem_heap_free(heap); @@ -4511,17 +4484,17 @@ btr_index_rec_validate_report( const rec_t* rec, /*!< in: index record */ const dict_index_t* index) /*!< in: index */ { - fputs("InnoDB: Record in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, ", page %lu, at offset %lu\n", - page_get_page_no(page), (ulint) page_offset(rec)); + ib::info() << "Record in index " << index->name + << " of table " << index->table->name + << ", page " << page_id_t(page_get_space_id(page), + page_get_page_no(page)) + << ", at offset " << page_offset(rec); 
} /************************************************************//** Checks the size and number of fields in a record based on the definition of the index. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool btr_index_rec_validate( /*===================*/ @@ -4542,7 +4515,7 @@ btr_index_rec_validate( page = page_align(rec); - if (dict_index_is_univ(index)) { + if (dict_index_is_ibuf(index)) { /* The insert buffer index tree can contain records from any other index: we cannot check the number of fields or their length */ @@ -4550,25 +4523,34 @@ btr_index_rec_validate( return(TRUE); } +#ifdef VIRTUAL_INDEX_DEBUG + if (dict_index_has_virtual(index)) { + fprintf(stderr, "index name is %s\n", index->name()); + } +#endif if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) { btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", - (ulong) !!page_is_comp(page), - (ulong) dict_table_is_comp(index->table)); + + ib::error() << "Compact flag=" << !!page_is_comp(page) + << ", should be " << dict_table_is_comp(index->table); return(FALSE); } n = dict_index_get_n_fields(index); - if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) { + if (!page_is_comp(page) + && (rec_get_n_fields_old(rec) != n + /* a record for older SYS_INDEXES table + (missing merge_threshold column) is acceptable. 
*/ + && !(index->id == DICT_INDEXES_ID + && rec_get_n_fields_old(rec) == n - 1))) { btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields_old(rec), (ulong) n); + + ib::error() << "Has " << rec_get_n_fields_old(rec) + << " fields, should have " << n; if (dump_on_error) { - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - fputs("InnoDB: corrupt record ", stderr); rec_print_old(stderr, rec); putc('\n', stderr); @@ -4579,38 +4561,58 @@ btr_index_rec_validate( offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); for (i = 0; i < n; i++) { - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(index, i), page_is_comp(page)); + dict_field_t* field = dict_index_get_nth_field(index, i); + ulint fixed_size = dict_col_get_fixed_size( + dict_field_get_col(field), + page_is_comp(page)); rec_get_nth_field_offs(offsets, i, &len); /* Note that if fixed_size != 0, it equals the - length of a fixed-size column in the clustered index. + length of a fixed-size column in the clustered index, + except the DATA_POINT, whose length would be MBR_LEN + when it's indexed in a R-TREE. We should adjust it here. A prefix index of the column is of fixed, but different length. When fixed_size == 0, prefix_len is the maximum length of the prefix index column. */ - if ((dict_index_get_nth_field(index, i)->prefix_len == 0 + if (dict_field_get_col(field)->mtype == DATA_POINT) { + ut_ad(fixed_size == DATA_POINT_LEN); + if (dict_index_is_spatial(index)) { + /* For DATA_POINT data, when it has R-tree + index, the fixed_len is the MBR of the point. + But if it's a primary key and on R-TREE + as the PK pointer, the length shall be + DATA_POINT_LEN as well. 
*/ + ut_ad((field->fixed_len == DATA_MBR_LEN + && i == 0) + || (field->fixed_len == DATA_POINT_LEN + && i != 0)); + fixed_size = field->fixed_len; + } + } + + if ((field->prefix_len == 0 && len != UNIV_SQL_NULL && fixed_size && len != fixed_size) - || (dict_index_get_nth_field(index, i)->prefix_len > 0 + || (field->prefix_len > 0 && len != UNIV_SQL_NULL && len - > dict_index_get_nth_field(index, i)->prefix_len)) { + > field->prefix_len)) { btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, - "InnoDB: field %lu len is %lu," - " should be %lu\n", - (ulong) i, (ulong) len, (ulong) fixed_size); + + ib::error error; + + error << "Field " << i << " len is " << len + << ", should be " << fixed_size; if (dump_on_error) { - buf_page_print(page, 0, - BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); + error << "; "; + rec_print(error.m_oss, rec, + rec_get_info_bits( + rec, rec_offs_comp(offsets)), + offsets); } if (heap) { mem_heap_free(heap); @@ -4619,6 +4621,12 @@ btr_index_rec_validate( } } +#ifdef VIRTUAL_INDEX_DEBUG + if (dict_index_has_virtual(index)) { + rec_print_new(stderr, rec, offsets); + } +#endif + if (heap) { mem_heap_free(heap); } @@ -4628,7 +4636,7 @@ btr_index_rec_validate( /************************************************************//** Checks the size and number of fields in records based on the definition of the index. 
-@return TRUE if ok */ +@return TRUE if ok */ static ibool btr_index_page_validate( @@ -4693,13 +4701,14 @@ btr_validate_report1( ulint level, /*!< in: B-tree level */ const buf_block_t* block) /*!< in: index page */ { - fprintf(stderr, "InnoDB: Error in page %lu of ", - buf_block_get_page_no(block)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); + ib::error error; + error << "In page " << block->page.id.page_no() + << " of index " << index->name + << " of table " << index->table->name; + + if (level > 0) { + error << ", index tree level " << level; } - putc('\n', stderr); } /************************************************************//** @@ -4713,30 +4722,28 @@ btr_validate_report2( const buf_block_t* block1, /*!< in: first index page */ const buf_block_t* block2) /*!< in: second index page */ { - fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ", - buf_block_get_page_no(block1), - buf_block_get_page_no(block2)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); + ib::error error; + error << "In pages " << block1->page.id + << " and " << block2->page.id << " of index " << index->name + << " of table " << index->table->name; + + if (level > 0) { + error << ", index tree level " << level; } - putc('\n', stderr); } /************************************************************//** Validates index tree level. 
-@return TRUE if ok */ +@return TRUE if ok */ static bool btr_validate_level( /*===============*/ dict_index_t* index, /*!< in: index tree */ const trx_t* trx, /*!< in: transaction or NULL */ - ulint level) /*!< in: level number */ + ulint level, /*!< in: level number */ + bool lockout)/*!< in: true if X-latch index is intended */ { - ulint space; - ulint space_flags; - ulint zip_size; buf_block_t* block; page_t* page; buf_block_t* right_block = 0; /* remove warning */ @@ -4758,25 +4765,42 @@ btr_validate_level( #ifdef UNIV_ZIP_DEBUG page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ + ulint savepoint = 0; + ulint savepoint2 = 0; + ulint parent_page_no = FIL_NULL; + ulint parent_right_page_no = FIL_NULL; + bool rightmost_child = false; mtr_start(&mtr); - mtr_x_lock(dict_index_get_lock(index), &mtr); + if (!srv_read_only_mode) { + if (lockout) { + mtr_x_lock(dict_index_get_lock(index), &mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), &mtr); + } + } - block = btr_root_block_get(index, RW_X_LATCH, &mtr); + block = btr_root_block_get(index, RW_SX_LATCH, &mtr); page = buf_block_get_frame(block); seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP; - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); +#ifdef UNIV_DEBUG + if (dict_index_is_spatial(index)) { + fprintf(stderr, "Root page no: %lu\n", + (ulong) page_get_page_no(page)); + } +#endif - fil_space_get_latch(space, &space_flags); + const fil_space_t* space = fil_space_get(index->space); + const page_size_t table_page_size( + dict_table_page_size(index->table)); + const page_size_t space_page_size(space->flags); - if (zip_size != dict_tf_get_zip_size(space_flags)) { + if (!table_page_size.equals_to(space_page_size)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Flags mismatch: table=%lu, tablespace=%lu", - (ulint) index->table->flags, (ulint) space_flags); + ib::warn() << "Flags mismatch: table=" << index->table->flags + << ", tablespace=" << space->flags; mtr_commit(&mtr); @@ -4787,17 
+4811,18 @@ btr_validate_level( const rec_t* node_ptr; if (fseg_page_is_free(seg, - block->page.space, block->page.offset)) { + block->page.id.space(), + block->page.id.page_no())) { btr_validate_report1(index, level, block); - ib_logf(IB_LOG_LEVEL_WARN, "page is free"); + ib::warn() << "Page is free"; ret = false; } - ut_a(space == buf_block_get_space(block)); - ut_a(space == page_get_space_id(page)); + ut_a(index->space == block->page.id.space()); + ut_a(index->space == page_get_space_id(page)); #ifdef UNIV_ZIP_DEBUG page_zip = buf_block_get_page_zip(block); ut_a(!page_zip || page_zip_validate(page_zip, page, index)); @@ -4810,8 +4835,38 @@ btr_validate_level( node_ptr = page_cur_get_rec(&cursor); offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap); + + savepoint2 = mtr_set_savepoint(&mtr); block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr); page = buf_block_get_frame(block); + + /* For R-Tree, since record order might not be the same as + linked index page in the lower level, we need to travers + backwards to get the first page rec in this level. + This is only used for index validation. Spatial index + does not use such scan for any of its DML or query + operations */ + if (dict_index_is_spatial(index)) { + left_page_no = btr_page_get_prev(page, &mtr); + + while (left_page_no != FIL_NULL) { + page_id_t left_page_id( + index->space, left_page_no); + /* To obey latch order of tree blocks, + we should release the right_block once to + obtain lock of the uncle block. */ + mtr_release_block_at_savepoint( + &mtr, savepoint2, block); + + savepoint2 = mtr_set_savepoint(&mtr); + block = btr_block_get( + left_page_id, + table_page_size, + RW_SX_LATCH, index, &mtr); + page = buf_block_get_frame(block); + left_page_no = btr_page_get_prev(page, &mtr); + } + } } /* Now we are on the desired level. 
Loop through the pages on that @@ -4825,28 +4880,34 @@ btr_validate_level( loop: mem_heap_empty(heap); offsets = offsets2 = NULL; - mtr_x_lock(dict_index_get_lock(index), &mtr); + if (!srv_read_only_mode) { + if (lockout) { + mtr_x_lock(dict_index_get_lock(index), &mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), &mtr); + } + } #ifdef UNIV_ZIP_DEBUG page_zip = buf_block_get_page_zip(block); ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_a(block->page.space == space); + ut_a(block->page.id.space() == index->space); - if (fseg_page_is_free(seg, block->page.space, block->page.offset)) { + if (fseg_page_is_free(seg, + block->page.id.space(), + block->page.id.page_no())) { btr_validate_report1(index, level, block); - ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free"); + ib::warn() << "Page is marked as free"; ret = false; } else if (btr_page_get_index_id(page) != index->id) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Page index id " IB_ID_FMT " != data dictionary " - "index id " IB_ID_FMT, - btr_page_get_index_id(page), index->id); + ib::error() << "Page index id " << btr_page_get_index_id(page) + << " != data dictionary index id " << index->id; ret = false; @@ -4874,17 +4935,21 @@ loop: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - right_block = btr_block_get(space, zip_size, right_page_no, - RW_X_LATCH, index, &mtr); + savepoint = mtr_set_savepoint(&mtr); + + right_block = btr_block_get( + page_id_t(index->space, right_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); + right_page = buf_block_get_frame(right_block); + if (btr_page_get_prev(right_page, &mtr) != page_get_page_no(page)) { btr_validate_report2(index, level, block, right_block); fputs("InnoDB: broken FIL_PAGE_NEXT" " or FIL_PAGE_PREV links\n", stderr); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); ret = false; } @@ -4892,8 +4957,6 @@ loop: if (page_is_comp(right_page) != 
page_is_comp(page)) { btr_validate_report2(index, level, block, right_block); fputs("InnoDB: 'compact' flag mismatch\n", stderr); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); ret = false; @@ -4907,17 +4970,19 @@ loop: offsets, ULINT_UNDEFINED, &heap); offsets2 = rec_get_offsets(right_rec, index, offsets2, ULINT_UNDEFINED, &heap); - if (cmp_rec_rec(rec, right_rec, offsets, offsets2, - index) >= 0) { + + /* For spatial index, we cannot guarantee the key ordering + across pages, so skip the record compare verification for + now. Will enhanced in special R-Tree index validation scheme */ + if (!dict_index_is_spatial(index) + && cmp_rec_rec(rec, right_rec, + offsets, offsets2, index) >= 0) { btr_validate_report2(index, level, block, right_block); fputs("InnoDB: records in wrong order" " on adjacent pages\n", stderr); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); - fputs("InnoDB: record ", stderr); rec = page_rec_get_prev(page_get_supremum_rec(page)); rec_print(stderr, rec, index); @@ -4938,35 +5003,49 @@ loop: page_is_comp(page))); } - if (buf_block_get_page_no(block) != dict_index_get_page(index)) { + /* Similarly skip the father node check for spatial index for now, + for a couple of reasons: + 1) As mentioned, there is no ordering relationship between records + in parent level and linked pages in the child level. + 2) Search parent from root is very costly for R-tree. 
+ We will add special validation mechanism for R-tree later (WL #7520) */ + if (!dict_index_is_spatial(index) + && block->page.id.page_no() != dict_index_get_page(index)) { /* Check father node pointers */ - rec_t* node_ptr; - offsets = btr_page_get_father_block(offsets, heap, index, - block, &mtr, &node_cur); + btr_cur_position( + index, page_rec_get_next(page_get_infimum_rec(page)), + block, &node_cur); + offsets = btr_page_get_father_node_ptr_for_validate( + offsets, heap, &node_cur, &mtr); + father_page = btr_cur_get_page(&node_cur); node_ptr = btr_cur_get_rec(&node_cur); + parent_page_no = page_get_page_no(father_page); + parent_right_page_no = btr_page_get_next(father_page, &mtr); + rightmost_child = page_rec_is_supremum( + page_rec_get_next(node_ptr)); + btr_cur_position( - index, page_rec_get_prev(page_get_supremum_rec(page)), + index, + page_rec_get_prev(page_get_supremum_rec(page)), block, &node_cur); - offsets = btr_page_get_father_node_ptr(offsets, heap, - &node_cur, &mtr); + + offsets = btr_page_get_father_node_ptr_for_validate( + offsets, heap, &node_cur, &mtr); if (node_ptr != btr_cur_get_rec(&node_cur) || btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_block_get_page_no(block)) { + != block->page.id.page_no()) { btr_validate_report1(index, level, block); fputs("InnoDB: node pointer to the page is wrong\n", stderr); - buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - fputs("InnoDB: node ptr ", stderr); rec_print(stderr, node_ptr, index); @@ -4997,14 +5076,9 @@ loop: btr_validate_report1(index, level, block); - buf_page_print(father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(page, 0, - BUF_PAGE_PRINT_NO_CRASH); + ib::error() << "Node ptrs differ on levels > 0"; - fputs("InnoDB: Error: node ptrs differ" - " on levels > 0\n" - "InnoDB: node ptr ", stderr); + fputs("InnoDB: node ptr ",stderr); rec_print_new(stderr, node_ptr, offsets); fputs("InnoDB: first rec ", stderr); 
rec_print(stderr, first_rec, index); @@ -5026,12 +5100,41 @@ loop: page_get_supremum_rec(father_page))); ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); } else { - const rec_t* right_node_ptr - = page_rec_get_next(node_ptr); + const rec_t* right_node_ptr; + + right_node_ptr = page_rec_get_next(node_ptr); + + if (!lockout && rightmost_child) { + + /* To obey latch order of tree blocks, + we should release the right_block once to + obtain lock of the uncle block. */ + mtr_release_block_at_savepoint( + &mtr, savepoint, right_block); + + btr_block_get( + page_id_t(index->space, + parent_right_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); + + right_block = btr_block_get( + page_id_t(index->space, + right_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); + } + + btr_cur_position( + index, page_rec_get_next( + page_get_infimum_rec( + buf_block_get_frame( + right_block))), + right_block, &right_node_cur); + + offsets = btr_page_get_father_node_ptr_for_validate( + offsets, heap, &right_node_cur, &mtr); - offsets = btr_page_get_father_block( - offsets, heap, index, right_block, - &mtr, &right_node_cur); if (right_node_ptr != page_get_supremum_rec(father_page)) { @@ -5044,16 +5147,6 @@ loop: btr_validate_report1(index, level, block); - - buf_page_print( - father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); } } else { page_t* right_father_page @@ -5070,19 +5163,6 @@ loop: btr_validate_report1(index, level, block); - - buf_page_print( - father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); } if (page_get_page_no(right_father_page) @@ -5095,19 +5175,6 @@ loop: btr_validate_report1(index, level, block); - - buf_page_print( - father_page, 0, - 
BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); } } } @@ -5125,9 +5192,29 @@ node_ptr_fails: mtr_start(&mtr); + if (!lockout) { + if (rightmost_child) { + if (parent_right_page_no != FIL_NULL) { + btr_block_get( + page_id_t( + index->space, + parent_right_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); + } + } else if (parent_page_no != FIL_NULL) { + btr_block_get( + page_id_t(index->space, + parent_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); + } + } + block = btr_block_get( - space, zip_size, right_page_no, - RW_X_LATCH, index, &mtr); + page_id_t(index->space, right_page_no), + table_page_size, + RW_SX_LATCH, index, &mtr); page = buf_block_get_frame(block); @@ -5139,15 +5226,55 @@ node_ptr_fails: return(ret); } +/**************************************************************//** +Do an index level validation of spaital index tree. +@return true if no error found */ +bool +btr_validate_spatial_index( +/*=======================*/ + dict_index_t* index, /*!< in: index */ + const trx_t* trx) /*!< in: transaction or NULL */ +{ + + mtr_t mtr; + bool ok = true; + + mtr_start(&mtr); + + mtr_x_lock(dict_index_get_lock(index), &mtr); + + page_t* root = btr_root_get(index, &mtr); + ulint n = btr_page_get_level(root, &mtr); + +#ifdef UNIV_RTR_DEBUG + fprintf(stderr, "R-tree level is %lu\n", n); +#endif /* UNIV_RTR_DEBUG */ + + for (ulint i = 0; i <= n; ++i) { +#ifdef UNIV_RTR_DEBUG + fprintf(stderr, "Level %lu:\n", n - i); +#endif /* UNIV_RTR_DEBUG */ + + if (!btr_validate_level(index, trx, n - i, true)) { + ok = false; + break; + } + } + + mtr_commit(&mtr); + + return(ok); +} + /**************************************************************//** Checks the consistency of an index tree. 
@return DB_SUCCESS if ok, error code if not */ -UNIV_INTERN dberr_t btr_validate_index( /*===============*/ dict_index_t* index, /*!< in: index */ - const trx_t* trx) /*!< in: transaction or NULL */ + const trx_t* trx, /*!< in: transaction or NULL */ + bool lockout)/*!< in: true if X-latch index is intended */ { dberr_t err = DB_SUCCESS; @@ -5157,11 +5284,24 @@ btr_validate_index( return(err); } + if (dict_index_is_spatial(index)) { + if(!btr_validate_spatial_index(index, trx)) { + err = DB_ERROR; + } + return(err); + } + mtr_t mtr; mtr_start(&mtr); - mtr_x_lock(dict_index_get_lock(index), &mtr); + if (!srv_read_only_mode) { + if (lockout) { + mtr_x_lock(dict_index_get_lock(index), &mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), &mtr); + } + } page_t* root = btr_root_get(index, &mtr); @@ -5175,7 +5315,7 @@ btr_validate_index( for (ulint i = 0; i <= n; ++i) { - if (!btr_validate_level(index, trx, n - i)) { + if (!btr_validate_level(index, trx, n - i, lockout)) { err = DB_CORRUPTION; break; } @@ -5189,9 +5329,8 @@ btr_validate_index( /**************************************************************//** Checks if the page in the cursor can be merged with given page. If necessary, re-organize the merge_page. -@return TRUE if possible to merge. */ -UNIV_INTERN -ibool +@return true if possible to merge. 
*/ +bool btr_can_merge_with_page( /*====================*/ btr_cur_t* cursor, /*!< in: cursor on the page to merge */ @@ -5201,34 +5340,33 @@ btr_can_merge_with_page( { dict_index_t* index; page_t* page; - ulint space; - ulint zip_size; ulint n_recs; ulint data_size; - ulint max_ins_size_reorg; + ulint max_ins_size_reorg; ulint max_ins_size; buf_block_t* mblock; page_t* mpage; DBUG_ENTER("btr_can_merge_with_page"); if (page_no == FIL_NULL) { - goto error; + *merge_block = NULL; + DBUG_RETURN(false); } index = btr_cur_get_index(cursor); - page = btr_cur_get_page(cursor); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); + page = btr_cur_get_page(cursor); - mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index, - mtr); + const page_id_t page_id(dict_index_get_space(index), page_no); + const page_size_t page_size(dict_table_page_size(index->table)); + + mblock = btr_block_get(page_id, page_size, RW_X_LATCH, index, mtr); mpage = buf_block_get_frame(mblock); - n_recs = page_get_n_recs(page); - data_size = page_get_data_size(page); + n_recs = page_get_n_recs(page); + data_size = page_get_data_size(page); - max_ins_size_reorg = page_get_max_insert_size_after_reorganize( - mpage, n_recs); + max_ins_size_reorg = page_get_max_insert_size_after_reorganize( + mpage, n_recs); if (data_size > max_ins_size_reorg) { goto error; @@ -5237,7 +5375,7 @@ btr_can_merge_with_page( /* If compression padding tells us that merging will result in too packed up page i.e.: which is likely to cause compression failure then don't merge the pages. 
*/ - if (zip_size && page_is_leaf(mpage) + if (page_size.is_compressed() && page_is_leaf(mpage) && (page_get_data_size(mpage) + data_size >= dict_index_zip_pad_optimal_page_size(index))) { @@ -5272,11 +5410,11 @@ btr_can_merge_with_page( } *merge_block = mblock; - DBUG_RETURN(TRUE); + DBUG_RETURN(true); error: *merge_block = NULL; - DBUG_RETURN(FALSE); + DBUG_RETURN(false); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc new file mode 100644 index 00000000000..9ff3bc5f6d1 --- /dev/null +++ b/storage/innobase/btr/btr0bulk.cc @@ -0,0 +1,1002 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file btr/btr0bulk.cc +The B-tree bulk load + +Created 03/11/2014 Shaohua Wang +*******************************************************/ + +#include "btr0bulk.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "btr0pcur.h" +#include "ibuf0ibuf.h" + +/** Innodb B-tree index fill factor for bulk load. */ +long innobase_fill_factor; + +/** Initialize members, allocate page if needed and start mtr. +Note: we commit all mtrs on failure. 
+@return error code. */ +dberr_t +PageBulk::init() +{ + mtr_t* mtr; + buf_block_t* new_block; + page_t* new_page; + page_zip_des_t* new_page_zip; + ulint new_page_no; + + ut_ad(m_heap == NULL); + m_heap = mem_heap_create(1000); + + mtr = static_cast( + mem_heap_alloc(m_heap, sizeof(mtr_t))); + mtr_start(mtr); + mtr_x_lock(dict_index_get_lock(m_index), mtr); + mtr_set_log_mode(mtr, MTR_LOG_NO_REDO); + mtr_set_flush_observer(mtr, m_flush_observer); + + if (m_page_no == FIL_NULL) { + mtr_t alloc_mtr; + + /* We commit redo log for allocation by a separate mtr, + because we don't guarantee pages are committed following + the allocation order, and we will always generate redo log + for page allocation, even when creating a new tablespace. */ + mtr_start(&alloc_mtr); + alloc_mtr.set_named_space(dict_index_get_space(m_index)); + + ulint n_reserved; + bool success; + success = fsp_reserve_free_extents(&n_reserved, m_index->space, + 1, FSP_NORMAL, &alloc_mtr); + if (!success) { + mtr_commit(&alloc_mtr); + mtr_commit(mtr); + return(DB_OUT_OF_FILE_SPACE); + } + + /* Allocate a new page. 
*/ + new_block = btr_page_alloc(m_index, 0, FSP_UP, m_level, + &alloc_mtr, mtr); + + if (n_reserved > 0) { + fil_space_release_free_extents(m_index->space, + n_reserved); + } + + mtr_commit(&alloc_mtr); + + new_page = buf_block_get_frame(new_block); + new_page_zip = buf_block_get_page_zip(new_block); + new_page_no = page_get_page_no(new_page); + + if (new_page_zip) { + page_create_zip(new_block, m_index, m_level, 0, + NULL, mtr); + } else { + ut_ad(!dict_index_is_spatial(m_index)); + page_create(new_block, mtr, + dict_table_is_comp(m_index->table), + false); + btr_page_set_level(new_page, NULL, m_level, mtr); + } + + btr_page_set_next(new_page, NULL, FIL_NULL, mtr); + btr_page_set_prev(new_page, NULL, FIL_NULL, mtr); + + btr_page_set_index_id(new_page, NULL, m_index->id, mtr); + } else { + page_id_t page_id(dict_index_get_space(m_index), m_page_no); + page_size_t page_size(dict_table_page_size(m_index->table)); + + new_block = btr_block_get(page_id, page_size, + RW_X_LATCH, m_index, mtr); + + new_page = buf_block_get_frame(new_block); + new_page_zip = buf_block_get_page_zip(new_block); + new_page_no = page_get_page_no(new_page); + ut_ad(m_page_no == new_page_no); + + ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW); + + btr_page_set_level(new_page, NULL, m_level, mtr); + } + + if (dict_index_is_sec_or_ibuf(m_index) + && !dict_table_is_temporary(m_index->table) + && page_is_leaf(new_page)) { + page_update_max_trx_id(new_block, NULL, m_trx_id, mtr); + } + + m_mtr = mtr; + m_block = new_block; + m_block->skip_flush_check = true; + m_page = new_page; + m_page_zip = new_page_zip; + m_page_no = new_page_no; + m_cur_rec = page_get_infimum_rec(new_page); + ut_ad(m_is_comp == !!page_is_comp(new_page)); + m_free_space = page_get_free_space_of_empty(m_is_comp); + + if (innobase_fill_factor == 100 && dict_index_is_clust(m_index)) { + /* Keep default behavior compatible with 5.6 */ + m_reserved_space = dict_index_get_space_reserve(); + } else { + m_reserved_space = 
+ UNIV_PAGE_SIZE * (100 - innobase_fill_factor) / 100; + } + + m_padding_space = + UNIV_PAGE_SIZE - dict_index_zip_pad_optimal_page_size(m_index); + m_heap_top = page_header_get_ptr(new_page, PAGE_HEAP_TOP); + m_rec_no = page_header_get_field(new_page, PAGE_N_RECS); + + ut_d(m_total_data = 0); + page_header_set_field(m_page, NULL, PAGE_HEAP_TOP, UNIV_PAGE_SIZE - 1); + + return(DB_SUCCESS); +} + +/** Insert a record in the page. +@param[in] rec record +@param[in] offsets record offsets */ +void +PageBulk::insert( + const rec_t* rec, + ulint* offsets) +{ + ulint rec_size; + + ut_ad(m_heap != NULL); + + rec_size = rec_offs_size(offsets); + +#ifdef UNIV_DEBUG + /* Check whether records are in order. */ + if (!page_rec_is_infimum(m_cur_rec)) { + rec_t* old_rec = m_cur_rec; + ulint* old_offsets = rec_get_offsets( + old_rec, m_index, NULL, ULINT_UNDEFINED, &m_heap); + + ut_ad(cmp_rec_rec(rec, old_rec, offsets, old_offsets, m_index) + > 0); + } + + m_total_data += rec_size; +#endif /* UNIV_DEBUG */ + + /* 1. Copy the record to page. */ + rec_t* insert_rec = rec_copy(m_heap_top, rec, offsets); + rec_offs_make_valid(insert_rec, m_index, offsets); + + /* 2. Insert the record in the linked list. */ + rec_t* next_rec = page_rec_get_next(m_cur_rec); + + page_rec_set_next(insert_rec, next_rec); + page_rec_set_next(m_cur_rec, insert_rec); + + /* 3. Set the n_owned field in the inserted record to zero, + and set the heap_no field. */ + if (m_is_comp) { + rec_set_n_owned_new(insert_rec, NULL, 0); + rec_set_heap_no_new(insert_rec, + PAGE_HEAP_NO_USER_LOW + m_rec_no); + } else { + rec_set_n_owned_old(insert_rec, 0); + rec_set_heap_no_old(insert_rec, + PAGE_HEAP_NO_USER_LOW + m_rec_no); + } + + /* 4. Set member variables. 
*/ + ulint slot_size; + slot_size = page_dir_calc_reserved_space(m_rec_no + 1) + - page_dir_calc_reserved_space(m_rec_no); + + ut_ad(m_free_space >= rec_size + slot_size); + ut_ad(m_heap_top + rec_size < m_page + UNIV_PAGE_SIZE); + + m_free_space -= rec_size + slot_size; + m_heap_top += rec_size; + m_rec_no += 1; + m_cur_rec = insert_rec; +} + +/** Mark end of insertion to the page. Scan all records to set page dirs, +and set page header members. +Note: we refer to page_copy_rec_list_end_to_created_page. */ +void +PageBulk::finish() +{ + ut_ad(m_rec_no > 0); + +#ifdef UNIV_DEBUG + ut_ad(m_total_data + page_dir_calc_reserved_space(m_rec_no) + <= page_get_free_space_of_empty(m_is_comp)); + + /* To pass the debug tests we have to set these dummy values + in the debug version */ + page_dir_set_n_slots(m_page, NULL, UNIV_PAGE_SIZE / 2); +#endif + + ulint count = 0; + ulint n_recs = 0; + ulint slot_index = 0; + rec_t* insert_rec = page_rec_get_next(page_get_infimum_rec(m_page)); + page_dir_slot_t* slot = NULL; + + /* Set owner & dir. */ + do { + + count++; + n_recs++; + + if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) { + + slot_index++; + + slot = page_dir_get_nth_slot(m_page, slot_index); + + page_dir_slot_set_rec(slot, insert_rec); + page_dir_slot_set_n_owned(slot, NULL, count); + + count = 0; + } + + insert_rec = page_rec_get_next(insert_rec); + } while (!page_rec_is_supremum(insert_rec)); + + if (slot_index > 0 + && (count + 1 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 + <= PAGE_DIR_SLOT_MAX_N_OWNED)) { + /* We can merge the two last dir slots. This operation is + here to make this function imitate exactly the equivalent + task made using page_cur_insert_rec, which we use in database + recovery to reproduce the task performed by this function. + To be able to check the correctness of recovery, it is good + that it imitates exactly. 
*/ + + count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2; + + page_dir_slot_set_n_owned(slot, NULL, 0); + + slot_index--; + } + + slot = page_dir_get_nth_slot(m_page, 1 + slot_index); + page_dir_slot_set_rec(slot, page_get_supremum_rec(m_page)); + page_dir_slot_set_n_owned(slot, NULL, count + 1); + + ut_ad(!dict_index_is_spatial(m_index)); + page_dir_set_n_slots(m_page, NULL, 2 + slot_index); + page_header_set_ptr(m_page, NULL, PAGE_HEAP_TOP, m_heap_top); + page_dir_set_n_heap(m_page, NULL, PAGE_HEAP_NO_USER_LOW + m_rec_no); + page_header_set_field(m_page, NULL, PAGE_N_RECS, m_rec_no); + + page_header_set_ptr(m_page, NULL, PAGE_LAST_INSERT, m_cur_rec); + page_header_set_field(m_page, NULL, PAGE_DIRECTION, PAGE_RIGHT); + page_header_set_field(m_page, NULL, PAGE_N_DIRECTION, 0); + + m_block->skip_flush_check = false; +} + +/** Commit inserts done to the page +@param[in] success Flag whether all inserts succeed. */ +void +PageBulk::commit( + bool success) +{ + if (success) { + ut_ad(page_validate(m_page, m_index)); + + /* Set no free space left and no buffered changes in ibuf. */ + if (!dict_index_is_clust(m_index) + && !dict_table_is_temporary(m_index->table) + && page_is_leaf(m_page)) { + ibuf_set_bitmap_for_bulk_load( + m_block, innobase_fill_factor == 100); + } + } + + mtr_commit(m_mtr); +} + +/** Compress a page of compressed table +@return true compress successfully or no need to compress +@return false compress failed. 
*/ +bool +PageBulk::compress() +{ + ut_ad(m_page_zip != NULL); + + return(page_zip_compress(m_page_zip, m_page, m_index, + page_zip_level, NULL, m_mtr)); +} + +/** Get node pointer +@return node pointer */ +dtuple_t* +PageBulk::getNodePtr() +{ + rec_t* first_rec; + dtuple_t* node_ptr; + + /* Create node pointer */ + first_rec = page_rec_get_next(page_get_infimum_rec(m_page)); + ut_a(page_rec_is_user_rec(first_rec)); + node_ptr = dict_index_build_node_ptr(m_index, first_rec, m_page_no, + m_heap, m_level); + + return(node_ptr); +} + +/** Get split rec in left page.We split a page in half when compresssion fails, +and the split rec will be copied to right page. +@return split rec */ +rec_t* +PageBulk::getSplitRec() +{ + rec_t* rec; + ulint* offsets; + ulint total_used_size; + ulint total_recs_size; + ulint n_recs; + + ut_ad(m_page_zip != NULL); + ut_ad(m_rec_no >= 2); + + ut_ad(page_get_free_space_of_empty(m_is_comp) > m_free_space); + total_used_size = page_get_free_space_of_empty(m_is_comp) + - m_free_space; + + total_recs_size = 0; + n_recs = 0; + offsets = NULL; + rec = page_get_infimum_rec(m_page); + + do { + rec = page_rec_get_next(rec); + ut_ad(page_rec_is_user_rec(rec)); + + offsets = rec_get_offsets(rec, m_index, + offsets, ULINT_UNDEFINED, + &(m_heap)); + total_recs_size += rec_offs_size(offsets); + n_recs++; + } while (total_recs_size + page_dir_calc_reserved_space(n_recs) + < total_used_size / 2); + + /* Keep at least one record on left page */ + if (page_rec_is_infimum(page_rec_get_prev(rec))) { + rec = page_rec_get_next(rec); + ut_ad(page_rec_is_user_rec(rec)); + } + + return(rec); +} + +/** Copy all records after split rec including itself. 
+@param[in] rec split rec */ +void +PageBulk::copyIn( + rec_t* split_rec) +{ + + rec_t* rec = split_rec; + ulint* offsets = NULL; + + ut_ad(m_rec_no == 0); + ut_ad(page_rec_is_user_rec(rec)); + + do { + offsets = rec_get_offsets(rec, m_index, offsets, + ULINT_UNDEFINED, &(m_heap)); + + insert(rec, offsets); + + rec = page_rec_get_next(rec); + } while (!page_rec_is_supremum(rec)); + + ut_ad(m_rec_no > 0); +} + +/** Remove all records after split rec including itself. +@param[in] rec split rec */ +void +PageBulk::copyOut( + rec_t* split_rec) +{ + rec_t* rec; + rec_t* last_rec; + ulint n; + + /* Suppose before copyOut, we have 5 records on the page: + infimum->r1->r2->r3->r4->r5->supremum, and r3 is the split rec. + + after copyOut, we have 2 records on the page: + infimum->r1->r2->supremum. slot ajustment is not done. */ + + rec = page_rec_get_next(page_get_infimum_rec(m_page)); + last_rec = page_rec_get_prev(page_get_supremum_rec(m_page)); + n = 0; + + while (rec != split_rec) { + rec = page_rec_get_next(rec); + n++; + } + + ut_ad(n > 0); + + /* Set last record's next in page */ + ulint* offsets = NULL; + rec = page_rec_get_prev(split_rec); + offsets = rec_get_offsets(rec, m_index, + offsets, ULINT_UNDEFINED, + &(m_heap)); + page_rec_set_next(rec, page_get_supremum_rec(m_page)); + + /* Set related members */ + m_cur_rec = rec; + m_heap_top = rec_get_end(rec, offsets); + + offsets = rec_get_offsets(last_rec, m_index, + offsets, ULINT_UNDEFINED, + &(m_heap)); + + m_free_space += rec_get_end(last_rec, offsets) + - m_heap_top + + page_dir_calc_reserved_space(m_rec_no) + - page_dir_calc_reserved_space(n); + ut_ad(m_free_space > 0); + m_rec_no = n; + +#ifdef UNIV_DEBUG + m_total_data -= rec_get_end(last_rec, offsets) - m_heap_top; +#endif /* UNIV_DEBUG */ +} + +/** Set next page +@param[in] next_page_no next page no */ +void +PageBulk::setNext( + ulint next_page_no) +{ + btr_page_set_next(m_page, NULL, next_page_no, m_mtr); +} + +/** Set previous page +@param[in] 
prev_page_no previous page no */ +void +PageBulk::setPrev( + ulint prev_page_no) +{ + btr_page_set_prev(m_page, NULL, prev_page_no, m_mtr); +} + +/** Check if required space is available in the page for the rec to be inserted. +We check fill factor & padding here. +@param[in] length required length +@return true if space is available */ +bool +PageBulk::isSpaceAvailable( + ulint rec_size) +{ + ulint slot_size; + ulint required_space; + + slot_size = page_dir_calc_reserved_space(m_rec_no + 1) + - page_dir_calc_reserved_space(m_rec_no); + + required_space = rec_size + slot_size; + + if (required_space > m_free_space) { + ut_ad(m_rec_no > 0); + return false; + } + + /* Fillfactor & Padding apply to both leaf and non-leaf pages. + Note: we keep at least 2 records in a page to avoid B-tree level + growing too high. */ + if (m_rec_no >= 2 + && ((m_page_zip == NULL && m_free_space - required_space + < m_reserved_space) + || (m_page_zip != NULL && m_free_space - required_space + < m_padding_space))) { + return(false); + } + + return(true); +} + +/** Check whether the record needs to be stored externally. +@return false if the entire record can be stored locally on the page */ +bool +PageBulk::needExt( + const dtuple_t* tuple, + ulint rec_size) +{ + return(page_zip_rec_needs_ext(rec_size, m_is_comp, + dtuple_get_n_fields(tuple), m_block->page.size)); +} + +/** Store external record +Since the record is not logged yet, so we don't log update to the record. +the blob data is logged first, then the record is logged in bulk mode. +@param[in] big_rec external recrod +@param[in] offsets record offsets +@return error code */ +dberr_t +PageBulk::storeExt( + const big_rec_t* big_rec, + ulint* offsets) +{ + /* Note: not all fileds are initialized in btr_pcur. 
*/ + btr_pcur_t btr_pcur; + btr_pcur.pos_state = BTR_PCUR_IS_POSITIONED; + btr_pcur.latch_mode = BTR_MODIFY_LEAF; + btr_pcur.btr_cur.index = m_index; + + page_cur_t* page_cur = &btr_pcur.btr_cur.page_cur; + page_cur->index = m_index; + page_cur->rec = m_cur_rec; + page_cur->offsets = offsets; + page_cur->block = m_block; + + dberr_t err = btr_store_big_rec_extern_fields( + &btr_pcur, NULL, offsets, big_rec, m_mtr, + BTR_STORE_INSERT_BULK); + + ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec)); + + /* Reset m_block and m_cur_rec from page cursor, because + block may be changed during blob insert. */ + m_block = page_cur->block; + m_cur_rec = page_cur->rec; + m_page = buf_block_get_frame(m_block); + + return(err); +} + +/** Release block by commiting mtr +Note: log_free_check requires holding no lock/latch in current thread. */ +void +PageBulk::release() +{ + ut_ad(!dict_index_is_spatial(m_index)); + + /* We fix the block because we will re-pin it soon. */ + buf_block_buf_fix_inc(m_block, __FILE__, __LINE__); + + /* No other threads can modify this block. */ + m_modify_clock = buf_block_get_modify_clock(m_block); + + mtr_commit(m_mtr); +} + +/** Start mtr and latch the block */ +dberr_t +PageBulk::latch() +{ + ibool ret; + + mtr_start(m_mtr); + mtr_x_lock(dict_index_get_lock(m_index), m_mtr); + mtr_set_log_mode(m_mtr, MTR_LOG_NO_REDO); + mtr_set_flush_observer(m_mtr, m_flush_observer); + + /* TODO: need a simple and wait version of buf_page_optimistic_get. */ + ret = buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock, + __FILE__, __LINE__, m_mtr); + /* In case the block is S-latched by page_cleaner. 
*/ + if (!ret) { + page_id_t page_id(dict_index_get_space(m_index), m_page_no); + page_size_t page_size(dict_table_page_size(m_index->table)); + + m_block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, + m_block, BUF_GET_IF_IN_POOL, + __FILE__, __LINE__, m_mtr, &m_err); + + if (m_err != DB_SUCCESS) { + return (m_err); + } + + ut_ad(m_block != NULL); + } + + buf_block_buf_fix_dec(m_block); + + ut_ad(m_cur_rec > m_page && m_cur_rec < m_heap_top); + + return (m_err); +} + +/** Split a page +@param[in] page_bulk page to split +@param[in] next_page_bulk next page +@return error code */ +dberr_t +BtrBulk::pageSplit( + PageBulk* page_bulk, + PageBulk* next_page_bulk) +{ + ut_ad(page_bulk->getPageZip() != NULL); + + /* 1. Check if we have only one user record on the page. */ + if (page_bulk->getRecNo() <= 1) { + return(DB_TOO_BIG_RECORD); + } + + /* 2. create a new page. */ + PageBulk new_page_bulk(m_index, m_trx_id, FIL_NULL, + page_bulk->getLevel(), m_flush_observer); + dberr_t err = new_page_bulk.init(); + if (err != DB_SUCCESS) { + return(err); + } + + /* 3. copy the upper half to new page. */ + rec_t* split_rec = page_bulk->getSplitRec(); + new_page_bulk.copyIn(split_rec); + page_bulk->copyOut(split_rec); + + /* 4. commit the splitted page. */ + err = pageCommit(page_bulk, &new_page_bulk, true); + if (err != DB_SUCCESS) { + pageAbort(&new_page_bulk); + return(err); + } + + /* 5. commit the new page. */ + err = pageCommit(&new_page_bulk, next_page_bulk, true); + if (err != DB_SUCCESS) { + pageAbort(&new_page_bulk); + return(err); + } + + return(err); +} + +/** Commit(finish) a page. We set next/prev page no, compress a page of +compressed table and split the page if compression fails, insert a node +pointer to father page if needed, and commit mini-transaction. 
+@param[in] page_bulk page to commit +@param[in] next_page_bulk next page +@param[in] insert_father false when page_bulk is a root page and + true when it's a non-root page +@return error code */ +dberr_t +BtrBulk::pageCommit( + PageBulk* page_bulk, + PageBulk* next_page_bulk, + bool insert_father) +{ + page_bulk->finish(); + + /* Set page links */ + if (next_page_bulk != NULL) { + ut_ad(page_bulk->getLevel() == next_page_bulk->getLevel()); + + page_bulk->setNext(next_page_bulk->getPageNo()); + next_page_bulk->setPrev(page_bulk->getPageNo()); + } else { + /** Suppose a page is released and latched again, we need to + mark it modified in mini-transaction. */ + page_bulk->setNext(FIL_NULL); + } + + /* Compress page if it's a compressed table. */ + if (page_bulk->getPageZip() != NULL && !page_bulk->compress()) { + return(pageSplit(page_bulk, next_page_bulk)); + } + + /* Insert node pointer to father page. */ + if (insert_father) { + dtuple_t* node_ptr = page_bulk->getNodePtr(); + dberr_t err = insert(node_ptr, page_bulk->getLevel()+1); + + if (err != DB_SUCCESS) { + return(err); + } + } + + /* Commit mtr. 
*/ + page_bulk->commit(true); + + return(DB_SUCCESS); +} + +/** Log free check */ +void +BtrBulk::logFreeCheck() +{ + if (log_sys->check_flush_or_checkpoint) { + release(); + + log_free_check(); + + latch(); + } +} + +/** Release all latches */ +void +BtrBulk::release() +{ + ut_ad(m_root_level + 1 == m_page_bulks->size()); + + for (ulint level = 0; level <= m_root_level; level++) { + PageBulk* page_bulk = m_page_bulks->at(level); + + page_bulk->release(); + } +} + +/** Re-latch all latches */ +void +BtrBulk::latch() +{ + ut_ad(m_root_level + 1 == m_page_bulks->size()); + + for (ulint level = 0; level <= m_root_level; level++) { + PageBulk* page_bulk = m_page_bulks->at(level); + page_bulk->latch(); + } +} + +/** Insert a tuple to page in a level +@param[in] tuple tuple to insert +@param[in] level B-tree level +@return error code */ +dberr_t +BtrBulk::insert( + dtuple_t* tuple, + ulint level) +{ + bool is_left_most = false; + dberr_t err = DB_SUCCESS; + + ut_ad(m_heap != NULL); + + /* Check if we need to create a PageBulk for the level. 
*/ + if (level + 1 > m_page_bulks->size()) { + PageBulk* new_page_bulk + = UT_NEW_NOKEY(PageBulk(m_index, m_trx_id, FIL_NULL, + level, m_flush_observer)); + err = new_page_bulk->init(); + if (err != DB_SUCCESS) { + return(err); + } + + m_page_bulks->push_back(new_page_bulk); + ut_ad(level + 1 == m_page_bulks->size()); + m_root_level = level; + + is_left_most = true; + } + + ut_ad(m_page_bulks->size() > level); + + PageBulk* page_bulk = m_page_bulks->at(level); + + if (is_left_most && level > 0 && page_bulk->getRecNo() == 0) { + /* The node pointer must be marked as the predefined minimum + record, as there is no lower alphabetical limit to records in + the leftmost node of a level: */ + dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) + | REC_INFO_MIN_REC_FLAG); + } + + ulint n_ext = 0; + ulint rec_size = rec_get_converted_size(m_index, tuple, n_ext); + big_rec_t* big_rec = NULL; + rec_t* rec = NULL; + ulint* offsets = NULL; + + if (page_bulk->needExt(tuple, rec_size)) { + /* The record is so big that we have to store some fields + externally on separate database pages */ + big_rec = dtuple_convert_big_rec(m_index, 0, tuple, &n_ext); + + if (big_rec == NULL) { + return(DB_TOO_BIG_RECORD); + } + + rec_size = rec_get_converted_size(m_index, tuple, n_ext); + } + + if (page_bulk->getPageZip() != NULL + && page_zip_is_too_big(m_index, tuple)) { + err = DB_TOO_BIG_RECORD; + goto func_exit; + } + + if (!page_bulk->isSpaceAvailable(rec_size)) { + /* Create a sibling page_bulk. */ + PageBulk* sibling_page_bulk; + sibling_page_bulk = UT_NEW_NOKEY(PageBulk(m_index, m_trx_id, + FIL_NULL, level, + m_flush_observer)); + err = sibling_page_bulk->init(); + if (err != DB_SUCCESS) { + UT_DELETE(sibling_page_bulk); + goto func_exit; + } + + /* Commit page bulk. */ + err = pageCommit(page_bulk, sibling_page_bulk, true); + if (err != DB_SUCCESS) { + pageAbort(sibling_page_bulk); + UT_DELETE(sibling_page_bulk); + goto func_exit; + } + + /* Set new page bulk to page_bulks. 
*/ + ut_ad(sibling_page_bulk->getLevel() <= m_root_level); + m_page_bulks->at(level) = sibling_page_bulk; + + UT_DELETE(page_bulk); + page_bulk = sibling_page_bulk; + + /* Important: log_free_check whether we need a checkpoint. */ + if (page_is_leaf(sibling_page_bulk->getPage())) { + /* Check whether trx is interrupted */ + if (m_flush_observer->check_interrupted()) { + err = DB_INTERRUPTED; + goto func_exit; + } + + /* Wake up page cleaner to flush dirty pages. */ + srv_inc_activity_count(); + os_event_set(buf_flush_event); + + logFreeCheck(); + } + + } + + /* Convert tuple to rec. */ + rec = rec_convert_dtuple_to_rec(static_cast(mem_heap_alloc( + page_bulk->m_heap, rec_size)), m_index, tuple, n_ext); + offsets = rec_get_offsets(rec, m_index, offsets, ULINT_UNDEFINED, + &(page_bulk->m_heap)); + + page_bulk->insert(rec, offsets); + + if (big_rec != NULL) { + ut_ad(dict_index_is_clust(m_index)); + ut_ad(page_bulk->getLevel() == 0); + ut_ad(page_bulk == m_page_bulks->at(0)); + + /* Release all latched but leaf node. */ + for (ulint level = 1; level <= m_root_level; level++) { + PageBulk* page_bulk = m_page_bulks->at(level); + + page_bulk->release(); + } + + err = page_bulk->storeExt(big_rec, offsets); + + /* Latch */ + for (ulint level = 1; level <= m_root_level; level++) { + PageBulk* page_bulk = m_page_bulks->at(level); + page_bulk->latch(); + } + } + +func_exit: + if (big_rec != NULL) { + dtuple_convert_back_big_rec(m_index, tuple, big_rec); + } + + return(err); +} + +/** Btree bulk load finish. We commit the last page in each level +and copy the last page in top level to the root page of the index +if no error occurs. +@param[in] err whether bulk load was successful until now +@return error code */ +dberr_t +BtrBulk::finish(dberr_t err) +{ + ulint last_page_no = FIL_NULL; + + ut_ad(!dict_table_is_temporary(m_index->table)); + + if (m_page_bulks->size() == 0) { + /* The table is empty. The root page of the index tree + is already in a consistent state. 
No need to flush. */ + return(err); + } + + ut_ad(m_root_level + 1 == m_page_bulks->size()); + + /* Finish all page bulks */ + for (ulint level = 0; level <= m_root_level; level++) { + PageBulk* page_bulk = m_page_bulks->at(level); + + last_page_no = page_bulk->getPageNo(); + + if (err == DB_SUCCESS) { + err = pageCommit(page_bulk, NULL, + level != m_root_level); + } + + if (err != DB_SUCCESS) { + pageAbort(page_bulk); + } + + UT_DELETE(page_bulk); + } + + if (err == DB_SUCCESS) { + rec_t* first_rec; + mtr_t mtr; + buf_block_t* last_block; + page_t* last_page; + page_id_t page_id(dict_index_get_space(m_index), + last_page_no); + page_size_t page_size(dict_table_page_size(m_index->table)); + ulint root_page_no = dict_index_get_page(m_index); + PageBulk root_page_bulk(m_index, m_trx_id, + root_page_no, m_root_level, + m_flush_observer); + + mtr_start(&mtr); + mtr.set_named_space(dict_index_get_space(m_index)); + mtr_x_lock(dict_index_get_lock(m_index), &mtr); + + ut_ad(last_page_no != FIL_NULL); + last_block = btr_block_get(page_id, page_size, + RW_X_LATCH, m_index, &mtr); + last_page = buf_block_get_frame(last_block); + first_rec = page_rec_get_next(page_get_infimum_rec(last_page)); + ut_ad(page_rec_is_user_rec(first_rec)); + + /* Copy last page to root page. */ + err = root_page_bulk.init(); + if (err != DB_SUCCESS) { + mtr_commit(&mtr); + return(err); + } + root_page_bulk.copyIn(first_rec); + + /* Remove last page. */ + btr_page_free_low(m_index, last_block, m_root_level, false, &mtr); + + /* Do not flush the last page. 
*/ + last_block->page.flush_observer = NULL; + + mtr_commit(&mtr); + + err = pageCommit(&root_page_bulk, NULL, false); + ut_ad(err == DB_SUCCESS); + } + +#ifdef UNIV_DEBUG + dict_sync_check check(true); + + ut_ad(!sync_check_iterate(check)); +#endif /* UNIV_DEBUG */ + + ut_ad(err != DB_SUCCESS || btr_validate_index(m_index, NULL, false)); + return(err); +} diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index f1d4e03e230..7daec068f78 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2015, 2016, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -63,13 +63,14 @@ Created 10/16/1994 Heikki Tuuri #include "row0purge.h" #include "row0upd.h" #include "trx0rec.h" -#include "trx0roll.h" /* trx_is_recv() */ +#include "trx0roll.h" #include "que0que.h" #include "row0row.h" #include "srv0srv.h" #include "ibuf0ibuf.h" #include "lock0lock.h" #include "zlib.h" +#include "srv0start.h" /** Buffered B-tree operation types, introduced as part of delete buffering. */ enum btr_op_t { @@ -80,29 +81,47 @@ enum btr_op_t { BTR_DELMARK_OP /*!< Mark a record for deletion */ }; -#ifdef UNIV_DEBUG -/** If the following is set to TRUE, this module prints a lot of -trace information of individual record operations */ -UNIV_INTERN ibool btr_cur_print_record_ops = FALSE; -#endif /* UNIV_DEBUG */ +/** Modification types for the B-tree operation. 
*/ +enum btr_intention_t { + BTR_INTENTION_DELETE, + BTR_INTENTION_BOTH, + BTR_INTENTION_INSERT +}; +#if BTR_INTENTION_DELETE > BTR_INTENTION_BOTH +#error "BTR_INTENTION_DELETE > BTR_INTENTION_BOTH" +#endif +#if BTR_INTENTION_BOTH > BTR_INTENTION_INSERT +#error "BTR_INTENTION_BOTH > BTR_INTENTION_INSERT" +#endif + +/** For the index->lock scalability improvement, only possibility of clear +performance regression observed was caused by grown huge history list length. +That is because the exclusive use of index->lock also worked as reserving +free blocks and read IO bandwidth with priority. To avoid huge glowing history +list as same level with previous implementation, prioritizes pessimistic tree +operations by purge as the previous, when it seems to be growing huge. + + Experimentally, the history list length starts to affect to performance +throughput clearly from about 100000. */ +#define BTR_CUR_FINE_HISTORY_LENGTH 100000 /** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ -UNIV_INTERN ulint btr_cur_n_non_sea = 0; +ulint btr_cur_n_non_sea = 0; /** Number of successful adaptive hash index lookups in btr_cur_search_to_nth_level(). */ -UNIV_INTERN ulint btr_cur_n_sea = 0; +ulint btr_cur_n_sea = 0; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ -UNIV_INTERN ulint btr_cur_n_non_sea_old = 0; +ulint btr_cur_n_non_sea_old = 0; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). 
*/ -UNIV_INTERN ulint btr_cur_n_sea_old = 0; +ulint btr_cur_n_sea_old = 0; #ifdef UNIV_DEBUG /* Flag to limit optimistic insert records */ -UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0; +uint btr_cur_limit_optimistic_insert_debug = 0; #endif /* UNIV_DEBUG */ /** In the optimistic insert, if the insert does not fit, but this much space @@ -121,29 +140,19 @@ can be released by page reorganize, then it is reorganized */ part header, in bytes */ /** Estimated table level stats from sampled value. -@param value sampled stats -@param index index being sampled -@param sample number of sampled rows -@param ext_size external stored data size -@param not_empty table not empty +@param value sampled stats +@param index index being sampled +@param sample number of sampled rows +@param ext_size external stored data size +@param not_empty table not empty @return estimated table wide stats from sampled value */ -#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ - (((value) * (ib_int64_t) index->stat_n_leaf_pages \ +#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \ + (((value) * static_cast(index->stat_n_leaf_pages) \ + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) /* @} */ #endif /* !UNIV_HOTBACKUP */ -/** A BLOB field reference full of zero, for use in assertions and tests. -Initially, BLOB field references are set to zero, in -dtuple_convert_big_rec(). */ -const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE] = { - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -}; - #ifndef UNIV_HOTBACKUP /*******************************************************************//** Marks all extern fields in a record as owned by the record. 
This function @@ -184,7 +193,7 @@ btr_rec_free_updated_extern_fields( part will be updated, or NULL */ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ const upd_t* update, /*!< in: update vector */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? */ mtr_t* mtr); /*!< in: mini-transaction handle which contains an X-latch to record page and to the tree */ /***********************************************************//** @@ -199,120 +208,173 @@ btr_rec_free_externally_stored_fields( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed part will be updated, or NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? */ mtr_t* mtr); /*!< in: mini-transaction handle which contains an X-latch to record page and to the index tree */ #endif /* !UNIV_HOTBACKUP */ -/******************************************************//** -The following function is used to set the deleted bit of a record. */ -UNIV_INLINE -void -btr_rec_set_deleted_flag( -/*=====================*/ - rec_t* rec, /*!< in/out: physical record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */ - ulint flag) /*!< in: nonzero if delete marked */ -{ - if (page_rec_is_comp(rec)) { - rec_set_deleted_flag_new(rec, page_zip, flag); - } else { - ut_ad(!page_zip); - rec_set_deleted_flag_old(rec, flag); - } -} - #ifndef UNIV_HOTBACKUP /*==================== B-TREE SEARCH =========================*/ -/********************************************************************//** -Latches the leaf page or pages requested. 
*/ -static -void +#if MTR_MEMO_PAGE_S_FIX != RW_S_LATCH +#error "MTR_MEMO_PAGE_S_FIX != RW_S_LATCH" +#endif +#if MTR_MEMO_PAGE_X_FIX != RW_X_LATCH +#error "MTR_MEMO_PAGE_X_FIX != RW_X_LATCH" +#endif +#if MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH +#error "MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH" +#endif + +/** Latches the leaf page or pages requested. +@param[in] block leaf page where the search converged +@param[in] page_id page id of the leaf +@param[in] latch_mode BTR_SEARCH_LEAF, ... +@param[in] cursor cursor +@param[in] mtr mini-transaction +@return blocks and savepoints which actually latched. */ +btr_latch_leaves_t btr_cur_latch_leaves( -/*=================*/ - page_t* page, /*!< in: leaf page where the search - converged */ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the leaf */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /*!< in: cursor */ - mtr_t* mtr) /*!< in: mtr */ + buf_block_t* block, + const page_id_t& page_id, + const page_size_t& page_size, + ulint latch_mode, + btr_cur_t* cursor, + mtr_t* mtr) { ulint mode; ulint left_page_no; ulint right_page_no; buf_block_t* get_block; + page_t* page = buf_block_get_frame(block); + bool spatial; + btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; - ut_ad(page && mtr); + spatial = dict_index_is_spatial(cursor->index) && cursor->rtr_info; + ut_ad(buf_page_in_file(&block->page)); switch (latch_mode) { case BTR_SEARCH_LEAF: case BTR_MODIFY_LEAF: - mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH; - get_block = btr_block_get( - space, zip_size, page_no, mode, cursor->index, mtr); + case BTR_SEARCH_TREE: + if (spatial) { + cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS] + = mtr_set_savepoint(mtr); + } + + mode = latch_mode == BTR_MODIFY_LEAF ? 
RW_X_LATCH : RW_S_LATCH; + latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); + get_block = btr_block_get(page_id, page_size, mode, + cursor->index, mtr); + latch_leaves.blocks[1] = get_block; #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(get_block->frame) == page_is_comp(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; - return; + if (spatial) { + cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS] + = get_block; + } + + return(latch_leaves); case BTR_MODIFY_TREE: - /* x-latch also brothers from left to right */ + /* It is exclusive for other operations which calls + btr_page_set_prev() */ + ut_ad(mtr_memo_contains_flagged(mtr, + dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(cursor->index->table)); + /* x-latch also siblings from left to right */ left_page_no = btr_page_get_prev(page, mtr); mode = latch_mode; if (left_page_no != FIL_NULL) { + + if (spatial) { + cursor->rtr_info->tree_savepoints[ + RTR_MAX_LEVELS] = mtr_set_savepoint(mtr); + } + + latch_leaves.savepoints[0] = mtr_set_savepoint(mtr); get_block = btr_block_get( - space, zip_size, left_page_no, - RW_X_LATCH, cursor->index, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(get_block->frame) - == page_is_comp(page)); - ut_a(btr_page_get_next(get_block->frame, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; + page_id_t(page_id.space(), left_page_no), + page_size, RW_X_LATCH, cursor->index, mtr); + latch_leaves.blocks[0] = get_block; + + if (spatial) { + cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS] + = get_block; + } } + if (spatial) { + cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] + = mtr_set_savepoint(mtr); + } + + latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); get_block = btr_block_get( - space, zip_size, page_no, - RW_X_LATCH, cursor->index, mtr); + page_id, page_size, RW_X_LATCH, cursor->index, mtr); + latch_leaves.blocks[1] = get_block; + 
#ifdef UNIV_BTR_DEBUG + /* Sanity check only after both the blocks are latched. */ + if (latch_leaves.blocks[0] != NULL) { + ut_a(page_is_comp(latch_leaves.blocks[0]->frame) + == page_is_comp(page)); + ut_a(btr_page_get_next( + latch_leaves.blocks[0]->frame, mtr) + == page_get_page_no(page)); + } ut_a(page_is_comp(get_block->frame) == page_is_comp(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; + + if (spatial) { + cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] + = get_block; + } right_page_no = btr_page_get_next(page, mtr); if (right_page_no != FIL_NULL) { + if (spatial) { + cursor->rtr_info->tree_savepoints[ + RTR_MAX_LEVELS + 2] = mtr_set_savepoint( + mtr); + } + latch_leaves.savepoints[2] = mtr_set_savepoint(mtr); get_block = btr_block_get( - space, zip_size, right_page_no, - RW_X_LATCH, cursor->index, mtr); + page_id_t(page_id.space(), right_page_no), + page_size, RW_X_LATCH, cursor->index, mtr); + latch_leaves.blocks[2] = get_block; #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(get_block->frame) == page_is_comp(page)); ut_a(btr_page_get_prev(get_block->frame, mtr) == page_get_page_no(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; + if (spatial) { + cursor->rtr_info->tree_blocks[ + RTR_MAX_LEVELS + 2] = get_block; + } } - return; + return(latch_leaves); case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: mode = latch_mode == BTR_SEARCH_PREV ? 
RW_S_LATCH : RW_X_LATCH; - /* latch also left brother */ + /* latch also left sibling */ + rw_lock_s_lock(&block->lock); left_page_no = btr_page_get_prev(page, mtr); + rw_lock_s_unlock(&block->lock); if (left_page_no != FIL_NULL) { + latch_leaves.savepoints[0] = mtr_set_savepoint(mtr); get_block = btr_block_get( - space, zip_size, - left_page_no, mode, cursor->index, mtr); + page_id_t(page_id.space(), left_page_no), + page_size, mode, cursor->index, mtr); + latch_leaves.blocks[0] = get_block; cursor->left_block = get_block; #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(get_block->frame) @@ -320,19 +382,333 @@ btr_cur_latch_leaves( ut_a(btr_page_get_next(get_block->frame, mtr) == page_get_page_no(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; } - get_block = btr_block_get( - space, zip_size, page_no, mode, cursor->index, mtr); + latch_leaves.savepoints[1] = mtr_set_savepoint(mtr); + get_block = btr_block_get(page_id, page_size, mode, + cursor->index, mtr); + latch_leaves.blocks[1] = get_block; #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(get_block->frame) == page_is_comp(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; - return; + return(latch_leaves); + case BTR_CONT_MODIFY_TREE: + ut_ad(dict_index_is_spatial(cursor->index)); + return(latch_leaves); } ut_error; + return(latch_leaves); +} + +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in] modify_clock modify clock value +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... 
+@param[in,out] cursor cursor +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@return true if success */ +bool +btr_cur_optimistic_latch_leaves( + buf_block_t* block, + ib_uint64_t modify_clock, + ulint* latch_mode, + btr_cur_t* cursor, + const char* file, + ulint line, + mtr_t* mtr) +{ + ulint mode; + ulint left_page_no; + + switch (*latch_mode) { + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + return(buf_page_optimistic_get(*latch_mode, block, + modify_clock, file, line, mtr)); + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + mode = *latch_mode == BTR_SEARCH_PREV + ? RW_S_LATCH : RW_X_LATCH; + + buf_page_mutex_enter(block); + if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { + buf_page_mutex_exit(block); + return(false); + } + /* pin the block not to be relocated */ + buf_block_buf_fix_inc(block, file, line); + buf_page_mutex_exit(block); + + rw_lock_s_lock(&block->lock); + if (block->modify_clock != modify_clock) { + rw_lock_s_unlock(&block->lock); + + goto unpin_failed; + } + left_page_no = btr_page_get_prev( + buf_block_get_frame(block), mtr); + rw_lock_s_unlock(&block->lock); + + if (left_page_no != FIL_NULL) { + const page_id_t page_id( + dict_index_get_space(cursor->index), + left_page_no); + + cursor->left_block = btr_block_get( + page_id, + dict_table_page_size(cursor->index->table), + mode, cursor->index, mtr); + } else { + cursor->left_block = NULL; + } + + if (buf_page_optimistic_get(mode, block, modify_clock, + file, line, mtr)) { + if (btr_page_get_prev(buf_block_get_frame(block), mtr) + == left_page_no) { + /* adjust buf_fix_count */ + buf_page_mutex_enter(block); + buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); + + *latch_mode = mode; + return(true); + } else { + /* release the block */ + btr_leaf_page_release(block, mode, mtr); + } + } + + /* release the left block */ + if (cursor->left_block != NULL) { + btr_leaf_page_release(cursor->left_block, + mode, mtr); + } 
+unpin_failed: + /* unpin the block */ + buf_page_mutex_enter(block); + buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); + + return(false); + + default: + ut_error; + return(false); + } +} + +/** +Gets intention in btr_intention_t from latch_mode, and cleares the intention +at the latch_mode. +@param latch_mode in/out: pointer to latch_mode +@return intention for latching tree */ +static +btr_intention_t +btr_cur_get_and_clear_intention( + ulint *latch_mode) +{ + btr_intention_t intention; + + switch (*latch_mode & (BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE)) { + case BTR_LATCH_FOR_INSERT: + intention = BTR_INTENTION_INSERT; + break; + case BTR_LATCH_FOR_DELETE: + intention = BTR_INTENTION_DELETE; + break; + default: + /* both or unknown */ + intention = BTR_INTENTION_BOTH; + } + *latch_mode &= ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE); + + return(intention); +} + +/** +Gets the desired latch type for the root leaf (root page is root leaf) +at the latch mode. +@param latch_mode in: BTR_SEARCH_LEAF, ... +@return latch type */ +static +rw_lock_type_t +btr_cur_latch_for_root_leaf( + ulint latch_mode) +{ + switch (latch_mode) { + case BTR_SEARCH_LEAF: + case BTR_SEARCH_TREE: + case BTR_SEARCH_PREV: + return(RW_S_LATCH); + case BTR_MODIFY_LEAF: + case BTR_MODIFY_TREE: + case BTR_MODIFY_PREV: + return(RW_X_LATCH); + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + /* A root page should be latched already, + and don't need to be latched here. + fall through (RW_NO_LATCH) */ + case BTR_NO_LATCHES: + return(RW_NO_LATCH); + } + + ut_error; + return(RW_NO_LATCH); /* avoid compiler warnings */ +} + +/** Detects whether the modifying record might need a modifying tree structure. 
+@param[in] index index +@param[in] page page +@param[in] lock_intention lock intention for the tree operation +@param[in] rec record (current node_ptr) +@param[in] rec_size size of the record or max size of node_ptr +@param[in] page_size page size +@param[in] mtr mtr +@return true if tree modification is needed */ +static +bool +btr_cur_will_modify_tree( + dict_index_t* index, + const page_t* page, + btr_intention_t lock_intention, + const rec_t* rec, + ulint rec_size, + const page_size_t& page_size, + mtr_t* mtr) +{ + ut_ad(!page_is_leaf(page)); + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + + /* Pessimistic delete of the first record causes delete & insert + of node_ptr at upper level. And a subsequent page shrink is + possible. It causes delete of node_ptr at the upper level. + So we should pay attention also to 2nd record not only + first record and last record. Because if the "delete & insert" are + done for the different page, the 2nd record become + first record and following compress might delete the record and causes + the uppper level node_ptr modification. */ + + if (lock_intention <= BTR_INTENTION_BOTH) { + ulint margin; + + /* check delete will cause. (BTR_INTENTION_BOTH + or BTR_INTENTION_DELETE) */ + /* first, 2nd, 2nd-last and last records are 4 records */ + if (page_get_n_recs(page) < 5) { + return(true); + } + + /* is first, 2nd or last record */ + if (page_rec_is_first(rec, page) + || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + && (page_rec_is_last(rec, page) + || page_rec_is_second_last(rec, page))) + || (mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL + && page_rec_is_second(rec, page))) { + return(true); + } + + if (lock_intention == BTR_INTENTION_BOTH) { + /* Delete at leftmost record in a page causes delete + & insert at its parent page. 
After that, the delete + might cause btr_compress() and delete record at its + parent page. Thus we should consider max 2 deletes. */ + + margin = rec_size * 2; + } else { + ut_ad(lock_intention == BTR_INTENTION_DELETE); + + margin = rec_size; + } + /* NOTE: call mach_read_from_4() directly to avoid assertion + failure. It is safe because we already have SX latch of the + index tree */ + if (page_get_data_size(page) + < margin + BTR_CUR_PAGE_COMPRESS_LIMIT(index) + || (mach_read_from_4(page + FIL_PAGE_NEXT) + == FIL_NULL + && mach_read_from_4(page + FIL_PAGE_PREV) + == FIL_NULL)) { + return(true); + } + } + + if (lock_intention >= BTR_INTENTION_BOTH) { + /* check insert will cause. BTR_INTENTION_BOTH + or BTR_INTENTION_INSERT*/ + + /* Once we invoke the btr_cur_limit_optimistic_insert_debug, + we should check it here in advance, since the max allowable + records in a page is limited. */ + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), + return(true)); + + /* needs 2 records' space for the case the single split and + insert cannot fit. + page_get_max_insert_size_after_reorganize() includes space + for page directory already */ + ulint max_size + = page_get_max_insert_size_after_reorganize(page, 2); + + if (max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + rec_size + || max_size < rec_size * 2) { + return(true); + } + /* TODO: optimize this condition for compressed page. + this is based on the worst compress rate. + currently looking only uncompressed page, but we can look + also compressed page page_zip_available() if already in the + buffer pool */ + /* needs 2 records' space also for worst compress rate. */ + if (page_size.is_compressed() + && page_zip_empty_size(index->n_fields, + page_size.physical()) + < rec_size * 2 + page_get_data_size(page) + + page_dir_calc_reserved_space( + page_get_n_recs(page) + 2) + 1) { + return(true); + } + } + + return(false); +} + +/** Detects whether the modifying record might need a opposite modification +to the intention. 
+@param[in] page page +@param[in] lock_intention lock intention for the tree operation +@param[in] rec record (current node_ptr) +@return true if tree modification is needed */ +static +bool +btr_cur_need_opposite_intention( + const page_t* page, + btr_intention_t lock_intention, + const rec_t* rec) +{ + switch (lock_intention) { + case BTR_INTENTION_DELETE: + return((mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL + && page_rec_is_first(rec, page)) + || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + && page_rec_is_last(rec, page))); + case BTR_INTENTION_INSERT: + return(mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + && page_rec_is_last(rec, page)); + case BTR_INTENTION_BOTH: + return(false); + } + + ut_error; + return(false); } /********************************************************************//** @@ -347,7 +723,6 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record. */ -UNIV_INTERN dberr_t btr_cur_search_to_nth_level( /*========================*/ @@ -356,7 +731,7 @@ btr_cur_search_to_nth_level( const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; Inserts should always be made using PAGE_CUR_LE to search the position! */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with @@ -372,45 +747,71 @@ btr_cur_search_to_nth_level( to protect the record! */ btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is s- or x-latched, but see also above! 
*/ - ulint has_search_latch,/*!< in: info on the latch mode the - caller currently has on btr_search_latch: + ulint has_search_latch, + /*!< in: info on the latch mode the + caller currently has on search system: RW_S_LATCH, or 0 */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { - page_t* page; + page_t* page = NULL; /* remove warning */ buf_block_t* block; - ulint space; buf_block_t* guess; ulint height; - ulint page_no; ulint up_match; ulint up_bytes; ulint low_match; ulint low_bytes; ulint savepoint; ulint rw_latch; - ulint page_mode; + page_cur_mode_t page_mode; + page_cur_mode_t search_mode = PAGE_CUR_UNSUPP; ulint buf_mode; ulint estimate; - ulint zip_size; + ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2; page_cur_t* page_cursor; btr_op_t btr_op; ulint root_height = 0; /* remove warning */ dberr_t err = DB_SUCCESS; + ulint upper_rw_latch, root_leaf_rw_latch; + btr_intention_t lock_intention; + bool modify_external; + buf_block_t* tree_blocks[BTR_MAX_LEVELS]; + ulint tree_savepoints[BTR_MAX_LEVELS]; + ulint n_blocks = 0; + ulint n_releases = 0; + bool detected_same_key_root = false; + + bool retrying_for_search_prev = false; + ulint leftmost_from_level = 0; + buf_block_t** prev_tree_blocks = NULL; + ulint* prev_tree_savepoints = NULL; + ulint prev_n_blocks = 0; + ulint prev_n_releases = 0; + bool need_path = true; + bool rtree_parent_modified = false; + bool mbr_adj = false; + bool found = false; + + DBUG_ENTER("btr_cur_search_to_nth_level"); + #ifdef BTR_CUR_ADAPT btr_search_t* info; -#endif +#endif /* BTR_CUR_ADAPT */ mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + ulint offsets2_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets2 = offsets2_; rec_offs_init(offsets_); + rec_offs_init(offsets2_); /* Currently, PAGE_CUR_LE is the only search mode used for searches ending to upper levels */ - ut_ad(level == 0 || mode == PAGE_CUR_LE); + ut_ad(level == 0 || mode == 
PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); ut_ad(dict_index_check_search_tuple(index, tuple)); ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); ut_ad(dtuple_check_typed(tuple)); @@ -424,15 +825,18 @@ btr_cur_search_to_nth_level( #ifdef UNIV_DEBUG cursor->up_match = ULINT_UNDEFINED; cursor->low_match = ULINT_UNDEFINED; -#endif +#endif /* UNIV_DEBUG */ ibool s_latch_by_caller; s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; ut_ad(!s_latch_by_caller - || mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); + || srv_read_only_mode + || mtr_memo_contains_flagged(mtr, + dict_index_get_lock(index), + MTR_MEMO_S_LOCK + | MTR_MEMO_SX_LOCK)); /* These flags are mutually exclusive, they are lumped together with the latch mode for historical reasons. It's possible for @@ -465,14 +869,25 @@ btr_cur_search_to_nth_level( ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index)); /* Operations on the clustered index cannot be buffered. */ ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index)); + /* Operations on the temporary table(indexes) cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !dict_table_is_temporary(index->table)); + /* Operation on the spatial index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index)); estimate = latch_mode & BTR_ESTIMATE; + lock_intention = btr_cur_get_and_clear_intention(&latch_mode); + + modify_external = latch_mode & BTR_MODIFY_EXTERNAL; + /* Turn the flags unrelated to the latch mode off. 
*/ latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + ut_ad(!modify_external || latch_mode == BTR_MODIFY_LEAF); + ut_ad(!s_latch_by_caller || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_SEARCH_TREE || latch_mode == BTR_MODIFY_LEAF); cursor->flag = BTR_CUR_BINARY; @@ -483,24 +898,34 @@ btr_cur_search_to_nth_level( #else info = btr_search_get_info(index); - guess = info->root_guess; + if (!buf_pool_is_obsolete(info->withdraw_clock)) { + guess = info->root_guess; + } else { + guess = NULL; + } #ifdef BTR_CUR_HASH_ADAPT # ifdef UNIV_SEARCH_PERF_STAT info->n_searches++; # endif - if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED + /* Use of AHI is disabled for intrinsic table as these tables re-use + the index-id and AHI validation is based on index-id. */ + if (rw_lock_get_writer(btr_get_search_latch(index)) + == RW_LOCK_NOT_LOCKED && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ + && !index->disable_ahi && !estimate # ifdef PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS # endif /* PAGE_CUR_LE_OR_EXTENDS */ + && !dict_index_is_spatial(index) /* If !has_search_latch, we do a dirty read of btr_search_enabled below, and btr_search_guess_on_hash() will have to check it again. 
*/ && UNIV_LIKELY(btr_search_enabled) + && !modify_external && btr_search_guess_on_hash(index, info, tuple, mode, latch_mode, cursor, has_search_latch, mtr)) { @@ -515,7 +940,7 @@ btr_cur_search_to_nth_level( || mode != PAGE_CUR_LE); btr_cur_n_sea++; - return err; + DBUG_RETURN(err); } # endif /* BTR_CUR_HASH_ADAPT */ #endif /* BTR_CUR_ADAPT */ @@ -526,7 +951,7 @@ btr_cur_search_to_nth_level( if (has_search_latch) { /* Release possible search latch to obey latching order */ - rw_lock_s_unlock(&btr_search_latch); + rw_lock_s_unlock(btr_get_search_latch(index)); } /* Store the position of the tree latch we push to mtr so that we @@ -536,23 +961,76 @@ btr_cur_search_to_nth_level( switch (latch_mode) { case BTR_MODIFY_TREE: - mtr_x_lock(dict_index_get_lock(index), mtr); + /* Most of delete-intended operations are purging. + Free blocks and read IO bandwidth should be prior + for them, when the history list is glowing huge. */ + if (lock_intention == BTR_INTENTION_DELETE + && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH + && buf_get_n_pending_read_ios()) { + mtr_x_lock(dict_index_get_lock(index), mtr); + } else if (dict_index_is_spatial(index) + && lock_intention <= BTR_INTENTION_BOTH) { + /* X lock the if there is possibility of + pessimistic delete on spatial index. 
As we could + lock upward for the tree */ + + mtr_x_lock(dict_index_get_lock(index), mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), mtr); + } + upper_rw_latch = RW_X_LATCH; break; case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: /* Do nothing */ - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); + ut_ad(srv_read_only_mode + || mtr_memo_contains_flagged(mtr, + dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + if (dict_index_is_spatial(index) + && latch_mode == BTR_CONT_MODIFY_TREE) { + /* If we are about to locating parent page for split + and/or merge operation for R-Tree index, X latch + the parent */ + upper_rw_latch = RW_X_LATCH; + } else { + upper_rw_latch = RW_NO_LATCH; + } break; default: - if (!s_latch_by_caller) { - mtr_s_lock(dict_index_get_lock(index), mtr); + if (!srv_read_only_mode) { + if (s_latch_by_caller) { + ut_ad(rw_lock_own(dict_index_get_lock(index), + RW_LOCK_S)); + } else if (!modify_external) { + /* BTR_SEARCH_TREE is intended to be used with + BTR_ALREADY_S_LATCHED */ + ut_ad(latch_mode != BTR_SEARCH_TREE); + + mtr_s_lock(dict_index_get_lock(index), mtr); + } else { + /* BTR_MODIFY_EXTERNAL needs to be excluded */ + mtr_sx_lock(dict_index_get_lock(index), mtr); + } + upper_rw_latch = RW_S_LATCH; + } else { + upper_rw_latch = RW_NO_LATCH; } } + root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode); page_cursor = btr_cur_get_page_cur(cursor); - space = dict_index_get_space(index); - page_no = dict_index_get_page(index); + const ulint space = dict_index_get_space(index); + const page_size_t page_size(dict_table_page_size(index->table)); + + /* Start with the root page. 
*/ + page_id_t page_id(space, dict_index_get_page(index)); + + if (root_leaf_rw_latch == RW_X_LATCH) { + node_ptr_max_size = dict_index_node_ptr_max_size(index); + } up_match = 0; up_bytes = 0; @@ -575,22 +1053,41 @@ btr_cur_search_to_nth_level( default: #ifdef PAGE_CUR_LE_OR_EXTENDS ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode) || mode == PAGE_CUR_LE_OR_EXTENDS); #else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); #endif /* PAGE_CUR_LE_OR_EXTENDS */ page_mode = mode; break; } /* Loop and search until we arrive at the desired level */ + btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; search_loop: buf_mode = BUF_GET; rw_latch = RW_NO_LATCH; + rtree_parent_modified = false; if (height != 0) { /* We are about to fetch the root or a non-leaf page. */ + if ((latch_mode != BTR_MODIFY_TREE + || height == level) + && !retrying_for_search_prev) { + /* If doesn't have SX or X latch of index, + each pages should be latched before reading. 
*/ + if (modify_external + && height == ULINT_UNDEFINED + && upper_rw_latch == RW_S_LATCH) { + /* needs sx-latch of root page + for fseg operation */ + rw_latch = RW_SX_LATCH; + } else { + rw_latch = upper_rw_latch; + } + } } else if (latch_mode <= BTR_MODIFY_LEAF) { rw_latch = latch_mode; @@ -606,12 +1103,12 @@ search_loop: } } - zip_size = dict_table_zip_size(index->table); - retry_page_get: - block = buf_page_get_gen( - space, zip_size, page_no, rw_latch, guess, buf_mode, - file, line, mtr, &err); + ut_ad(n_blocks < BTR_MAX_LEVELS); + tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + block = buf_page_get_gen(page_id, page_size, rw_latch, guess, + buf_mode, file, line, mtr, &err); + tree_blocks[n_blocks] = block; if (err != DB_SUCCESS) { if (err == DB_DECRYPTION_FAILED) { @@ -638,10 +1135,10 @@ retry_page_get: case BTR_INSERT_OP: case BTR_INSERT_IGNORE_UNIQUE_OP: ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + ut_ad(!dict_index_is_spatial(index)); if (ibuf_insert(IBUF_OP_INSERT, tuple, index, - space, zip_size, page_no, - cursor->thr)) { + page_id, page_size, cursor->thr)) { cursor->flag = BTR_CUR_INSERT_TO_IBUF; @@ -651,10 +1148,11 @@ retry_page_get: case BTR_DELMARK_OP: ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + ut_ad(!dict_index_is_spatial(index)); if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, - index, space, zip_size, - page_no, cursor->thr)) { + index, page_id, page_size, + cursor->thr)) { cursor->flag = BTR_CUR_DEL_MARK_IBUF; @@ -665,6 +1163,7 @@ retry_page_get: case BTR_DELETE_OP: ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); + ut_ad(!dict_index_is_spatial(index)); if (!row_purge_poss_sec(cursor->purge_node, index, tuple)) { @@ -672,19 +1171,18 @@ retry_page_get: /* The record cannot be purged yet. */ cursor->flag = BTR_CUR_DELETE_REF; } else if (ibuf_insert(IBUF_OP_DELETE, tuple, - index, space, zip_size, - page_no, + index, page_id, page_size, cursor->thr)) { /* The purge was buffered. 
*/ cursor->flag = BTR_CUR_DELETE_IBUF; } else { /* The purge could not be buffered. */ - buf_pool_watch_unset(space, page_no); + buf_pool_watch_unset(page_id); break; } - buf_pool_watch_unset(space, page_no); + buf_pool_watch_unset(page_id); goto func_exit; default: @@ -699,9 +1197,97 @@ retry_page_get: goto retry_page_get; } - block->check_index_page_at_flush = TRUE; + if (retrying_for_search_prev && height != 0) { + /* also latch left sibling */ + ulint left_page_no; + buf_block_t* get_block; + + ut_ad(rw_latch == RW_NO_LATCH); + + rw_latch = upper_rw_latch; + + rw_lock_s_lock(&block->lock); + left_page_no = btr_page_get_prev( + buf_block_get_frame(block), mtr); + rw_lock_s_unlock(&block->lock); + + if (left_page_no != FIL_NULL) { + ut_ad(prev_n_blocks < leftmost_from_level); + + prev_tree_savepoints[prev_n_blocks] + = mtr_set_savepoint(mtr); + get_block = buf_page_get_gen( + page_id_t(page_id.space(), left_page_no), + page_size, rw_latch, NULL, buf_mode, + file, line, mtr, &err); + prev_tree_blocks[prev_n_blocks] = get_block; + prev_n_blocks++; + + if (err != DB_SUCCESS) { + if (err == DB_DECRYPTION_FAILED) { + ib_push_warning((void *)NULL, + DB_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. " + " Can't continue reading table.", + index->table->name); + index->table->is_encrypted = true; + } + + goto func_exit; + } + + /* BTR_MODIFY_TREE doesn't update prev/next_page_no, + without their parent page's lock. So, not needed to + retry here, because we have the parent page's lock. 
*/ + } + + /* release RW_NO_LATCH page and lock with RW_S_LATCH */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_blocks], + tree_blocks[n_blocks]); + + tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + block = buf_page_get_gen(page_id, page_size, rw_latch, NULL, + buf_mode, file, line, mtr, &err); + tree_blocks[n_blocks] = block; + + if (err != DB_SUCCESS) { + if (err == DB_DECRYPTION_FAILED) { + ib_push_warning((void *)NULL, + DB_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. " + " Can't continue reading table.", + index->table->name); + index->table->is_encrypted = true; + } + + goto func_exit; + } + } + page = buf_block_get_frame(block); + if (height == ULINT_UNDEFINED + && page_is_leaf(page) + && rw_latch != RW_NO_LATCH + && rw_latch != root_leaf_rw_latch) { + /* We should retry to get the page, because the root page + is latched with different level as a leaf page. */ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); + ut_ad(rw_latch == RW_S_LATCH || modify_external); + + ut_ad(n_blocks == 0); + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_blocks], + tree_blocks[n_blocks]); + + upper_rw_latch = root_leaf_rw_latch; + goto search_loop; + } + if (rw_latch != RW_NO_LATCH) { #ifdef UNIV_ZIP_DEBUG const page_zip_des_t* page_zip @@ -714,7 +1300,7 @@ retry_page_get: ? 
SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE); } - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_ad(fil_page_index_page_check(page)); ut_ad(index->id == btr_page_get_index_id(page)); if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) { @@ -724,9 +1310,32 @@ retry_page_get: root_height = height; cursor->tree_height = root_height + 1; + if (dict_index_is_spatial(index)) { + ut_ad(cursor->rtr_info); + + node_seq_t seq_no = rtr_get_current_ssn_id(index); + + /* If SSN in memory is not initialized, fetch + it from root page */ + if (seq_no < 1) { + node_seq_t root_seq_no; + + root_seq_no = page_get_ssn_id(page); + + mutex_enter(&(index->rtr_ssn.mutex)); + index->rtr_ssn.seq_no = root_seq_no + 1; + mutex_exit(&(index->rtr_ssn.mutex)); + } + + /* Save the MBR */ + cursor->rtr_info->thr = cursor->thr; + rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr); + } + #ifdef BTR_CUR_ADAPT if (block != guess) { info->root_guess = block; + info->withdraw_clock = buf_withdraw_clock; } #endif } @@ -734,30 +1343,151 @@ retry_page_get: if (height == 0) { if (rw_latch == RW_NO_LATCH) { - btr_cur_latch_leaves( - page, space, zip_size, page_no, latch_mode, + latch_leaves = btr_cur_latch_leaves( + block, page_id, page_size, latch_mode, cursor, mtr); } switch (latch_mode) { case BTR_MODIFY_TREE: case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: break; default: - if (!s_latch_by_caller) { + if (!s_latch_by_caller + && !srv_read_only_mode + && !modify_external) { /* Release the tree s-latch */ + /* NOTE: BTR_MODIFY_EXTERNAL + needs to keep tree sx-latch */ mtr_release_s_latch_at_savepoint( mtr, savepoint, dict_index_get_lock(index)); } + + /* release upper blocks */ + if (retrying_for_search_prev) { + for (; + prev_n_releases < prev_n_blocks; + prev_n_releases++) { + mtr_release_block_at_savepoint( + mtr, + prev_tree_savepoints[ + prev_n_releases], + prev_tree_blocks[ + prev_n_releases]); + } + } + + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0 && modify_external) { 
+ /* keep latch of root page */ + ut_ad(mtr_memo_contains_flagged( + mtr, tree_blocks[n_releases], + MTR_MEMO_PAGE_SX_FIX + | MTR_MEMO_PAGE_X_FIX)); + continue; + } + + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } } page_mode = mode; } - page_cur_search_with_match( - block, index, tuple, page_mode, &up_match, &up_bytes, - &low_match, &low_bytes, page_cursor); + if (dict_index_is_spatial(index)) { + /* Remember the page search mode */ + search_mode = page_mode; + + /* Some adjustment on search mode, when the + page search mode is PAGE_CUR_RTREE_LOCATE + or PAGE_CUR_RTREE_INSERT, as we are searching + with MBRs. When it is not the target level, we + should search all sub-trees that "CONTAIN" the + search range/MBR. When it is at the target + level, the search becomes PAGE_CUR_LE */ + if (page_mode == PAGE_CUR_RTREE_LOCATE + && level == height) { + if (level == 0) { + page_mode = PAGE_CUR_LE; + } else { + page_mode = PAGE_CUR_RTREE_GET_FATHER; + } + } + + if (page_mode == PAGE_CUR_RTREE_INSERT) { + page_mode = (level == height) + ? 
PAGE_CUR_LE + : PAGE_CUR_RTREE_INSERT; + + ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); + } + + /* "need_path" indicates if we need to tracking the parent + pages, if it is not spatial comparison, then no need to + track it */ + if (page_mode < PAGE_CUR_CONTAIN) { + need_path = false; + } + + up_match = 0; + low_match = 0; + + if (latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_CONT_MODIFY_TREE + || latch_mode == BTR_CONT_SEARCH_TREE) { + /* Tree are locked, no need for Page Lock to protect + the "path" */ + cursor->rtr_info->need_page_lock = false; + } + } + + if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) { + ut_ad(need_path); + found = rtr_cur_search_with_match( + block, index, tuple, page_mode, page_cursor, + cursor->rtr_info); + + /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ + if (search_mode == PAGE_CUR_RTREE_INSERT + && cursor->rtr_info->mbr_adj) { + if (latch_mode & BTR_MODIFY_LEAF) { + /* Parent MBR needs updated, should retry + with BTR_MODIFY_TREE */ + goto func_exit; + } else if (latch_mode & BTR_MODIFY_TREE) { + rtree_parent_modified = true; + cursor->rtr_info->mbr_adj = false; + mbr_adj = true; + } else { + ut_ad(0); + } + } + + if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { + cursor->low_match = + DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } + } else if (height == 0 && btr_search_enabled + && !dict_index_is_spatial(index)) { + /* The adaptive hash index is only used when searching + for leaf pages (height==0), but not in r-trees. + We only need the byte prefix comparison for the purpose + of updating the adaptive hash index. */ + page_cur_search_with_match_bytes( + block, index, tuple, page_mode, &up_match, &up_bytes, + &low_match, &low_bytes, page_cursor); + } else { + /* Search for complete index fields. */ + up_bytes = low_bytes = 0; + page_cur_search_with_match( + block, index, tuple, page_mode, &up_match, + &low_match, page_cursor, + need_path ? 
cursor->rtr_info : NULL); + } if (estimate) { btr_cur_add_path_info(cursor, height, root_height); @@ -768,6 +1498,34 @@ retry_page_get: ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor), mtr)); + /* Add Predicate lock if it is serializable isolation + and only if it is in the search case */ + if (dict_index_is_spatial(index) + && cursor->rtr_info->need_prdt_lock + && mode != PAGE_CUR_RTREE_INSERT + && mode != PAGE_CUR_RTREE_LOCATE + && mode >= PAGE_CUR_CONTAIN) { + trx_t* trx = thr_get_trx(cursor->thr); + lock_prdt_t prdt; + + lock_mutex_enter(); + lock_init_prdt_from_mbr( + &prdt, &cursor->rtr_info->mbr, mode, + trx->lock.lock_heap); + lock_mutex_exit(); + + if (rw_latch == RW_NO_LATCH && height != 0) { + rw_lock_s_lock(&(block->lock)); + } + + lock_prdt_lock(block, &prdt, index, LOCK_S, + LOCK_PREDICATE, cursor->thr, mtr); + + if (rw_latch == RW_NO_LATCH && height != 0) { + rw_lock_s_unlock(&(block->lock)); + } + } + if (level != height) { const rec_t* node_ptr; @@ -781,8 +1539,292 @@ retry_page_get: offsets = rec_get_offsets( node_ptr, index, offsets, ULINT_UNDEFINED, &heap); + /* If the rec is the first or last in the page for + pessimistic delete intention, it might cause node_ptr insert + for the upper level. We should change the intention and retry. 
+ */ + if (latch_mode == BTR_MODIFY_TREE + && btr_cur_need_opposite_intention( + page, lock_intention, node_ptr)) { + +need_opposite_intention: + ut_ad(upper_rw_latch == RW_X_LATCH); + + if (n_releases > 0) { + /* release root block */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[0], + tree_blocks[0]); + } + + /* release all blocks */ + for (; n_releases <= n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + + lock_intention = BTR_INTENTION_BOTH; + + page_id.reset(space, dict_index_get_page(index)); + up_match = 0; + low_match = 0; + height = ULINT_UNDEFINED; + + n_blocks = 0; + n_releases = 0; + + goto search_loop; + } + + if (dict_index_is_spatial(index)) { + if (page_rec_is_supremum(node_ptr)) { + cursor->low_match = 0; + cursor->up_match = 0; + goto func_exit; + } + + /* If we are doing insertion or record locating, + remember the tree nodes we visited */ + if (page_mode == PAGE_CUR_RTREE_INSERT + || (search_mode == PAGE_CUR_RTREE_LOCATE + && (latch_mode != BTR_MODIFY_LEAF))) { + bool add_latch = false; + + if (latch_mode == BTR_MODIFY_TREE + && rw_latch == RW_NO_LATCH) { + ut_ad(mtr_memo_contains_flagged( + mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + rw_lock_s_lock(&block->lock); + add_latch = true; + } + + /* Store the parent cursor location */ +#ifdef UNIV_DEBUG + ulint num_stored = rtr_store_parent_path( + block, cursor, latch_mode, + height + 1, mtr); +#else + rtr_store_parent_path( + block, cursor, latch_mode, + height + 1, mtr); +#endif + + if (page_mode == PAGE_CUR_RTREE_INSERT) { + btr_pcur_t* r_cursor = + rtr_get_parent_cursor( + cursor, height + 1, + true); + /* If it is insertion, there should + be only one parent for each level + traverse */ +#ifdef UNIV_DEBUG + ut_ad(num_stored == 1); +#endif + + node_ptr = btr_pcur_get_rec(r_cursor); + + } + + if (add_latch) { + rw_lock_s_unlock(&block->lock); + } + + 
ut_ad(!page_rec_is_supremum(node_ptr)); + } + + ut_ad(page_mode == search_mode + || (page_mode == PAGE_CUR_WITHIN + && search_mode == PAGE_CUR_RTREE_LOCATE)); + + page_mode = search_mode; + } + + /* If the first or the last record of the page + or the same key value to the first record or last record, + the another page might be choosen when BTR_CONT_MODIFY_TREE. + So, the parent page should not released to avoiding deadlock + with blocking the another search with the same key value. */ + if (!detected_same_key_root + && lock_intention == BTR_INTENTION_BOTH + && !dict_index_is_unique(index) + && latch_mode == BTR_MODIFY_TREE + && (up_match >= rec_offs_n_fields(offsets) - 1 + || low_match >= rec_offs_n_fields(offsets) - 1)) { + const rec_t* first_rec + = page_rec_get_next_const( + page_get_infimum_rec( + page)); + ulint matched_fields; + + ut_ad(upper_rw_latch == RW_X_LATCH); + + if (node_ptr == first_rec + || page_rec_is_last(node_ptr, page)) { + detected_same_key_root = true; + } else { + matched_fields = 0; + + offsets2 = rec_get_offsets( + first_rec, index, offsets2, + ULINT_UNDEFINED, &heap); + cmp_rec_rec_with_match(node_ptr, first_rec, + offsets, offsets2, index, FALSE, + &matched_fields); + + if (matched_fields + >= rec_offs_n_fields(offsets) - 1) { + detected_same_key_root = true; + } else { + const rec_t* last_rec; + + last_rec = page_rec_get_prev_const( + page_get_supremum_rec( + page)); + + matched_fields = 0; + + offsets2 = rec_get_offsets( + last_rec, index, offsets2, + ULINT_UNDEFINED, &heap); + cmp_rec_rec_with_match( + node_ptr, last_rec, + offsets, offsets2, index, + FALSE, &matched_fields); + if (matched_fields + >= rec_offs_n_fields(offsets) - 1) { + detected_same_key_root = true; + } + } + } + } + + /* If the page might cause modify_tree, + we should not release the parent page's lock. 
*/ + if (!detected_same_key_root + && latch_mode == BTR_MODIFY_TREE + && !btr_cur_will_modify_tree( + index, page, lock_intention, node_ptr, + node_ptr_max_size, page_size, mtr) + && !rtree_parent_modified) { + ut_ad(upper_rw_latch == RW_X_LATCH); + ut_ad(n_releases <= n_blocks); + + /* we can release upper blocks */ + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0) { + /* we should not release root page + to pin to same block. */ + continue; + } + + /* release unused blocks to unpin */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + + if (height == level + && latch_mode == BTR_MODIFY_TREE) { + ut_ad(upper_rw_latch == RW_X_LATCH); + /* we should sx-latch root page, if released already. + It contains seg_header. */ + if (n_releases > 0) { + mtr_block_sx_latch_at_savepoint( + mtr, tree_savepoints[0], + tree_blocks[0]); + } + + /* x-latch the branch blocks not released yet. */ + for (ulint i = n_releases; i <= n_blocks; i++) { + mtr_block_x_latch_at_savepoint( + mtr, tree_savepoints[i], + tree_blocks[i]); + } + } + + /* We should consider prev_page of parent page, if the node_ptr + is the leftmost of the page. because BTR_SEARCH_PREV and + BTR_MODIFY_PREV latches prev_page of the leaf page. */ + if ((latch_mode == BTR_SEARCH_PREV + || latch_mode == BTR_MODIFY_PREV) + && !retrying_for_search_prev) { + /* block should be latched for consistent + btr_page_get_prev() */ + ut_ad(mtr_memo_contains_flagged(mtr, block, + MTR_MEMO_PAGE_S_FIX + | MTR_MEMO_PAGE_X_FIX)); + + if (btr_page_get_prev(page, mtr) != FIL_NULL + && page_rec_is_first(node_ptr, page)) { + + if (leftmost_from_level == 0) { + leftmost_from_level = height + 1; + } + } else { + leftmost_from_level = 0; + } + + if (height == 0 && leftmost_from_level > 0) { + /* should retry to get also prev_page + from level==leftmost_from_level. 
*/ + retrying_for_search_prev = true; + + prev_tree_blocks = static_cast( + ut_malloc_nokey(sizeof(buf_block_t*) + * leftmost_from_level)); + + prev_tree_savepoints = static_cast( + ut_malloc_nokey(sizeof(ulint) + * leftmost_from_level)); + + /* back to the level (leftmost_from_level+1) */ + ulint idx = n_blocks + - (leftmost_from_level - 1); + + page_id.reset( + space, + tree_blocks[idx]->page.id.page_no()); + + for (ulint i = n_blocks + - (leftmost_from_level - 1); + i <= n_blocks; i++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[i], + tree_blocks[i]); + } + + n_blocks -= (leftmost_from_level - 1); + height = leftmost_from_level; + ut_ad(n_releases == 0); + + /* replay up_match, low_match */ + up_match = 0; + low_match = 0; + rtr_info_t* rtr_info = need_path + ? cursor->rtr_info : NULL; + + for (ulint i = 0; i < n_blocks; i++) { + page_cur_search_with_match( + tree_blocks[i], index, tuple, + page_mode, &up_match, + &low_match, page_cursor, + rtr_info); + } + + goto search_loop; + } + } + /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); + page_id.reset( + space, + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + n_blocks++; if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) { /* We're doing a search on an ibuf tree and we're one @@ -795,16 +1837,116 @@ retry_page_get: goto retry_page_get; } + if (dict_index_is_spatial(index) + && page_mode >= PAGE_CUR_CONTAIN + && page_mode != PAGE_CUR_RTREE_INSERT) { + ut_ad(need_path); + rtr_node_path_t* path = + cursor->rtr_info->path; + + if (!path->empty() && found) { +#ifdef UNIV_DEBUG + node_visit_t last_visit = path->back(); + + ut_ad(last_visit.page_no == page_id.page_no()); +#endif /* UNIV_DEBUG */ + + path->pop_back(); + +#ifdef UNIV_DEBUG + if (page_mode == PAGE_CUR_RTREE_LOCATE + && (latch_mode != BTR_MODIFY_LEAF)) { + btr_pcur_t* cur + = cursor->rtr_info->parent_path->back( + ).cursor; + rec_t* my_node_ptr + = btr_pcur_get_rec(cur); + + offsets 
= rec_get_offsets( + my_node_ptr, index, offsets, + ULINT_UNDEFINED, &heap); + + ulint my_page_no + = btr_node_ptr_get_child_page_no( + my_node_ptr, offsets); + + ut_ad(page_id.page_no() == my_page_no); + + } +#endif + } + } + goto search_loop; + } else if (!dict_index_is_spatial(index) + && latch_mode == BTR_MODIFY_TREE + && lock_intention == BTR_INTENTION_INSERT + && mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL + && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { + + /* btr_insert_into_right_sibling() might cause + deleting node_ptr at upper level */ + + guess = NULL; + + if (height == 0) { + /* release the leaf pages if latched */ + for (uint i = 0; i < 3; i++) { + if (latch_leaves.blocks[i] != NULL) { + mtr_release_block_at_savepoint( + mtr, latch_leaves.savepoints[i], + latch_leaves.blocks[i]); + latch_leaves.blocks[i] = NULL; + } + } + } + + goto need_opposite_intention; } if (level != 0) { - /* x-latch the page */ - buf_block_t* child_block = btr_block_get( - space, zip_size, page_no, RW_X_LATCH, index, mtr); + if (upper_rw_latch == RW_NO_LATCH) { + /* latch the page */ + buf_block_t* child_block; - page = buf_block_get_frame(child_block); - btr_assert_not_corrupted(child_block, index); + if (latch_mode == BTR_CONT_MODIFY_TREE) { + child_block = btr_block_get( + page_id, page_size, RW_X_LATCH, + index, mtr); + } else { + ut_ad(latch_mode == BTR_CONT_SEARCH_TREE); + child_block = btr_block_get( + page_id, page_size, RW_SX_LATCH, + index, mtr); + } + + btr_assert_not_corrupted(child_block, index); + } else { + ut_ad(mtr_memo_contains(mtr, block, upper_rw_latch)); + btr_assert_not_corrupted(block, index); + + if (s_latch_by_caller) { + ut_ad(latch_mode == BTR_SEARCH_TREE); + /* to exclude modifying tree operations + should sx-latch the index. */ + ut_ad(mtr_memo_contains( + mtr, dict_index_get_lock(index), + MTR_MEMO_SX_LOCK)); + /* because has sx-latch of index, + can release upper blocks. 
*/ + for (; n_releases < n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, + tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + } + + if (page_mode <= PAGE_CUR_LE) { + cursor->low_match = low_match; + cursor->up_match = up_match; + } } else { cursor->low_match = low_match; cursor->low_bytes = low_bytes; @@ -815,8 +1957,8 @@ retry_page_get: /* We do a dirty read of btr_search_enabled here. We will properly check btr_search_enabled again in btr_search_build_page_hash_index() before building a - page hash index, while holding btr_search_latch. */ - if (btr_search_enabled) { + page hash index, while holding search latch. */ + if (btr_search_enabled && !index->disable_ahi) { btr_search_info_update(index, cursor); } #endif @@ -828,23 +1970,220 @@ retry_page_get: || mode != PAGE_CUR_LE); } + /* For spatial index, remember what blocks are still latched */ + if (dict_index_is_spatial(index) + && (latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_LEAF)) { + for (ulint i = 0; i < n_releases; i++) { + cursor->rtr_info->tree_blocks[i] = NULL; + cursor->rtr_info->tree_savepoints[i] = 0; + } + + for (ulint i = n_releases; i <= n_blocks; i++) { + cursor->rtr_info->tree_blocks[i] = tree_blocks[i]; + cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i]; + } + } + func_exit: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - if (has_search_latch) { - - rw_lock_s_lock(&btr_search_latch); + if (retrying_for_search_prev) { + ut_free(prev_tree_blocks); + ut_free(prev_tree_savepoints); } - return err; + if (has_search_latch) { + + rw_lock_s_lock(btr_get_search_latch(index)); + } + + if (mbr_adj) { + /* remember that we will need to adjust parent MBR */ + cursor->rtr_info->mbr_adj = true; + } + + DBUG_RETURN(err); +} + +/** Searches an index tree and positions a tree cursor on a given level. +This function will avoid latching the traversal path and so should be +used only for cases where-in latching is not needed. 
+ +@param[in,out] index index +@param[in] level the tree level of search +@param[in] tuple data tuple; Note: n_fields_cmp in compared + to the node ptr page node field +@param[in] mode PAGE_CUR_L, .... + Insert should always be made using PAGE_CUR_LE + to search the position. +@param[in,out] cursor tree cursor; points to record of interest. +@param[in] file file name +@param[in[ line line where called from +@param[in,out] mtr mtr +@param[in] mark_dirty + if true then mark the block as dirty */ +dberr_t +btr_cur_search_to_nth_level_with_no_latch( + dict_index_t* index, + ulint level, + const dtuple_t* tuple, + page_cur_mode_t mode, + btr_cur_t* cursor, + const char* file, + ulint line, + mtr_t* mtr, + bool mark_dirty) +{ + page_t* page = NULL; /* remove warning */ + buf_block_t* block; + ulint height; + ulint up_match; + ulint low_match; + ulint rw_latch; + page_cur_mode_t page_mode; + ulint buf_mode; + page_cur_t* page_cursor; + ulint root_height = 0; /* remove warning */ + ulint n_blocks = 0; + dberr_t err = DB_SUCCESS; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + DBUG_ENTER("btr_cur_search_to_nth_level_with_no_latch"); + + ut_ad(dict_table_is_intrinsic(index->table)); + ut_ad(level == 0 || mode == PAGE_CUR_LE); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->page != FIL_NULL); + + UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match); + UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match); +#ifdef UNIV_DEBUG + cursor->up_match = ULINT_UNDEFINED; + cursor->low_match = ULINT_UNDEFINED; +#endif /* UNIV_DEBUG */ + + cursor->flag = BTR_CUR_BINARY; + cursor->index = index; + + page_cursor = btr_cur_get_page_cur(cursor); + + const ulint space = dict_index_get_space(index); + const page_size_t page_size(dict_table_page_size(index->table)); + /* Start with the root page. 
*/ + page_id_t page_id(space, dict_index_get_page(index)); + + up_match = 0; + low_match = 0; + + height = ULINT_UNDEFINED; + + /* We use these modified search modes on non-leaf levels of the + B-tree. These let us end up in the right B-tree leaf. In that leaf + we use the original search mode. */ + + switch (mode) { + case PAGE_CUR_GE: + page_mode = PAGE_CUR_L; + break; + case PAGE_CUR_G: + page_mode = PAGE_CUR_LE; + break; + default: + page_mode = mode; + break; + } + + /* Loop and search until we arrive at the desired level */ + bool at_desired_level = false; + while (!at_desired_level) { + buf_mode = BUF_GET; + rw_latch = RW_NO_LATCH; + + ut_ad(n_blocks < BTR_MAX_LEVELS); + + block = buf_page_get_gen(page_id, page_size, rw_latch, NULL, + buf_mode, file, line, mtr, &err, mark_dirty); + + if (err != DB_SUCCESS) { + if (err == DB_DECRYPTION_FAILED) { + ib_push_warning((void *)NULL, + DB_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. " + " Can't continue reading table.", + index->table->name); + index->table->is_encrypted = true; + } + + DBUG_RETURN(err); + } + + page = buf_block_get_frame(block); + + if (height == ULINT_UNDEFINED) { + /* We are in the root node */ + + height = btr_page_get_level(page, mtr); + root_height = height; + cursor->tree_height = root_height + 1; + } + + if (height == 0) { + /* On leaf level. 
Switch back to original search mode.*/ + page_mode = mode; + } + + page_cur_search_with_match( + block, index, tuple, page_mode, &up_match, + &low_match, page_cursor, NULL); + + ut_ad(height == btr_page_get_level( + page_cur_get_page(page_cursor), mtr)); + + if (level != height) { + + const rec_t* node_ptr; + ut_ad(height > 0); + + height--; + + node_ptr = page_cur_get_rec(page_cursor); + + offsets = rec_get_offsets( + node_ptr, index, offsets, + ULINT_UNDEFINED, &heap); + + /* Go to the child node */ + page_id.reset(space, btr_node_ptr_get_child_page_no( + node_ptr, offsets)); + + n_blocks++; + } else { + /* If this is the desired level, leave the loop */ + at_desired_level = true; + } + } + + cursor->low_match = low_match; + cursor->up_match = up_match; + + if (heap != NULL) { + mem_heap_free(heap); + } + + DBUG_RETURN(err); } /*****************************************************************//** Opens a cursor at either end of an index. */ -UNIV_INTERN dberr_t btr_cur_open_at_index_side_func( /*============================*/ @@ -860,14 +2199,18 @@ btr_cur_open_at_index_side_func( mtr_t* mtr) /*!< in/out: mini-transaction */ { page_cur_t* page_cursor; - ulint page_no; - ulint space; - ulint zip_size; + ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2; ulint height; ulint root_height = 0; /* remove warning */ rec_t* node_ptr; ulint estimate; ulint savepoint; + ulint upper_rw_latch, root_leaf_rw_latch; + btr_intention_t lock_intention; + buf_block_t* tree_blocks[BTR_MAX_LEVELS]; + ulint tree_savepoints[BTR_MAX_LEVELS]; + ulint n_blocks = 0; + ulint n_releases = 0; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; @@ -880,6 +2223,22 @@ btr_cur_open_at_index_side_func( ut_ad(level != ULINT_UNDEFINED); + bool s_latch_by_caller; + + s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; + latch_mode &= ~BTR_ALREADY_S_LATCHED; + + lock_intention = btr_cur_get_and_clear_intention(&latch_mode); + + ut_ad(!(latch_mode & 
BTR_MODIFY_EXTERNAL)); + + /* This function doesn't need to lock left page of the leaf page */ + if (latch_mode == BTR_SEARCH_PREV) { + latch_mode = BTR_SEARCH_LEAF; + } else if (latch_mode == BTR_MODIFY_PREV) { + latch_mode = BTR_MODIFY_LEAF; + } + /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ @@ -887,35 +2246,76 @@ btr_cur_open_at_index_side_func( switch (latch_mode) { case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + upper_rw_latch = RW_NO_LATCH; break; case BTR_MODIFY_TREE: - mtr_x_lock(dict_index_get_lock(index), mtr); - break; - case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: - case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); + /* Most of delete-intended operations are purging. + Free blocks and read IO bandwidth should be prior + for them, when the history list is glowing huge. */ + if (lock_intention == BTR_INTENTION_DELETE + && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH + && buf_get_n_pending_read_ios()) { + mtr_x_lock(dict_index_get_lock(index), mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), mtr); + } + upper_rw_latch = RW_X_LATCH; break; default: - mtr_s_lock(dict_index_get_lock(index), mtr); + ut_ad(!s_latch_by_caller + || mtr_memo_contains_flagged(mtr, + dict_index_get_lock(index), + MTR_MEMO_SX_LOCK + | MTR_MEMO_S_LOCK)); + if (!srv_read_only_mode) { + if (!s_latch_by_caller) { + /* BTR_SEARCH_TREE is intended to be used with + BTR_ALREADY_S_LATCHED */ + ut_ad(latch_mode != BTR_SEARCH_TREE); + + mtr_s_lock(dict_index_get_lock(index), mtr); + } + upper_rw_latch = RW_S_LATCH; + } else { + upper_rw_latch = RW_NO_LATCH; + } } + root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode); page_cursor = btr_cur_get_page_cur(cursor); cursor->index = index; - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = 
dict_index_get_page(index); + page_id_t page_id(dict_index_get_space(index), + dict_index_get_page(index)); + const page_size_t& page_size = dict_table_page_size(index->table); + + if (root_leaf_rw_latch == RW_X_LATCH) { + node_ptr_max_size = dict_index_node_ptr_max_size(index); + } height = ULINT_UNDEFINED; for (;;) { buf_block_t* block=NULL; page_t* page=NULL; + ulint rw_latch; + + ut_ad(n_blocks < BTR_MAX_LEVELS); + + if (height != 0 + && (latch_mode != BTR_MODIFY_TREE + || height == level)) { + rw_latch = upper_rw_latch; + } else { + rw_latch = RW_NO_LATCH; + } + + tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + block = buf_page_get_gen(page_id, page_size, rw_latch, NULL, + BUF_GET, file, line, mtr, &err); + tree_blocks[n_blocks] = block; - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - file, line, mtr, &err); if (err != DB_SUCCESS) { if (err == DB_DECRYPTION_FAILED) { ib_push_warning((void *)NULL, @@ -931,10 +2331,27 @@ btr_cur_open_at_index_side_func( } page = buf_block_get_frame(block); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(index->id == btr_page_get_index_id(page)); - block->check_index_page_at_flush = TRUE; + if (height == ULINT_UNDEFINED + && btr_page_get_level(page, mtr) == 0 + && rw_latch != RW_NO_LATCH + && rw_latch != root_leaf_rw_latch) { + /* We should retry to get the page, because the root page + is latched with different level as a leaf page. 
*/ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH); + + ut_ad(n_blocks == 0); + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_blocks], + tree_blocks[n_blocks]); + + upper_rw_latch = root_leaf_rw_latch; + continue; + } + + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); if (height == ULINT_UNDEFINED) { /* We are in the root node */ @@ -948,12 +2365,16 @@ btr_cur_open_at_index_side_func( } if (height == level) { - btr_cur_latch_leaves( - page, space, zip_size, page_no, - latch_mode & ~BTR_ALREADY_S_LATCHED, - cursor, mtr); - - if (height == 0) { + if (srv_read_only_mode) { + btr_cur_latch_leaves( + block, page_id, page_size, + latch_mode, cursor, mtr); + } else if (height == 0) { + if (rw_latch == RW_NO_LATCH) { + btr_cur_latch_leaves( + block, page_id, page_size, + latch_mode, cursor, mtr); + } /* In versions <= 3.23.52 we had forgotten to release the tree latch here. If in an index scan we had to @@ -965,15 +2386,55 @@ btr_cur_open_at_index_side_func( switch (latch_mode) { case BTR_MODIFY_TREE: case BTR_CONT_MODIFY_TREE: - case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: - case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: + case BTR_CONT_SEARCH_TREE: break; default: - /* Release the tree s-latch */ + if (!s_latch_by_caller) { + /* Release the tree s-latch */ + mtr_release_s_latch_at_savepoint( + mtr, savepoint, + dict_index_get_lock( + index)); + } - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - dict_index_get_lock(index)); + /* release upper blocks */ + for (; n_releases < n_blocks; + n_releases++) { + mtr_release_block_at_savepoint( + mtr, + tree_savepoints[ + n_releases], + tree_blocks[ + n_releases]); + } + } + } else { /* height != 0 */ + /* We already have the block latched. 
*/ + ut_ad(latch_mode == BTR_SEARCH_TREE); + ut_ad(s_latch_by_caller); + ut_ad(upper_rw_latch == RW_S_LATCH); + + ut_ad(mtr_memo_contains(mtr, block, + upper_rw_latch)); + + if (s_latch_by_caller) { + /* to exclude modifying tree operations + should sx-latch the index. */ + ut_ad(mtr_memo_contains( + mtr, + dict_index_get_lock(index), + MTR_MEMO_SX_LOCK)); + /* because has sx-latch of index, + can release upper blocks. */ + for (; n_releases < n_blocks; + n_releases++) { + mtr_release_block_at_savepoint( + mtr, + tree_savepoints[ + n_releases], + tree_blocks[ + n_releases]); + } } } } @@ -1010,8 +2471,81 @@ btr_cur_open_at_index_side_func( node_ptr = page_cur_get_rec(page_cursor); offsets = rec_get_offsets(node_ptr, cursor->index, offsets, ULINT_UNDEFINED, &heap); + + /* If the rec is the first or last in the page for + pessimistic delete intention, it might cause node_ptr insert + for the upper level. We should change the intention and retry. + */ + if (latch_mode == BTR_MODIFY_TREE + && btr_cur_need_opposite_intention( + page, lock_intention, node_ptr)) { + + ut_ad(upper_rw_latch == RW_X_LATCH); + /* release all blocks */ + for (; n_releases <= n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + + lock_intention = BTR_INTENTION_BOTH; + + page_id.set_page_no(dict_index_get_page(index)); + + height = ULINT_UNDEFINED; + + n_blocks = 0; + n_releases = 0; + + continue; + } + + if (latch_mode == BTR_MODIFY_TREE + && !btr_cur_will_modify_tree( + cursor->index, page, lock_intention, node_ptr, + node_ptr_max_size, page_size, mtr)) { + ut_ad(upper_rw_latch == RW_X_LATCH); + ut_ad(n_releases <= n_blocks); + + /* we can release upper blocks */ + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0) { + /* we should not release root page + to pin to same block. 
*/ + continue; + } + + /* release unused blocks to unpin */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + + if (height == level + && latch_mode == BTR_MODIFY_TREE) { + ut_ad(upper_rw_latch == RW_X_LATCH); + /* we should sx-latch root page, if released already. + It contains seg_header. */ + if (n_releases > 0) { + mtr_block_sx_latch_at_savepoint( + mtr, tree_savepoints[0], + tree_blocks[0]); + } + + /* x-latch the branch blocks not released yet. */ + for (ulint i = n_releases; i <= n_blocks; i++) { + mtr_block_x_latch_at_savepoint( + mtr, tree_savepoints[i], + tree_blocks[i]); + } + } + /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); + page_id.set_page_no( + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + n_blocks++; } exit_loop: @@ -1022,10 +2556,130 @@ btr_cur_open_at_index_side_func( return err; } +/** Opens a cursor at either end of an index. +Avoid taking latches on buffer, just pin (by incrementing fix_count) +to keep them in buffer pool. This mode is used by intrinsic table +as they are not shared and so there is no need of latching. +@param[in] from_left true if open to low end, false if open + to high end. 
+@param[in] index index +@param[in,out] cursor cursor +@param[in] file file name +@param[in] line line where called +@param[in,out] mtr mini transaction +*/ +dberr_t +btr_cur_open_at_index_side_with_no_latch_func( + bool from_left, + dict_index_t* index, + btr_cur_t* cursor, + ulint level, + const char* file, + ulint line, + mtr_t* mtr) +{ + page_cur_t* page_cursor; + ulint height; + rec_t* node_ptr; + ulint n_blocks = 0; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + dberr_t err = DB_SUCCESS; + rec_offs_init(offsets_); + + ut_ad(level != ULINT_UNDEFINED); + + page_cursor = btr_cur_get_page_cur(cursor); + cursor->index = index; + page_id_t page_id(dict_index_get_space(index), + dict_index_get_page(index)); + const page_size_t& page_size = dict_table_page_size(index->table); + + height = ULINT_UNDEFINED; + + for (;;) { + buf_block_t* block; + page_t* page; + ulint rw_latch = RW_NO_LATCH; + + ut_ad(n_blocks < BTR_MAX_LEVELS); + + block = buf_page_get_gen(page_id, page_size, rw_latch, NULL, + BUF_GET, file, line, mtr, &err); + + if (err != DB_SUCCESS) { + if (err == DB_DECRYPTION_FAILED) { + ib_push_warning((void *)NULL, + DB_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. 
" + " Can't continue reading table.", + index->table->name); + index->table->is_encrypted = true; + } + + return (err); + } + + page = buf_block_get_frame(block); + + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height == ULINT_UNDEFINED) { + /* We are in the root node */ + + height = btr_page_get_level(page, mtr); + ut_a(height >= level); + } else { + /* TODO: flag the index corrupted if this fails */ + ut_ad(height == btr_page_get_level(page, mtr)); + } + + if (from_left) { + page_cur_set_before_first(block, page_cursor); + } else { + page_cur_set_after_last(block, page_cursor); + } + + if (height == level) { + break; + } + + ut_ad(height > 0); + + if (from_left) { + page_cur_move_to_next(page_cursor); + } else { + page_cur_move_to_prev(page_cursor); + } + + height--; + + node_ptr = page_cur_get_rec(page_cursor); + offsets = rec_get_offsets(node_ptr, cursor->index, offsets, + ULINT_UNDEFINED, &heap); + + /* Go to the child node */ + page_id.set_page_no( + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + n_blocks++; + } + + if (heap != NULL) { + mem_heap_free(heap); + } + + return(err); +} + /**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ -UNIV_INTERN -void +Positions a cursor at a randomly chosen position within a B-tree. 
+@return true if the index is available and we have put the cursor, false +if the index is unavailable */ +bool btr_cur_open_at_rnd_pos_func( /*=========================*/ dict_index_t* index, /*!< in: index */ @@ -1036,42 +2690,108 @@ btr_cur_open_at_rnd_pos_func( mtr_t* mtr) /*!< in: mtr */ { page_cur_t* page_cursor; - ulint page_no; - ulint space; - ulint zip_size; + ulint node_ptr_max_size = UNIV_PAGE_SIZE / 2; ulint height; rec_t* node_ptr; + ulint savepoint; + ulint upper_rw_latch, root_leaf_rw_latch; + btr_intention_t lock_intention; + buf_block_t* tree_blocks[BTR_MAX_LEVELS]; + ulint tree_savepoints[BTR_MAX_LEVELS]; + ulint n_blocks = 0; + ulint n_releases = 0; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; rec_offs_init(offsets_); + ut_ad(!dict_index_is_spatial(index)); + + lock_intention = btr_cur_get_and_clear_intention(&latch_mode); + + ut_ad(!(latch_mode & BTR_MODIFY_EXTERNAL)); + + savepoint = mtr_set_savepoint(mtr); + switch (latch_mode) { case BTR_MODIFY_TREE: - mtr_x_lock(dict_index_get_lock(index), mtr); + /* Most of delete-intended operations are purging. + Free blocks and read IO bandwidth should be prior + for them, when the history list is glowing huge. */ + if (lock_intention == BTR_INTENTION_DELETE + && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH + && buf_get_n_pending_read_ios()) { + mtr_x_lock(dict_index_get_lock(index), mtr); + } else { + mtr_sx_lock(dict_index_get_lock(index), mtr); + } + upper_rw_latch = RW_X_LATCH; break; + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + /* This function doesn't support left uncle + page lock for left leaf page lock, when + needed. 
*/ + case BTR_SEARCH_TREE: + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + ut_ad(0); + /* fall through */ default: - ut_ad(latch_mode != BTR_CONT_MODIFY_TREE); - mtr_s_lock(dict_index_get_lock(index), mtr); + if (!srv_read_only_mode) { + mtr_s_lock(dict_index_get_lock(index), mtr); + upper_rw_latch = RW_S_LATCH; + } else { + upper_rw_latch = RW_NO_LATCH; + } } + DBUG_EXECUTE_IF("test_index_is_unavailable", + return(false);); + + if (index->page == FIL_NULL) { + /* Since we don't hold index lock until just now, the index + could be modified by others, for example, if this is a + statistics updater for referenced table, it could be marked + as unavailable by 'DROP TABLE' in the mean time, since + we don't hold lock for statistics updater */ + return(false); + } + + root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode); + page_cursor = btr_cur_get_page_cur(cursor); cursor->index = index; - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = dict_index_get_page(index); + page_id_t page_id(dict_index_get_space(index), + dict_index_get_page(index)); + const page_size_t& page_size = dict_table_page_size(index->table); + dberr_t err = DB_SUCCESS; + + if (root_leaf_rw_latch == RW_X_LATCH) { + node_ptr_max_size = dict_index_node_ptr_max_size(index); + } height = ULINT_UNDEFINED; for (;;) { buf_block_t* block; page_t* page; - dberr_t err=DB_SUCCESS; + ulint rw_latch; - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - file, line, mtr, &err); + ut_ad(n_blocks < BTR_MAX_LEVELS); + + if (height != 0 + && latch_mode != BTR_MODIFY_TREE) { + rw_latch = upper_rw_latch; + } else { + rw_latch = RW_NO_LATCH; + } + + tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + block = buf_page_get_gen(page_id, page_size, rw_latch, NULL, + BUF_GET, file, line, mtr, &err); + tree_blocks[n_blocks] = block; if (err != DB_SUCCESS) { if (err == DB_DECRYPTION_FAILED) { @@ -1087,7 +2807,26 @@ 
btr_cur_open_at_rnd_pos_func( } page = buf_block_get_frame(block); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); + + if (height == ULINT_UNDEFINED + && btr_page_get_level(page, mtr) == 0 + && rw_latch != RW_NO_LATCH + && rw_latch != root_leaf_rw_latch) { + /* We should retry to get the page, because the root page + is latched with different level as a leaf page. */ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH); + + ut_ad(n_blocks == 0); + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_blocks], + tree_blocks[n_blocks]); + + upper_rw_latch = root_leaf_rw_latch; + continue; + } + + ut_ad(fil_page_index_page_check(page)); ut_ad(index->id == btr_page_get_index_id(page)); if (height == ULINT_UNDEFINED) { @@ -1097,8 +2836,37 @@ btr_cur_open_at_rnd_pos_func( } if (height == 0) { - btr_cur_latch_leaves(page, space, zip_size, page_no, - latch_mode, cursor, mtr); + if (rw_latch == RW_NO_LATCH + || srv_read_only_mode) { + btr_cur_latch_leaves( + block, page_id, page_size, + latch_mode, cursor, mtr); + } + + /* btr_cur_open_at_index_side_func() and + btr_cur_search_to_nth_level() release + tree s-latch here.*/ + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + break; + default: + /* Release the tree s-latch */ + if (!srv_read_only_mode) { + mtr_release_s_latch_at_savepoint( + mtr, savepoint, + dict_index_get_lock(index)); + } + + /* release upper blocks */ + for (; n_releases < n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, + tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } } page_cur_open_on_rnd_user_rec(block, page_cursor); @@ -1115,14 +2883,89 @@ btr_cur_open_at_rnd_pos_func( node_ptr = page_cur_get_rec(page_cursor); offsets = rec_get_offsets(node_ptr, cursor->index, offsets, ULINT_UNDEFINED, &heap); + + /* If the rec is the first or last in the page for + pessimistic delete intention, it might cause node_ptr insert + for the upper level. 
We should change the intention and retry. + */ + if (latch_mode == BTR_MODIFY_TREE + && btr_cur_need_opposite_intention( + page, lock_intention, node_ptr)) { + + ut_ad(upper_rw_latch == RW_X_LATCH); + /* release all blocks */ + for (; n_releases <= n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + + lock_intention = BTR_INTENTION_BOTH; + + page_id.set_page_no(dict_index_get_page(index)); + + height = ULINT_UNDEFINED; + + n_blocks = 0; + n_releases = 0; + + continue; + } + + if (latch_mode == BTR_MODIFY_TREE + && !btr_cur_will_modify_tree( + cursor->index, page, lock_intention, node_ptr, + node_ptr_max_size, page_size, mtr)) { + ut_ad(upper_rw_latch == RW_X_LATCH); + ut_ad(n_releases <= n_blocks); + + /* we can release upper blocks */ + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0) { + /* we should not release root page + to pin to same block. */ + continue; + } + + /* release unused blocks to unpin */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + + if (height == 0 + && latch_mode == BTR_MODIFY_TREE) { + ut_ad(upper_rw_latch == RW_X_LATCH); + /* we should sx-latch root page, if released already. + It contains seg_header. */ + if (n_releases > 0) { + mtr_block_sx_latch_at_savepoint( + mtr, tree_savepoints[0], + tree_blocks[0]); + } + + /* x-latch the branch blocks not released yet. 
*/ + for (ulint i = n_releases; i <= n_blocks; i++) { + mtr_block_x_latch_at_savepoint( + mtr, tree_savepoints[i], + tree_blocks[i]); + } + } + /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); + page_id.set_page_no( + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + n_blocks++; } exit_loop: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } + + return(true); } /*==================== B-TREE INSERT =========================*/ @@ -1138,7 +2981,7 @@ if this is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). -@return pointer to inserted record if succeed, else NULL */ +@return pointer to inserted record if succeed, else NULL */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) rec_t* btr_cur_insert_if_possible( @@ -1157,8 +3000,9 @@ btr_cur_insert_if_possible( ut_ad(dtuple_check_typed(tuple)); - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix( + mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX, cursor->index->table)); page_cursor = btr_cur_get_page_cur(cursor); /* Now, try the insert */ @@ -1181,7 +3025,7 @@ btr_cur_insert_if_possible( /*************************************************************//** For an insert, checks the locks and does the undo logging if desired. 
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ +@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,5,6))) dberr_t btr_cur_ins_lock_and_undo( @@ -1198,7 +3042,7 @@ btr_cur_ins_lock_and_undo( successor record */ { dict_index_t* index; - dberr_t err; + dberr_t err = DB_SUCCESS; rec_t* rec; roll_ptr_t roll_ptr; @@ -1211,10 +3055,32 @@ btr_cur_ins_lock_and_undo( ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); + ut_ad(mtr->is_named_space(index->space)); - err = lock_rec_insert_check_and_lock(flags, rec, - btr_cur_get_block(cursor), - index, thr, mtr, inherit); + /* Check if there is predicate or GAP lock preventing the insertion */ + if (!(flags & BTR_NO_LOCKING_FLAG)) { + if (dict_index_is_spatial(index)) { + lock_prdt_t prdt; + rtr_mbr_t mbr; + + rtr_get_mbr_from_tuple(entry, &mbr); + + /* Use on stack MBR variable to test if a lock is + needed. If so, the predicate (MBR) will be allocated + from lock heap in lock_prdt_insert_check_and_lock() */ + lock_init_prdt_from_mbr( + &prdt, &mbr, 0, NULL); + + err = lock_prdt_insert_check_and_lock( + flags, rec, btr_cur_get_block(cursor), + index, thr, mtr, &prdt); + *inherit = false; + } else { + err = lock_rec_insert_check_and_lock( + flags, rec, btr_cur_get_block(cursor), + index, thr, mtr, inherit); + } + } if (err != DB_SUCCESS || !dict_index_is_clust(index) || dict_index_is_ibuf(index)) { @@ -1231,9 +3097,11 @@ btr_cur_ins_lock_and_undo( return(err); } - /* Now we can fill in the roll ptr field in entry */ + /* Now we can fill in the roll ptr field in entry + (except if table is intrinsic) */ - if (!(flags & BTR_KEEP_SYS_FLAG)) { + if (!(flags & BTR_KEEP_SYS_FLAG) + && !dict_table_is_intrinsic(index->table)) { row_upd_index_entry_sys_field(entry, index, DATA_ROLL_PTR, roll_ptr); @@ -1242,23 +3110,36 @@ btr_cur_ins_lock_and_undo( return(DB_SUCCESS); } -#ifdef UNIV_DEBUG 
-/*************************************************************//** -Report information about a transaction. */ +/** +Prefetch siblings of the leaf for the pessimistic operation. +@param block leaf page */ static void -btr_cur_trx_report( -/*===============*/ - trx_id_t trx_id, /*!< in: transaction id */ - const dict_index_t* index, /*!< in: index */ - const char* op) /*!< in: operation */ +btr_cur_prefetch_siblings( + buf_block_t* block) { - fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id); - fputs(op, stderr); - dict_index_name_print(stderr, NULL, index); - putc('\n', stderr); + page_t* page = buf_block_get_frame(block); + + ut_ad(page_is_leaf(page)); + + ulint left_page_no = fil_page_get_prev(page); + ulint right_page_no = fil_page_get_next(page); + + if (left_page_no != FIL_NULL) { + buf_read_page_background( + page_id_t(block->page.id.space(), left_page_no), + block->page.size, false); + } + if (right_page_no != FIL_NULL) { + buf_read_page_background( + page_id_t(block->page.id.space(), right_page_no), + block->page.size, false); + } + if (left_page_no != FIL_NULL + || right_page_no != FIL_NULL) { + os_aio_simulated_wake_handler_threads(); + } } -#endif /* UNIV_DEBUG */ /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. @@ -1266,8 +3147,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page. If there is just one record on the page, the insert will always succeed; this is to prevent trying to split a page with just one record. 
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ dberr_t btr_cur_optimistic_insert( /*======================*/ @@ -1302,7 +3182,6 @@ btr_cur_optimistic_insert( ibool leaf; ibool reorg; ibool inherit = TRUE; - ulint zip_size; ulint rec_size; dberr_t err; @@ -1312,38 +3191,34 @@ btr_cur_optimistic_insert( page = buf_block_get_frame(block); index = cursor->index; - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + /* Block are not latched for insert if table is intrinsic + and index is auto-generated clustered index. */ + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); ut_ad(dtuple_check_typed(entry)); - zip_size = buf_block_get_zip_size(block); + const page_size_t& page_size = block->page.size; + #ifdef UNIV_DEBUG_VALGRIND - if (zip_size) { - UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + if (page_size.is_compressed()) { + UNIV_MEM_ASSERT_RW(page, page_size.logical()); + UNIV_MEM_ASSERT_RW(block->page.zip.data, page_size.physical()); } #endif /* UNIV_DEBUG_VALGRIND */ -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert "); - dtuple_print(stderr, entry); - } -#endif /* UNIV_DEBUG */ - leaf = page_is_leaf(page); /* Calculate the record size when entry is converted to a record */ rec_size = rec_get_converted_size(index, entry, n_ext); if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), - dtuple_get_n_fields(entry), zip_size)) { + dtuple_get_n_fields(entry), page_size)) { /* The record is so big that we have to store some fields externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext); + big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext); if (UNIV_UNLIKELY(big_rec_vec == 
NULL)) { @@ -1353,52 +3228,18 @@ btr_cur_optimistic_insert( rec_size = rec_get_converted_size(index, entry, n_ext); } - if (zip_size) { - /* Estimate the free space of an empty compressed page. - Subtract one byte for the encoded heap_no in the - modification log. */ - ulint free_space_zip = page_zip_empty_size( - cursor->index->n_fields, zip_size); - ulint n_uniq = dict_index_get_n_unique_in_tree(index); - - ut_ad(dict_table_is_comp(index->table)); - - if (free_space_zip == 0) { -too_big: - if (big_rec_vec) { - dtuple_convert_back_big_rec( - index, entry, big_rec_vec); - } - - return(DB_TOO_BIG_RECORD); + if (page_size.is_compressed() && page_zip_is_too_big(index, entry)) { + if (big_rec_vec != NULL) { + dtuple_convert_back_big_rec(index, entry, big_rec_vec); } - /* Subtract one byte for the encoded heap_no in the - modification log. */ - free_space_zip--; - - /* There should be enough room for two node pointer - records on an empty non-leaf page. This prevents - infinite page splits. */ - - if (entry->n_fields >= n_uniq - && (REC_NODE_PTR_SIZE - + rec_get_converted_size_comp_prefix( - index, entry->fields, n_uniq, NULL) - /* On a compressed page, there is - a two-byte entry in the dense - page directory for every record. - But there is no record header. */ - - (REC_N_NEW_EXTRA_BYTES - 2) - > free_space_zip / 2)) { - goto too_big; - } + return(DB_TOO_BIG_RECORD); } LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), goto fail); - if (leaf && zip_size + if (leaf && page_size.is_compressed() && (page_get_data_size(page) + rec_size >= dict_index_zip_pad_optimal_page_size(index))) { /* If compression padding tells us that insertion will @@ -1407,6 +3248,12 @@ too_big: insertion. */ fail: err = DB_FAIL; + + /* prefetch siblings of the leaf for the pessimistic + operation, if the page is leaf. 
*/ + if (page_is_leaf(page)) { + btr_cur_prefetch_siblings(block); + } fail_err: if (big_rec_vec) { @@ -1435,7 +3282,7 @@ fail_err: we have to split the page to reserve enough free space for future updates of records. */ - if (leaf && !zip_size && dict_index_is_clust(index) + if (leaf && !page_size.is_compressed() && dict_index_is_clust(index) && page_get_n_recs(page) >= 2 && dict_index_get_space_reserve() + rec_size > max_size && (btr_page_get_split_rec_to_right(cursor, &dummy) @@ -1443,37 +3290,70 @@ fail_err: goto fail; } - /* Check locks and write to the undo log, if specified */ - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, - thr, mtr, &inherit); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - - goto fail_err; - } - page_cursor = btr_cur_get_page_cur(cursor); - /* Now, try the insert */ - +#ifdef UNIV_DEBUG { - const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor); - *rec = page_cur_tuple_insert(page_cursor, entry, index, - offsets, heap, n_ext, mtr); + rec_printer p(entry); + DBUG_PRINT("ib_cur", ("insert %s (" IB_ID_FMT ") by " IB_ID_FMT " %s", + index->name(), index->id, + thr != NULL + ? 
trx_get_id_for_print(thr_get_trx(thr)) + : 0, + p.str().c_str())); + } +#endif + + DBUG_EXECUTE_IF("do_page_reorganize", + btr_page_reorganize(page_cursor, index, mtr);); + + /* Now, try the insert */ + { + const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor); + + if (dict_table_is_intrinsic(index->table)) { + + index->rec_cache.rec_size = rec_size; + + *rec = page_cur_tuple_direct_insert( + page_cursor, entry, index, n_ext, mtr); + } else { + /* Check locks and write to the undo log, + if specified */ + err = btr_cur_ins_lock_and_undo(flags, cursor, entry, + thr, mtr, &inherit); + if (err != DB_SUCCESS) { + goto fail_err; + } + + *rec = page_cur_tuple_insert( + page_cursor, entry, index, offsets, heap, + n_ext, mtr); + } + reorg = page_cursor_rec != page_cur_get_rec(page_cursor); } if (*rec) { - } else if (zip_size) { + } else if (page_size.is_compressed()) { /* Reset the IBUF_BITMAP_FREE bits, because page_cur_tuple_insert() will have attempted page reorganize before failing. */ - if (leaf && !dict_index_is_clust(index)) { + if (leaf + && !dict_index_is_clust(index) + && !dict_table_is_temporary(index->table)) { ibuf_reset_free_bits(block); } goto fail; } else { + + /* For intrinsic table we take a consistent path + to re-organize using pessimistic path. */ + if (dict_table_is_intrinsic(index->table)) { + goto fail; + } + ut_ad(!reorg); /* If the record did not fit, reorganize */ @@ -1490,30 +3370,31 @@ fail_err: offsets, heap, n_ext, mtr); if (UNIV_UNLIKELY(!*rec)) { - fputs("InnoDB: Error: cannot insert tuple ", stderr); - dtuple_print(stderr, entry); - fputs(" into ", stderr); - dict_index_name_print(stderr, thr_get_trx(thr), index); - fprintf(stderr, "\nInnoDB: max insert size %lu\n", - (ulong) max_size); - ut_error; + ib::fatal() << "Cannot insert tuple " << *entry + << "into index " << index->name + << " of table " << index->table->name + << ". 
Max size: " << max_size; } } #ifdef BTR_CUR_HASH_ADAPT - if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { - btr_search_update_hash_node_on_insert(cursor); - } else { - btr_search_update_hash_on_insert(cursor); + if (!index->disable_ahi) { + if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { + btr_search_update_hash_node_on_insert(cursor); + } else { + btr_search_update_hash_on_insert(cursor); + } } -#endif +#endif /* BTR_CUR_HASH_ADAPT */ if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) { lock_update_insert(block, *rec); } - if (leaf && !dict_index_is_clust(index)) { + if (leaf + && !dict_index_is_clust(index) + && !dict_table_is_temporary(index->table)) { /* Update the free bits of the B-tree page in the insert buffer bitmap. */ @@ -1527,7 +3408,7 @@ fail_err: committed mini-transaction, because in crash recovery, the free bits could momentarily be set too high. */ - if (zip_size) { + if (page_size.is_compressed()) { /* Update the bits in the same mini-transaction. */ ibuf_update_free_bits_zip(block, mtr); } else { @@ -1549,8 +3430,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the insert is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. 
-@return DB_SUCCESS or error number */ -UNIV_INTERN +@return DB_SUCCESS or error number */ dberr_t btr_cur_pessimistic_insert( /*=======================*/ @@ -1576,22 +3456,23 @@ btr_cur_pessimistic_insert( mtr_t* mtr) /*!< in/out: mini-transaction */ { dict_index_t* index = cursor->index; - ulint zip_size = dict_table_zip_size(index->table); big_rec_t* big_rec_vec = NULL; dberr_t err; ibool inherit = FALSE; - ibool success; + bool success; ulint n_reserved = 0; ut_ad(dtuple_check_typed(entry)); *big_rec = NULL; - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged( + mtr, dict_index_get_lock(btr_cur_get_index(cursor)), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(cursor->index->table)); + ut_ad(mtr_is_block_fix( + mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX, cursor->index->table)); ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); @@ -1608,7 +3489,8 @@ btr_cur_pessimistic_insert( return(err); } - if (!(flags & BTR_NO_UNDO_LOG_FLAG)) { + if (!(flags & BTR_NO_UNDO_LOG_FLAG) + || dict_table_is_intrinsic(index->table)) { /* First reserve enough free space for the file segments of the index tree, so that the insert will not fail because of lack of space */ @@ -1625,7 +3507,7 @@ btr_cur_pessimistic_insert( if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext), dict_table_is_comp(index->table), dtuple_get_n_fields(entry), - zip_size)) { + dict_table_page_size(index->table))) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -1636,7 +3518,7 @@ btr_cur_pessimistic_insert( dtuple_convert_back_big_rec(index, entry, big_rec_vec); } - big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext); + big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext); if 
(big_rec_vec == NULL) { @@ -1649,7 +3531,7 @@ btr_cur_pessimistic_insert( } if (dict_index_get_page(index) - == buf_block_get_page_no(btr_cur_get_block(cursor))) { + == btr_cur_get_block(cursor)->page.id.page_no()) { /* The page is the root page */ *rec = btr_root_raise_and_insert( @@ -1663,40 +3545,41 @@ btr_cur_pessimistic_insert( return(DB_OUT_OF_FILE_SPACE); } - ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec); + ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec + || dict_index_is_spatial(index)); + if (!(flags & BTR_NO_LOCKING_FLAG)) { - /* The cursor might be moved to the other page, - and the max trx id field should be updated after - the cursor was fixed. */ - if (!dict_index_is_clust(index)) { - page_update_max_trx_id( - btr_cur_get_block(cursor), - btr_cur_get_page_zip(cursor), - thr_get_trx(thr)->id, mtr); - } + ut_ad(!dict_table_is_temporary(index->table)); + if (dict_index_is_spatial(index)) { + /* Do nothing */ + } else { + /* The cursor might be moved to the other page + and the max trx id field should be updated after + the cursor was fixed. */ + if (!dict_index_is_clust(index)) { + page_update_max_trx_id( + btr_cur_get_block(cursor), + btr_cur_get_page_zip(cursor), + thr_get_trx(thr)->id, mtr); + } - if (!page_rec_is_infimum(btr_cur_get_rec(cursor))) { - /* split and inserted need to call - lock_update_insert() always. */ - inherit = TRUE; - } - - buf_block_t* block = btr_cur_get_block(cursor); - buf_frame_t* frame = NULL; - - if (block) { - frame = buf_block_get_frame(block); - } - /* split and inserted need to call - lock_update_insert() always. */ - if (frame && btr_page_get_prev(frame, mtr) == FIL_NULL) { - inherit = TRUE; + if (!page_rec_is_infimum(btr_cur_get_rec(cursor)) + || btr_page_get_prev( + buf_block_get_frame( + btr_cur_get_block(cursor)), mtr) + == FIL_NULL) { + /* split and inserted need to call + lock_update_insert() always. 
*/ + inherit = TRUE; + } } } #ifdef BTR_CUR_ADAPT - btr_search_update_hash_on_insert(cursor); + if (!index->disable_ahi) { + btr_search_update_hash_on_insert(cursor); + } #endif if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) { @@ -1716,8 +3599,8 @@ btr_cur_pessimistic_insert( /*************************************************************//** For an update, checks the locks and does the undo logging. -@return DB_SUCCESS, DB_WAIT_LOCK, or error number */ -UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,6,7))) +@return DB_SUCCESS, DB_WAIT_LOCK, or error number */ +UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) dberr_t btr_cur_upd_lock_and_undo( /*======================*/ @@ -1742,6 +3625,7 @@ btr_cur_upd_lock_and_undo( index = cursor->index; ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(mtr->is_named_space(index->space)); if (!dict_index_is_clust(index)) { ut_ad(dict_index_is_online_ddl(index) @@ -1776,7 +3660,6 @@ btr_cur_upd_lock_and_undo( /***********************************************************//** Writes a redo log record of updating a record in-place. */ -UNIV_INTERN void btr_cur_update_in_place_log( /*========================*/ @@ -1824,7 +3707,7 @@ btr_cur_update_in_place_log( trx_write_roll_ptr(log_ptr, 0); log_ptr += DATA_ROLL_PTR_LEN; /* TRX_ID */ - log_ptr += mach_ull_write_compressed(log_ptr, 0); + log_ptr += mach_u64_write_compressed(log_ptr, 0); } mach_write_to_2(log_ptr, page_offset(rec)); @@ -1836,8 +3719,7 @@ btr_cur_update_in_place_log( /***********************************************************//** Parses a redo log record of updating a record in-place. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_update_in_place( /*==========================*/ @@ -1894,7 +3776,7 @@ btr_cur_parse_update_in_place( ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); rec = page + rec_offset; - /* We do not need to reserve btr_search_latch, as the page is only + /* We do not need to reserve search latch, as the page is only being recovered, and there cannot be a hash index to it. */ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); @@ -1919,13 +3801,12 @@ an update-in-place. @retval false if out of space; IBUF_BITMAP_FREE will be reset outside mtr if the page was recompressed -@retval true if enough place; +@retval true if enough place; IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a secondary index leaf page. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(mtr). */ -UNIV_INTERN bool btr_cur_update_alloc_zip_func( /*==========================*/ @@ -1993,7 +3874,9 @@ out_of_space: ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); /* Out of space: reset the free bits. */ - if (!dict_index_is_clust(index) && page_is_leaf(page)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(page)) { ibuf_reset_free_bits(page_cur_get_block(cursor)); } @@ -2007,7 +3890,6 @@ We assume here that the ordering fields of the record do not change. 
@retval DB_SUCCESS on success @retval DB_ZIP_OVERFLOW if there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN dberr_t btr_cur_update_in_place( /*====================*/ @@ -2039,6 +3921,9 @@ btr_cur_update_in_place( index = cursor->index; ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); + ut_ad(trx_id > 0 + || (flags & BTR_KEEP_SYS_FLAG) + || dict_table_is_intrinsic(index->table)); /* The insert buffer tree should never be updated in place. */ ut_ad(!dict_index_is_ibuf(index)); ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) @@ -2047,15 +3932,17 @@ btr_cur_update_in_place( || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)) == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX); + ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor))); ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id); #ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(trx_id, index, "update "); - rec_print_new(stderr, rec, offsets); + { + rec_printer p(rec, offsets); + DBUG_PRINT("ib_cur", ("update-in-place %s (" IB_ID_FMT ") by "IB_ID_FMT ": %s", + index->name(), index->id, trx_id, + p.str().c_str())); } -#endif /* UNIV_DEBUG */ +#endif block = btr_cur_get_block(cursor); page_zip = buf_block_get_page_zip(block); @@ -2083,7 +3970,8 @@ btr_cur_update_in_place( goto func_exit; } - if (!(flags & BTR_KEEP_SYS_FLAG)) { + if (!(flags & BTR_KEEP_SYS_FLAG) + && !dict_table_is_intrinsic(index->table)) { row_upd_rec_sys_fields(rec, NULL, index, offsets, thr_get_trx(thr), roll_ptr); } @@ -2110,13 +3998,13 @@ btr_cur_update_in_place( btr_search_update_hash_on_delete(cursor); } - rw_lock_x_lock(&btr_search_latch); + rw_lock_x_lock(btr_get_search_latch(index)); } row_upd_rec_in_place(rec, index, offsets, update, 
page_zip); if (is_hashed) { - rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(btr_get_search_latch(index)); } btr_cur_update_in_place_log(flags, rec, index, update, @@ -2138,6 +4026,7 @@ func_exit: if (page_zip && !(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) && page_is_leaf(buf_block_get_frame(block))) { /* Update the free bits in the insert buffer. */ ibuf_update_free_bits_zip(block, mtr); @@ -2158,7 +4047,6 @@ fields of the record do not change. @retval DB_UNDERFLOW if the page would become too empty @retval DB_ZIP_OVERFLOW if there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN dberr_t btr_cur_optimistic_update( /*======================*/ @@ -2199,8 +4087,13 @@ btr_cur_optimistic_update( page = buf_block_get_frame(block); rec = btr_cur_get_rec(cursor); index = cursor->index; + ut_ad(trx_id > 0 + || (flags & BTR_KEEP_SYS_FLAG) + || dict_table_is_intrinsic(index->table)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); + /* This is intended only for leaf page updates */ + ut_ad(page_is_leaf(page)); /* The insert buffer tree should never be updated in place. 
*/ ut_ad(!dict_index_is_ibuf(index)); ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) @@ -2209,7 +4102,7 @@ btr_cur_optimistic_update( || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)) == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_ad(fil_page_index_page_check(page)); ut_ad(btr_page_get_index_id(page) == index->id); *offsets = rec_get_offsets(rec, index, *offsets, @@ -2219,13 +4112,6 @@ btr_cur_optimistic_update( || trx_is_recv(thr_get_trx(thr))); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(trx_id, index, "update "); - rec_print_new(stderr, rec, *offsets); - } -#endif /* UNIV_DEBUG */ - if (!row_upd_changes_field_size_or_external(index, *offsets, update)) { /* The simplest and the most common case: the update does not @@ -2243,6 +4129,10 @@ any_extern: /* Externally stored fields are treated in pessimistic update */ + /* prefetch siblings of the leaf for the pessimistic + operation. */ + btr_cur_prefetch_siblings(block); + return(DB_OVERFLOW); } @@ -2253,6 +4143,15 @@ any_extern: } } +#ifdef UNIV_DEBUG + { + rec_printer p(rec, *offsets); + DBUG_PRINT("ib_cur", ("update %s (" IB_ID_FMT ") by " IB_ID_FMT ": %s", + index->name(), index->id, trx_id, + p.str().c_str())); + } +#endif + page_cursor = btr_cur_get_page_cur(cursor); if (!*heap) { @@ -2282,7 +4181,7 @@ any_extern: if (page_zip) { if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page), dict_index_get_n_fields(index), - page_zip_get_size(page_zip))) { + dict_table_page_size(index->table))) { goto any_extern; } @@ -2295,6 +4194,13 @@ any_extern: rec = page_cur_get_rec(page_cursor); } + /* We limit max record size to 16k even for 64k page size. 
*/ + if (new_rec_size >= REC_MAX_DATA_SIZE) { + err = DB_OVERFLOW; + + goto func_exit; + } + if (UNIV_UNLIKELY(new_rec_size >= (page_get_free_space_of_empty(page_is_comp(page)) / 2))) { @@ -2307,7 +4213,7 @@ any_extern: if (UNIV_UNLIKELY(page_get_data_size(page) - old_rec_size + new_rec_size - < BTR_CUR_PAGE_COMPRESS_LIMIT)) { + < BTR_CUR_PAGE_COMPRESS_LIMIT(index))) { /* We may need to update the IBUF_BITMAP_FREE bits after a reorganize that was done in btr_cur_update_alloc_zip(). */ @@ -2325,7 +4231,8 @@ any_extern: + page_get_max_insert_size_after_reorganize(page, 1)); if (!page_zip) { - max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + max_ins_size = page_get_max_insert_size_after_reorganize( + page, 1); } if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) @@ -2358,8 +4265,9 @@ any_extern: /* Ok, we may do the replacement. Store on the page infimum the explicit locks on rec, before deleting rec (see the comment in btr_cur_pessimistic_update). */ - - lock_rec_store_on_page_infimum(block, rec); + if (!dict_table_is_locking_disabled(index->table)) { + lock_rec_store_on_page_infimum(block, rec); + } btr_search_update_hash_on_delete(cursor); @@ -2367,7 +4275,8 @@ any_extern: page_cur_move_to_prev(page_cursor); - if (!(flags & BTR_KEEP_SYS_FLAG)) { + if (!(flags & BTR_KEEP_SYS_FLAG) + && !dict_table_is_intrinsic(index->table)) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, @@ -2380,8 +4289,9 @@ any_extern: ut_a(rec); /* <- We calculated above the insert would fit */ /* Restore the old explicit lock state on the record */ - - lock_rec_restore_from_page_infimum(block, rec, block); + if (!dict_table_is_locking_disabled(index->table)) { + lock_rec_restore_from_page_infimum(block, rec, block); + } page_cur_move_to_next(page_cursor); ut_ad(err == DB_SUCCESS); @@ -2389,8 +4299,8 @@ any_extern: func_exit: if (!(flags & BTR_KEEP_IBUF_BITMAP) && 
!dict_index_is_clust(index) - && page_is_leaf(page)) { - + && !dict_table_is_temporary(index->table)) { + /* Update the free bits in the insert buffer. */ if (page_zip) { ibuf_update_free_bits_zip(block, mtr); } else { @@ -2398,6 +4308,12 @@ func_exit: } } + if (err != DB_SUCCESS) { + /* prefetch siblings of the leaf for the pessimistic + operation. */ + btr_cur_prefetch_siblings(block); + } + return(err); } @@ -2417,9 +4333,6 @@ btr_cur_pess_upd_restore_supremum( { page_t* page; buf_block_t* prev_block; - ulint space; - ulint zip_size; - ulint prev_page_no; page = buf_block_get_frame(block); @@ -2429,13 +4342,12 @@ btr_cur_pess_upd_restore_supremum( return; } - space = buf_block_get_space(block); - zip_size = buf_block_get_zip_size(block); - prev_page_no = btr_page_get_prev(page, mtr); + const ulint prev_page_no = btr_page_get_prev(page, mtr); + + const page_id_t page_id(block->page.id.space(), prev_page_no); ut_ad(prev_page_no != FIL_NULL); - prev_block = buf_page_get_with_no_latch(space, zip_size, - prev_page_no, mtr); + prev_block = buf_page_get_with_no_latch(page_id, block->page.size, mtr); #ifdef UNIV_BTR_DEBUG ut_a(btr_page_get_next(prev_block->frame, mtr) == page_get_page_no(page)); @@ -2449,46 +4361,13 @@ btr_cur_pess_upd_restore_supremum( page_rec_get_heap_no(rec)); } -/*************************************************************//** -Check if the total length of the modified blob for the row is within 10% -of the total redo log size. This constraint on the blob length is to -avoid overwriting the redo logs beyond the last checkpoint lsn. -@return DB_SUCCESS or DB_TOO_BIG_FOR_REDO. 
*/ -static -dberr_t -btr_check_blob_limit(const big_rec_t* big_rec_vec) -{ - const ib_uint64_t redo_size = srv_n_log_files * srv_log_file_size - * UNIV_PAGE_SIZE; - const ib_uint64_t redo_10p = redo_size / 10; - ib_uint64_t total_blob_len = 0; - dberr_t err = DB_SUCCESS; - - /* Calculate the total number of bytes for blob data */ - for (ulint i = 0; i < big_rec_vec->n_fields; i++) { - total_blob_len += big_rec_vec->fields[i].len; - } - - if (total_blob_len > redo_10p) { - ib_logf(IB_LOG_LEVEL_ERROR, "The total blob data" - " length (" UINT64PF ") is greater than" - " 10%% of the total redo log size (" UINT64PF - "). Please increase total redo log size.", - total_blob_len, redo_size); - err = DB_TOO_BIG_FOR_REDO; - } - - return(err); -} - /*************************************************************//** Performs an update of a record on a page of a tree. It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the update is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. We assume here that the ordering fields of the record do not change. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t btr_cur_pessimistic_update( /*=======================*/ @@ -2506,9 +4385,10 @@ btr_cur_pessimistic_update( big_rec and the index tuple */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ - const upd_t* update, /*!< in: update vector; this is allowed also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ + upd_t* update, /*!< in/out: update vector; this is allowed to + also contain trx id and roll ptr fields. + Non-updated columns that are moved offpage will + be appended to this. 
*/ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ que_thr_t* thr, /*!< in: query thread */ @@ -2540,14 +4420,19 @@ btr_cur_pessimistic_update( page_zip = buf_block_get_page_zip(block); index = cursor->index; - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ /* The insert buffer tree should never be updated in place. */ ut_ad(!dict_index_is_ibuf(index)); + ut_ad(trx_id > 0 + || (flags & BTR_KEEP_SYS_FLAG) + || dict_table_is_intrinsic(index->table)); ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) || dict_index_is_clust(index)); ut_ad(thr_get_trx(thr)->id == trx_id @@ -2574,43 +4459,18 @@ btr_cur_pessimistic_update( if (page_zip && optim_err != DB_ZIP_OVERFLOW && !dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) && page_is_leaf(page)) { ibuf_update_free_bits_zip(block, mtr); } + if (big_rec_vec != NULL) { + dtuple_big_rec_free(big_rec_vec); + } + return(err); } - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, - update, cmpl_info, - thr, mtr, &roll_ptr); - if (err != DB_SUCCESS) { - goto err_exit; - } - - if (optim_err == DB_OVERFLOW) { - ulint reserve_flag; - - /* First reserve enough free space for the file segments - of the index tree, so that the update will not fail because - of lack of space */ - - ulint n_extents = cursor->tree_height / 16 + 3; - - if (flags & BTR_NO_UNDO_LOG_FLAG) { - reserve_flag = FSP_CLEANING; - } else { - reserve_flag = FSP_NORMAL; - } - - if (!fsp_reserve_free_extents(&n_reserved, index->space, - 
n_extents, reserve_flag, mtr)) { - err = DB_OUT_OF_FILE_SPACE; - goto err_exit; - } - } - rec = btr_cur_get_rec(cursor); *offsets = rec_get_offsets( @@ -2627,28 +4487,6 @@ btr_cur_pessimistic_update( itself. Thus the following call is safe. */ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, FALSE, entry_heap); - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx_id); - } - - if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) { - /* We are in a transaction rollback undoing a row - update: we must free possible externally stored fields - which got new values in the update, if they are not - inherited values. They can be inherited if we have - updated the primary key to another value, and then - update it back again. */ - - ut_ad(big_rec_vec == NULL); - - btr_rec_free_updated_extern_fields( - index, rec, page_zip, *offsets, update, - trx_is_recv(thr_get_trx(thr)) - ? RB_RECOVERY : RB_NORMAL, mtr); - } /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ @@ -2657,21 +4495,36 @@ btr_cur_pessimistic_update( ut_ad(rec_offs_validate(rec, index, *offsets)); n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap); - if (page_zip) { - ut_ad(page_is_comp(page)); - if (page_zip_rec_needs_ext( - rec_get_converted_size(index, new_entry, n_ext), - TRUE, - dict_index_get_n_fields(index), - page_zip_get_size(page_zip))) { + /* UNDO logging is also turned-off during normal operation on intrinsic + table so condition needs to ensure that table is not intrinsic. 
*/ + if ((flags & BTR_NO_UNDO_LOG_FLAG) + && rec_offs_any_extern(*offsets) + && !dict_table_is_intrinsic(index->table)) { + /* We are in a transaction rollback undoing a row + update: we must free possible externally stored fields + which got new values in the update, if they are not + inherited values. They can be inherited if we have + updated the primary key to another value, and then + update it back again. */ - goto make_external; - } - } else if (page_zip_rec_needs_ext( - rec_get_converted_size(index, new_entry, n_ext), - page_is_comp(page), 0, 0)) { -make_external: - big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext); + ut_ad(big_rec_vec == NULL); + ut_ad(dict_index_is_clust(index)); + ut_ad(thr_get_trx(thr)->in_rollback); + + DBUG_EXECUTE_IF("ib_blob_update_rollback", DBUG_SUICIDE();); + RECOVERY_CRASH(99); + + btr_rec_free_updated_extern_fields( + index, rec, page_zip, *offsets, update, true, mtr); + } + + if (page_zip_rec_needs_ext( + rec_get_converted_size(index, new_entry, n_ext), + page_is_comp(page), + dict_index_get_n_fields(index), + block->page.size)) { + + big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext); if (UNIV_UNLIKELY(big_rec_vec == NULL)) { /* We cannot goto return_after_reservations, @@ -2696,21 +4549,43 @@ make_external: ut_ad(flags & BTR_KEEP_POS_FLAG); } - if (big_rec_vec) { + /* Do lock checking and undo logging */ + err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, + update, cmpl_info, + thr, mtr, &roll_ptr); + if (err != DB_SUCCESS) { + goto err_exit; + } - err = btr_check_blob_limit(big_rec_vec); + if (optim_err == DB_OVERFLOW) { - if (err != DB_SUCCESS) { - if (n_reserved > 0) { - fil_space_release_free_extents( - index->space, n_reserved); - } + /* First reserve enough free space for the file segments + of the index tree, so that the update will not fail because + of lack of space */ + + ulint n_extents = cursor->tree_height / 16 + 3; + + if (!fsp_reserve_free_extents( + &n_reserved, 
index->space, n_extents, + flags & BTR_NO_UNDO_LOG_FLAG + ? FSP_CLEANING : FSP_NORMAL, + mtr)) { + err = DB_OUT_OF_FILE_SPACE; goto err_exit; } } + if (!(flags & BTR_KEEP_SYS_FLAG) + && !dict_table_is_intrinsic(index->table)) { + row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, + roll_ptr); + row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, + trx_id); + } + if (!page_zip) { - max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + max_ins_size = page_get_max_insert_size_after_reorganize( + page, 1); } /* Store state of explicit locks on rec on the page infimum record, @@ -2721,8 +4596,9 @@ make_external: btr_root_raise_and_insert. Therefore we cannot in the lock system delete the lock structs set on the root page even if the root page carries just node pointers. */ - - lock_rec_store_on_page_infimum(block, rec); + if (!dict_table_is_locking_disabled(index->table)) { + lock_rec_store_on_page_infimum(block, rec); + } btr_search_update_hash_on_delete(cursor); @@ -2741,8 +4617,10 @@ make_external: if (rec) { page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), - rec, block); + if (!dict_table_is_locking_disabled(index->table)) { + lock_rec_restore_from_page_infimum( + btr_cur_get_block(cursor), rec, block); + } if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { /* The new inserted record owns its possible externally @@ -2759,8 +4637,8 @@ make_external: page_cursor->rec, index, *offsets); } } else if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) && page_is_leaf(page)) { - /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. 
*/ @@ -2772,6 +4650,18 @@ make_external: } } + if (!srv_read_only_mode + && !big_rec_vec + && page_is_leaf(page) + && !dict_index_is_online_ddl(index)) { + + mtr_memo_release(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK); + + /* NOTE: We cannot release root block latch here, because it + has segment header and already modified in most of cases.*/ + } + err = DB_SUCCESS; goto return_after_reservations; } else { @@ -2785,24 +4675,31 @@ make_external: /* Out of space: reset the free bits. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. */ - if (!dict_index_is_clust(index) && page_is_leaf(page)) { + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(page)) { ibuf_reset_free_bits(block); } } - if (big_rec_vec) { + if (big_rec_vec != NULL && !dict_table_is_intrinsic(index->table)) { ut_ad(page_is_leaf(page)); ut_ad(dict_index_is_clust(index)); ut_ad(flags & BTR_KEEP_POS_FLAG); /* btr_page_split_and_insert() in btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). + mtr_memo_release(mtr, index->lock, MTR_MEMO_SX_LOCK). We must keep the index->lock when we created a big_rec, so that row_upd_clust_rec() can store the big_rec in the same mini-transaction. */ - mtr_x_lock(dict_index_get_lock(index), mtr); + ut_ad(mtr_memo_contains_flagged(mtr, + dict_index_get_lock(index), + MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + + mtr_sx_lock(dict_index_get_lock(index), mtr); } /* Was the record to be updated positioned as the first user @@ -2826,7 +4723,12 @@ make_external: ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); page_cursor->rec = rec; - if (dict_index_is_sec_or_ibuf(index)) { + /* Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it not required + for MVCC. 
*/ + if (dict_index_is_sec_or_ibuf(index) + && !dict_table_is_temporary(index->table)) { /* Update PAGE_MAX_TRX_ID in the index page header. It was not updated by btr_cur_pessimistic_insert() because of BTR_NO_LOCKING_FLAG. */ @@ -2854,15 +4756,17 @@ make_external: rec, index, *offsets, mtr); } - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), - rec, block); + if (!dict_table_is_locking_disabled(index->table)) { + lock_rec_restore_from_page_infimum( + btr_cur_get_block(cursor), rec, block); + } /* If necessary, restore also the correct lock state for a new, preceding supremum record created in a page split. While the old record was nonexistent, the supremum might have inherited its locks from a wrong record. */ - if (!was_first) { + if (!was_first && !dict_table_is_locking_disabled(index->table)) { btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor), rec, mtr); } @@ -2899,6 +4803,7 @@ btr_cur_del_mark_set_clust_rec_log( byte* log_ptr; ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); + ut_ad(mtr->is_named_space(index->space)); log_ptr = mlog_open_and_write_index(mtr, rec, index, page_rec_is_comp(rec) @@ -2927,8 +4832,7 @@ btr_cur_del_mark_set_clust_rec_log( /****************************************************************//** Parses the redo log record for delete marking or unmarking of a clustered index record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ @@ -2979,7 +4883,7 @@ btr_cur_parse_del_mark_set_clust_rec( if (page) { rec = page + offset; - /* We do not need to reserve btr_search_latch, as the page + /* We do not need to reserve search latch, as the page is only being recovered, and there cannot be a hash index to it. Besides, these fields are being updated in place and the adaptive hash index does not depend on them. */ @@ -3011,16 +4915,18 @@ Marks a clustered index record deleted. 
Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id of the deleting transaction, and in the roll ptr field pointer to the undo log record created. -@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ dberr_t btr_cur_del_mark_set_clust_rec( /*===========================*/ + ulint flags, /*!< in: undo logging and locking flags */ buf_block_t* block, /*!< in/out: buffer block of the record */ rec_t* rec, /*!< in/out: record */ dict_index_t* index, /*!< in: clustered index of the record */ const ulint* offsets,/*!< in: rec_get_offsets(rec) */ que_thr_t* thr, /*!< in: query thread */ + const dtuple_t* entry, /*!< in: dtuple for the deleting record, also + contains the virtual cols if there are any */ mtr_t* mtr) /*!< in/out: mini-transaction */ { roll_ptr_t roll_ptr; @@ -3033,16 +4939,13 @@ btr_cur_del_mark_set_clust_rec( ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); ut_ad(buf_block_get_frame(block) == page_align(rec)); ut_ad(page_is_leaf(page_align(rec))); + ut_ad(mtr->is_named_space(index->space)); -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && (thr != NULL)) { - btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark "); - rec_print_new(stderr, rec, offsets); + if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { + /* While cascading delete operations, this becomes possible. 
*/ + ut_ad(rec_get_trx_id(rec, index) == thr_get_trx(thr)->id); + return(DB_SUCCESS); } -#endif /* UNIV_DEBUG */ - - ut_ad(dict_index_is_clust(index)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block, rec, index, offsets, thr); @@ -3052,27 +4955,46 @@ btr_cur_del_mark_set_clust_rec( return(err); } - err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr, - index, NULL, NULL, 0, rec, offsets, + err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, + index, entry, NULL, 0, rec, offsets, &roll_ptr); if (err != DB_SUCCESS) { return(err); } - /* The btr_search_latch is not needed here, because + /* The search latch is not needed here, because the adaptive hash index does not depend on the delete-mark and the delete-mark is being updated in place. */ page_zip = buf_block_get_page_zip(block); - btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE); btr_rec_set_deleted_flag(rec, page_zip, TRUE); + /* For intrinsic table, roll-ptr is not maintained as there is no UNDO + logging. Skip updating it. */ + if (dict_table_is_intrinsic(index->table)) { + return(err); + } + trx = thr_get_trx(thr); + /* This function must not be invoked during rollback + (of a TRX_STATE_PREPARE transaction or otherwise). 
*/ + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + ut_ad(!trx->in_rollback); + +#ifdef UNIV_DEBUG + { + rec_printer p(rec, offsets); + DBUG_PRINT("ib_cur", ("delete-mark clust %s (" IB_ID_FMT ") by " IB_ID_FMT ": %s", + index->table_name, index->id, + trx_get_id_for_print(trx), + p.str().c_str())); + } +#endif if (dict_index_is_online_ddl(index)) { - row_log_table_delete(rec, index, offsets, NULL); + row_log_table_delete(rec, entry, index, offsets, NULL); } row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr); @@ -3120,8 +5042,7 @@ btr_cur_del_mark_set_sec_rec_log( /****************************************************************//** Parses the redo log record for delete marking or unmarking of a secondary index record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ @@ -3150,7 +5071,7 @@ btr_cur_parse_del_mark_set_sec_rec( if (page) { rec = page + offset; - /* We do not need to reserve btr_search_latch, as the page + /* We do not need to reserve search latch, as the page is only being recovered, and there cannot be a hash index to it. Besides, the delete-mark flag is being updated in place and the adaptive hash index does not depend on it. */ @@ -3164,8 +5085,7 @@ btr_cur_parse_del_mark_set_sec_rec( #ifndef UNIV_HOTBACKUP /***********************************************************//** Sets a secondary index record delete mark to TRUE or FALSE. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ dberr_t btr_cur_del_mark_set_sec_rec( /*=========================*/ @@ -3182,14 +5102,6 @@ btr_cur_del_mark_set_sec_rec( block = btr_cur_get_block(cursor); rec = btr_cur_get_rec(cursor); -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && (thr != NULL)) { - btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index, - "del mark "); - rec_print(stderr, rec, cursor->index); - } -#endif /* UNIV_DEBUG */ - err = lock_sec_rec_modify_check_and_lock(flags, btr_cur_get_block(cursor), rec, cursor->index, thr, mtr); @@ -3201,7 +5113,15 @@ btr_cur_del_mark_set_sec_rec( ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(cursor->index->table)); - /* We do not need to reserve btr_search_latch, as the + DBUG_PRINT("ib_cur", ("delete-mark=%u sec %u:%u:%u in %s(" + IB_ID_FMT ") by " TRX_ID_FMT, + unsigned(val), + block->page.id.space(), block->page.id.page_no(), + unsigned(page_rec_get_heap_no(rec)), + cursor->index->name(), cursor->index->id, + trx_get_id_for_print(thr_get_trx(thr)))); + + /* We do not need to reserve search latch, as the delete-mark flag is being updated in place and the adaptive hash index does not depend on it. */ btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val); @@ -3214,7 +5134,6 @@ btr_cur_del_mark_set_sec_rec( /***********************************************************//** Sets a secondary index record's delete mark to the given value. This function is only used by the insert buffer merge mechanism. */ -UNIV_INTERN void btr_cur_set_deleted_flag_for_ibuf( /*==============================*/ @@ -3226,7 +5145,7 @@ btr_cur_set_deleted_flag_for_ibuf( ibool val, /*!< in: value to set */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - /* We do not need to reserve btr_search_latch, as the page + /* We do not need to reserve search latch, as the page has just been read to the buffer pool and there cannot be a hash index to it. 
Besides, the delete-mark flag is being updated in place and the adaptive hash index does not depend @@ -3245,8 +5164,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done! -@return TRUE if compression occurred */ -UNIV_INTERN +@return TRUE if compression occurred */ ibool btr_cur_compress_if_useful( /*=======================*/ @@ -3257,11 +5175,34 @@ btr_cur_compress_if_useful( cursor position even if compression occurs */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); + /* Avoid applying compression as we don't accept lot of page garbage + given the workload of intrinsic table. */ + if (dict_table_is_intrinsic(cursor->index->table)) { + return(FALSE); + } + + ut_ad(mtr_memo_contains_flagged( + mtr, dict_index_get_lock(btr_cur_get_index(cursor)), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(cursor->index->table)); + ut_ad(mtr_is_block_fix( + mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX, cursor->index->table)); + + if (dict_index_is_spatial(cursor->index)) { + const page_t* page = btr_cur_get_page(cursor); + const trx_t* trx = NULL; + + if (cursor->rtr_info->thr != NULL) { + trx = thr_get_trx(cursor->rtr_info->thr); + } + + /* Check whether page lock prevents the compression */ + if (!lock_test_prdt_page_lock(trx, page_get_space_id(page), + page_get_page_no(page))) { + return(false); + } + } return(btr_cur_compress_recommendation(cursor, mtr) && btr_compress(cursor, adjust, mtr)); @@ -3271,8 +5212,7 @@ btr_cur_compress_if_useful( Removes the record on which the tree cursor is positioned on a leaf page. 
It is assumed that the mtr has an x-latch on the page where the cursor is positioned, but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -UNIV_INTERN +@return TRUE if success, i.e., the page did not become too empty */ ibool btr_cur_optimistic_delete_func( /*===========================*/ @@ -3299,6 +5239,10 @@ btr_cur_optimistic_delete_func( ut_ad(flags == 0 || flags == BTR_CREATE_FLAG); ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX, cursor->index->table)); + ut_ad(mtr->is_named_space(cursor->index->space)); + /* This is intended only for leaf page deletions */ block = btr_cur_get_block(cursor); @@ -3351,12 +5295,16 @@ btr_cur_optimistic_delete_func( /* The change buffer does not handle inserts into non-leaf pages, into clustered indexes, or into the change buffer. */ - if (page_is_leaf(page) - && !dict_index_is_clust(cursor->index) + if (!dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table) && !dict_index_is_ibuf(cursor->index)) { ibuf_update_free_bits_low(block, max_ins, mtr); } } + } else { + /* prefetch siblings of the leaf for the pessimistic + operation. */ + btr_cur_prefetch_siblings(block); } if (UNIV_LIKELY_NULL(heap)) { @@ -3373,8 +5321,8 @@ or if it is the only page on the level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. -@return TRUE if compression occurred */ -UNIV_INTERN +@return TRUE if compression occurred and FALSE if not or something +wrong. 
*/ ibool btr_cur_pessimistic_delete( /*=======================*/ @@ -3392,7 +5340,7 @@ btr_cur_pessimistic_delete( stays valid: it points to successor of deleted record on function exit */ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? */ mtr_t* mtr) /*!< in: mtr */ { buf_block_t* block; @@ -3401,11 +5349,14 @@ btr_cur_pessimistic_delete( dict_index_t* index; rec_t* rec; ulint n_reserved = 0; - ibool success; + bool success; ibool ret = FALSE; ulint level; mem_heap_t* heap; ulint* offsets; +#ifdef UNIV_DEBUG + bool parent_latched = false; +#endif /* UNIV_DEBUG */ block = btr_cur_get_block(cursor); page = buf_block_get_frame(block); @@ -3415,9 +5366,13 @@ btr_cur_pessimistic_delete( ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); + ut_ad(mtr->is_named_space(index->space)); + if (!has_reserved_extents) { /* First reserve enough free space for the file segments of the index tree, so that the node pointer updates will @@ -3448,7 +5403,7 @@ btr_cur_pessimistic_delete( if (rec_offs_any_extern(offsets)) { btr_rec_free_externally_stored_fields(index, rec, offsets, page_zip, - rb_ctx, mtr); + rollback, mtr); #ifdef UNIV_ZIP_DEBUG ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ @@ -3456,7 +5411,7 @@ btr_cur_pessimistic_delete( if (UNIV_UNLIKELY(page_get_n_recs(page) < 2) && UNIV_UNLIKELY(dict_index_get_page(index) - != buf_block_get_page_no(block))) { + != block->page.id.page_no())) { /* If there is only one record, drop the 
whole page in btr_discard_page, if this is not the root page */ @@ -3492,20 +5447,55 @@ btr_cur_pessimistic_delete( mini-transaction and because writing to the redo log is an atomic operation (performed by mtr_commit()). */ btr_set_min_rec_mark(next_rec, mtr); + } else if (dict_index_is_spatial(index)) { + /* For rtree, if delete the leftmost node pointer, + we need to update parent page. */ + rtr_mbr_t father_mbr; + rec_t* father_rec; + btr_cur_t father_cursor; + ulint* offsets; + bool upd_ret; + ulint len; + + rtr_page_get_father_block(NULL, heap, index, + block, mtr, NULL, + &father_cursor); + offsets = rec_get_offsets( + btr_cur_get_rec(&father_cursor), index, + NULL, ULINT_UNDEFINED, &heap); + + father_rec = btr_cur_get_rec(&father_cursor); + rtr_read_mbr(rec_get_nth_field( + father_rec, offsets, 0, &len), &father_mbr); + + upd_ret = rtr_update_mbr_field(&father_cursor, offsets, + NULL, page, &father_mbr, + next_rec, mtr); + + if (!upd_ret) { + *err = DB_ERROR; + + mem_heap_free(heap); + return(FALSE); + } + + ut_d(parent_latched = true); } else { /* Otherwise, if we delete the leftmost node pointer - on a page, we have to change the father node pointer + on a page, we have to change the parent node pointer so that it is equal to the new leftmost node pointer on the page */ btr_node_ptr_delete(index, block, mtr); dtuple_t* node_ptr = dict_index_build_node_ptr( - index, next_rec, buf_block_get_page_no(block), + index, next_rec, block->page.id.page_no(), heap, level); btr_insert_on_non_leaf_level( flags, index, level + 1, node_ptr, mtr); + + ut_d(parent_latched = true); } } @@ -3516,7 +5506,8 @@ btr_cur_pessimistic_delete( ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_ad(btr_check_node_ptr(index, block, mtr)); + /* btr_check_node_ptr() needs parent block latched */ + ut_ad(!parent_latched || btr_check_node_ptr(index, block, mtr)); return_after_reservations: *err = DB_SUCCESS; @@ -3527,6 +5518,17 @@ 
return_after_reservations: ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); } + if (!srv_read_only_mode + && page_is_leaf(page) + && !dict_index_is_online_ddl(index)) { + + mtr_memo_release(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK); + + /* NOTE: We cannot release root block latch here, because it + has segment header and already modified in most of cases.*/ + } + if (n_reserved > 0) { fil_space_release_free_extents(index->space, n_reserved); } @@ -3590,63 +5592,63 @@ so far and assume that all pages that we did not scan up to slot2->page contain the same number of records, then we multiply that average to the number of pages between slot1->page and slot2->page (which is n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE. -@return number of rows (exact or estimated) */ +@return number of rows, not including the borders (exact or estimated) */ static -ib_int64_t +int64_t btr_estimate_n_rows_in_range_on_level( /*==================================*/ dict_index_t* index, /*!< in: index */ btr_path_t* slot1, /*!< in: left border */ btr_path_t* slot2, /*!< in: right border */ - ib_int64_t n_rows_on_prev_level, /*!< in: number of rows + int64_t n_rows_on_prev_level, /*!< in: number of rows on the previous level for the same descend paths; used to - determine the numbe of pages + determine the number of pages on this level */ ibool* is_n_rows_exact) /*!< out: TRUE if the returned value is exact i.e. not an estimation */ { - ulint space; - ib_int64_t n_rows; + int64_t n_rows; ulint n_pages_read; - ulint page_no; - ulint zip_size; ulint level; - space = dict_index_get_space(index); - n_rows = 0; n_pages_read = 0; /* Assume by default that we will scan all pages between - slot1->page_no and slot2->page_no */ + slot1->page_no and slot2->page_no. 
*/ *is_n_rows_exact = TRUE; - /* add records from slot1->page_no which are to the right of - the record which serves as a left border of the range, if any */ - if (slot1->nth_rec < slot1->n_recs) { + /* Add records from slot1->page_no which are to the right of + the record which serves as a left border of the range, if any + (we don't include the record itself in this count). */ + if (slot1->nth_rec <= slot1->n_recs) { n_rows += slot1->n_recs - slot1->nth_rec; } - /* add records from slot2->page_no which are to the left of - the record which servers as a right border of the range, if any */ + /* Add records from slot2->page_no which are to the left of + the record which servers as a right border of the range, if any + (we don't include the record itself in this count). */ if (slot2->nth_rec > 1) { n_rows += slot2->nth_rec - 1; } - /* count the records in the pages between slot1->page_no and - slot2->page_no (non inclusive), if any */ - - zip_size = fil_space_get_zip_size(space); + /* Count the records in the pages between slot1->page_no and + slot2->page_no (non inclusive), if any. */ /* Do not read more than this number of pages in order not to hurt performance with this code which is just an estimation. If we read this many pages before reaching slot2->page_no then we estimate the - average from the pages scanned so far */ + average from the pages scanned so far. */ # define N_PAGES_READ_LIMIT 10 - page_no = slot1->page_no; + page_id_t page_id( + dict_index_get_space(index), slot1->page_no); + const fil_space_t* space = fil_space_get(index->space); + ut_ad(space); + const page_size_t page_size(space->flags); + level = slot1->page_level; do { @@ -3662,7 +5664,7 @@ btr_estimate_n_rows_in_range_on_level( attempting to read a page that is no longer part of the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to silence a debug assertion about this. 
*/ - block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, + block = buf_page_get_gen(page_id, page_size, RW_S_LATCH, NULL, BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err); @@ -3690,7 +5692,7 @@ btr_estimate_n_rows_in_range_on_level( this is only an estimate. We are sure that a page with page_no exists because InnoDB never frees pages, only reuses them. */ - if (fil_page_get_type(page) != FIL_PAGE_INDEX + if (!fil_page_index_page_check(page) || btr_page_get_index_id(page) != index->id || btr_page_get_level_low(page) != level) { @@ -3708,18 +5710,18 @@ btr_estimate_n_rows_in_range_on_level( n_pages_read++; - if (page_no != slot1->page_no) { + if (page_id.page_no() != slot1->page_no) { /* Do not count the records on slot1->page_no, we already counted them before this loop. */ n_rows += page_get_n_recs(page); } - page_no = btr_page_get_next(page, &mtr); + page_id.set_page_no(btr_page_get_next(page, &mtr)); mtr_commit(&mtr); if (n_pages_read == N_PAGES_READ_LIMIT - || page_no == FIL_NULL) { + || page_id.page_no() == FIL_NULL) { /* Either we read too many pages or we reached the end of the level without passing through slot2->page_no, the tree must have changed @@ -3727,7 +5729,7 @@ btr_estimate_n_rows_in_range_on_level( goto inexact; } - } while (page_no != slot2->page_no); + } while (page_id.page_no() != slot2->page_no); return(n_rows); @@ -3752,19 +5754,40 @@ inexact: return(n_rows); } -/*******************************************************************//** -Estimates the number of rows in a given index range. 
-@return estimated number of rows */ -UNIV_INTERN -ib_int64_t -btr_estimate_n_rows_in_range( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */ - ulint mode1, /*!< in: search mode for range start */ - const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */ - ulint mode2, /*!< in: search mode for range end */ - trx_t* trx) /*!< in: trx */ +/** If the tree gets changed too much between the two dives for the left +and right boundary then btr_estimate_n_rows_in_range_low() will retry +that many times before giving up and returning the value stored in +rows_in_range_arbitrary_ret_val. */ +static const unsigned rows_in_range_max_retries = 4; + +/** We pretend that a range has that many records if the tree keeps changing +for rows_in_range_max_retries retries while we try to estimate the records +in a given range. */ +static const int64_t rows_in_range_arbitrary_ret_val = 10; + +/** Estimates the number of rows in a given index range. +@param[in] index index +@param[in] tuple1 range start, may also be empty tuple +@param[in] mode1 search mode for range start +@param[in] tuple2 range end, may also be empty tuple +@param[in] mode2 search mode for range end +@param[in] nth_attempt if the tree gets modified too much while +we are trying to analyze it, then we will retry (this function will call +itself, incrementing this parameter) +@return estimated number of rows; if after rows_in_range_max_retries +retries the tree keeps changing, then we will just return +rows_in_range_arbitrary_ret_val as a result (if +nth_attempt >= rows_in_range_max_retries and the tree is modified between +the two dives). 
*/ +static +int64_t +btr_estimate_n_rows_in_range_low( + dict_index_t* index, + const dtuple_t* tuple1, + page_cur_mode_t mode1, + const dtuple_t* tuple2, + page_cur_mode_t mode2, + unsigned nth_attempt) { btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS]; btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS]; @@ -3774,60 +5797,157 @@ btr_estimate_n_rows_in_range( ibool diverged; ibool diverged_lot; ulint divergence_level; - ib_int64_t n_rows; + int64_t n_rows; ibool is_n_rows_exact; ulint i; mtr_t mtr; - ib_int64_t table_n_rows; + int64_t table_n_rows; table_n_rows = dict_table_get_n_rows(index->table); - mtr_start_trx(&mtr, trx); + /* Below we dive to the two records specified by tuple1 and tuple2 and + we remember the entire dive paths from the tree root. The place where + the tuple1 path ends on the leaf level we call "left border" of our + interval and the place where the tuple2 path ends on the leaf level - + "right border". We take care to either include or exclude the interval + boundaries depending on whether <, <=, > or >= was specified. For + example if "5 < x AND x <= 10" then we should not include the left + boundary, but should include the right one. */ + + mtr_start(&mtr); cursor.path_arr = path1; + bool should_count_the_left_border; + if (dtuple_get_n_fields(tuple1) > 0) { btr_cur_search_to_nth_level(index, 0, tuple1, mode1, BTR_SEARCH_LEAF | BTR_ESTIMATE, &cursor, 0, __FILE__, __LINE__, &mtr); + + ut_ad(!page_rec_is_infimum(btr_cur_get_rec(&cursor))); + + /* We should count the border if there are any records to + match the criteria, i.e. if the maximum record on the tree is + 5 and x > 3 is specified then the cursor will be positioned at + 5 and we should count the border, but if x > 7 is specified, + then the cursor will be positioned at 'sup' on the rightmost + leaf page in the tree and we should not count the border. 
*/ + should_count_the_left_border + = !page_rec_is_supremum(btr_cur_get_rec(&cursor)); } else { - btr_cur_open_at_index_side(true, index, + dberr_t err = DB_SUCCESS; + + err = btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, &cursor, 0, &mtr); + + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " btr_estimate_n_rows_in_range_low " + << " called from file: " + << __FILE__ << " line: " << __LINE__ + << " table: " << index->table->name + << " index: " << index->name; + } + + ut_ad(page_rec_is_infimum(btr_cur_get_rec(&cursor))); + + /* The range specified is wihout a left border, just + 'x < 123' or 'x <= 123' and btr_cur_open_at_index_side() + positioned the cursor on the infimum record on the leftmost + page, which must not be counted. */ + should_count_the_left_border = false; } mtr_commit(&mtr); - mtr_start_trx(&mtr, trx); + mtr_start(&mtr); cursor.path_arr = path2; + bool should_count_the_right_border; + if (dtuple_get_n_fields(tuple2) > 0) { btr_cur_search_to_nth_level(index, 0, tuple2, mode2, BTR_SEARCH_LEAF | BTR_ESTIMATE, &cursor, 0, __FILE__, __LINE__, &mtr); + + const rec_t* rec = btr_cur_get_rec(&cursor); + + ut_ad(!(mode2 == PAGE_CUR_L && page_rec_is_supremum(rec))); + + should_count_the_right_border + = (mode2 == PAGE_CUR_LE /* if the range is '<=' */ + /* and the record was found */ + && cursor.low_match >= dtuple_get_n_fields(tuple2)) + || (mode2 == PAGE_CUR_L /* or if the range is '<' */ + /* and there are any records to match the criteria, + i.e. 
if the minimum record on the tree is 5 and + x < 7 is specified then the cursor will be + positioned at 5 and we should count the border, but + if x < 2 is specified, then the cursor will be + positioned at 'inf' and we should not count the + border */ + && !page_rec_is_infimum(rec)); + /* Notice that for "WHERE col <= 'foo'" MySQL passes to + ha_innobase::records_in_range(): + min_key=NULL (left-unbounded) which is expected + max_key='foo' flag=HA_READ_AFTER_KEY (PAGE_CUR_G), which is + unexpected - one would expect + flag=HA_READ_KEY_OR_PREV (PAGE_CUR_LE). In this case the + cursor will be positioned on the first record to the right of + the requested one (can also be positioned on the 'sup') and + we should not count the right border. */ } else { - btr_cur_open_at_index_side(false, index, + dberr_t err = DB_SUCCESS; + + err = btr_cur_open_at_index_side(false, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, &cursor, 0, &mtr); + + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " btr_estimate_n_rows_in_range_low " + << " called from file: " + << __FILE__ << " line: " << __LINE__ + << " table: " << index->table->name + << " index: " << index->name; + } + + + ut_ad(page_rec_is_supremum(btr_cur_get_rec(&cursor))); + + /* The range specified is wihout a right border, just + 'x > 123' or 'x >= 123' and btr_cur_open_at_index_side() + positioned the cursor on the supremum record on the rightmost + page, which must not be counted. 
*/ + should_count_the_right_border = false; } mtr_commit(&mtr); /* We have the path information for the range in path1 and path2 */ - n_rows = 1; + n_rows = 0; is_n_rows_exact = TRUE; - diverged = FALSE; /* This becomes true when the path is not - the same any more */ - diverged_lot = FALSE; /* This becomes true when the paths are - not the same or adjacent any more */ - divergence_level = 1000000; /* This is the level where paths diverged - a lot */ + + /* This becomes true when the two paths do not pass through the + same pages anymore. */ + diverged = FALSE; + + /* This becomes true when the paths are not the same or adjacent + any more. This means that they pass through the same or + neighboring-on-the-same-level pages only. */ + diverged_lot = FALSE; + + /* This is the level where paths diverged a lot. */ + divergence_level = 1000000; + for (i = 0; ; i++) { ut_ad(i < BTR_PATH_ARRAY_N_SLOTS); @@ -3837,6 +5957,70 @@ btr_estimate_n_rows_in_range( if (slot1->nth_rec == ULINT_UNDEFINED || slot2->nth_rec == ULINT_UNDEFINED) { + /* Here none of the borders were counted. For example, + if on the leaf level we descended to: + (inf, a, b, c, d, e, f, sup) + ^ ^ + path1 path2 + then n_rows will be 2 (c and d). */ + + if (is_n_rows_exact) { + /* Only fiddle to adjust this off-by-one + if the number is exact, otherwise we do + much grosser adjustments below. */ + + btr_path_t* last1 = &path1[i - 1]; + btr_path_t* last2 = &path2[i - 1]; + + /* If both paths end up on the same record on + the leaf level. */ + if (last1->page_no == last2->page_no + && last1->nth_rec == last2->nth_rec) { + + /* n_rows can be > 0 here if the paths + were first different and then converged + to the same record on the leaf level. + For example: + SELECT ... 
LIKE 'wait/synch/rwlock%' + mode1=PAGE_CUR_GE, + tuple1="wait/synch/rwlock" + path1[0]={nth_rec=58, n_recs=58, + page_no=3, page_level=1} + path1[1]={nth_rec=56, n_recs=55, + page_no=119, page_level=0} + + mode2=PAGE_CUR_G + tuple2="wait/synch/rwlock" + path2[0]={nth_rec=57, n_recs=57, + page_no=3, page_level=1} + path2[1]={nth_rec=56, n_recs=55, + page_no=119, page_level=0} */ + + /* If the range is such that we should + count both borders, then avoid + counting that record twice - once as a + left border and once as a right + border. */ + if (should_count_the_left_border + && should_count_the_right_border) { + + n_rows = 1; + } else { + /* Some of the borders should + not be counted, e.g. [3,3). */ + n_rows = 0; + } + } else { + if (should_count_the_left_border) { + n_rows++; + } + + if (should_count_the_right_border) { + n_rows++; + } + } + } + if (i > divergence_level + 1 && !is_n_rows_exact) { /* In trees whose height is > 1 our algorithm tends to underestimate: multiply the estimate @@ -3868,12 +6052,41 @@ btr_estimate_n_rows_in_range( if (!diverged && slot1->nth_rec != slot2->nth_rec) { + /* If both slots do not point to the same page, + this means that the tree must have changed between + the dive for slot1 and the dive for slot2 at the + beginning of this function. */ + if (slot1->page_no != slot2->page_no + || slot1->page_level != slot2->page_level) { + + /* If the tree keeps changing even after a + few attempts, then just return some arbitrary + number. */ + if (nth_attempt >= rows_in_range_max_retries) { + return(rows_in_range_arbitrary_ret_val); + } + + const int64_t ret = + btr_estimate_n_rows_in_range_low( + index, tuple1, mode1, + tuple2, mode2, nth_attempt + 1); + + return(ret); + } + diverged = TRUE; if (slot1->nth_rec < slot2->nth_rec) { - n_rows = slot2->nth_rec - slot1->nth_rec; + /* We do not count the borders (nor the left + nor the right one), thus "- 1". 
*/ + n_rows = slot2->nth_rec - slot1->nth_rec - 1; - if (n_rows > 1) { + if (n_rows > 0) { + /* There is at least one row between + the two borders pointed to by slot1 + and slot2, so on the level below the + slots will point to non-adjacent + pages. */ diverged_lot = TRUE; divergence_level = i; } @@ -3885,8 +6098,10 @@ btr_estimate_n_rows_in_range( and we select where x > 20 and x < 30; in this case slot1->nth_rec will point to the supr record and slot2->nth_rec - will point to 6 */ + will point to 6. */ n_rows = 0; + should_count_the_left_border = false; + should_count_the_right_border = false; } } else if (diverged && !diverged_lot) { @@ -3917,6 +6132,27 @@ btr_estimate_n_rows_in_range( } } +/** Estimates the number of rows in a given index range. +@param[in] index index +@param[in] tuple1 range start, may also be empty tuple +@param[in] mode1 search mode for range start +@param[in] tuple2 range end, may also be empty tuple +@param[in] mode2 search mode for range end +@return estimated number of rows */ +int64_t +btr_estimate_n_rows_in_range( + dict_index_t* index, + const dtuple_t* tuple1, + page_cur_mode_t mode1, + const dtuple_t* tuple2, + page_cur_mode_t mode2) +{ + const int64_t ret = btr_estimate_n_rows_in_range_low( + index, tuple1, mode1, tuple2, mode2, 1 /* first attempt */); + + return(ret); +} + /*******************************************************************//** Record the number of non_null key values in a given index for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). @@ -3960,9 +6196,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed index->stat_n_sample_sizes[]. If innodb_stats_method is nulls_ignored, we also record the number of non-null values for each prefix and stored the estimates in -array index->stat_n_non_null_key_vals. */ -UNIV_INTERN -void +array index->stat_n_non_null_key_vals. 
+@return true if the index is available and we get the estimated numbers, +false if the index is unavailable. */ +bool btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index) /*!< in: index */ @@ -3971,22 +6208,26 @@ btr_estimate_number_of_different_key_vals( page_t* page; rec_t* rec; ulint n_cols; - ulint matched_fields; - ulint matched_bytes; ib_uint64_t* n_diff; ib_uint64_t* n_not_null; ibool stats_null_not_equal; - ullint n_sample_pages = 1; /* number of pages to sample */ + uintmax_t n_sample_pages=1; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; ulint j; - ullint add_on; + uintmax_t add_on; mtr_t mtr; mem_heap_t* heap = NULL; ulint* offsets_rec = NULL; ulint* offsets_next_rec = NULL; + /* For spatial index, there is no such stats can be + fetched. */ + if (dict_index_is_spatial(index)) { + return(false); + } + n_cols = dict_index_get_n_unique(index); heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) @@ -3996,7 +6237,7 @@ btr_estimate_number_of_different_key_vals( + sizeof *offsets_next_rec)); n_diff = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof(ib_int64_t)); + heap, n_cols * sizeof(n_diff[0])); n_not_null = NULL; @@ -4021,7 +6262,7 @@ btr_estimate_number_of_different_key_vals( default: ut_error; - } + } if (srv_stats_sample_traditional) { /* It makes no sense to test more pages than are contained @@ -4070,8 +6311,8 @@ btr_estimate_number_of_different_key_vals( */ if (index->stat_index_size > 1) { n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ? 
- (ulint) ut_min((double) index->stat_index_size, - log2(index->stat_index_size)*srv_stats_transient_sample_pages) + ut_min(static_cast(index->stat_index_size), + static_cast(log2(index->stat_index_size)*srv_stats_transient_sample_pages)) : index->stat_index_size; } @@ -4085,7 +6326,17 @@ btr_estimate_number_of_different_key_vals( for (i = 0; i < n_sample_pages; i++) { mtr_start(&mtr); - btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); + bool available; + + available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, + &cursor, &mtr); + + if (!available) { + mtr_commit(&mtr); + mem_heap_free(heap); + + return(false); + } /* Count the number of different key values for each prefix of the key on this index page. If the prefix does not determine @@ -4109,6 +6360,7 @@ btr_estimate_number_of_different_key_vals( } while (!page_rec_is_supremum(rec)) { + ulint matched_fields; rec_t* next_rec = page_rec_get_next(rec); if (page_rec_is_supremum(next_rec)) { total_external_size += @@ -4117,8 +6369,6 @@ btr_estimate_number_of_different_key_vals( break; } - matched_fields = 0; - matched_bytes = 0; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, ULINT_UNDEFINED, @@ -4127,8 +6377,7 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, index, stats_null_not_equal, - &matched_fields, - &matched_bytes); + &matched_fields); for (j = matched_fields; j < n_cols; j++) { /* We add one if this index record has @@ -4226,13 +6475,15 @@ btr_estimate_number_of_different_key_vals( } mem_heap_free(heap); + + return(true); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ /***********************************************************//** Gets the offset of the pointer to the externally stored part of a field. 
-@return offset of the pointer to the externally stored part */ +@return offset of the pointer to the externally stored part */ static ulint btr_rec_get_field_ref_offs( @@ -4252,9 +6503,9 @@ btr_rec_get_field_ref_offs( } /** Gets a pointer to the externally stored part of a field. -@param rec record -@param offsets rec_get_offsets(rec) -@param n index of the externally stored field +@param rec record +@param offsets rec_get_offsets(rec) +@param n index of the externally stored field @return pointer to the externally stored part */ #define btr_rec_get_field_ref(rec, offsets, n) \ ((rec) + btr_rec_get_field_ref_offs(offsets, n)) @@ -4262,8 +6513,7 @@ btr_rec_get_field_ref_offs( /** Gets the externally stored size of a record, in units of a database page. @param[in] rec record @param[in] offsets array returned by rec_get_offsets() -@return externally stored part, in units of a database page */ - +@return externally stored part, in units of a database page */ ulint btr_rec_get_externally_stored_len( const rec_t* rec, @@ -4342,8 +6592,6 @@ btr_cur_set_ownership_of_extern_field( } else { mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val); } - - btr_blob_dbg_owner(rec, index, offsets, i, val); } /*******************************************************************//** @@ -4351,7 +6599,6 @@ Marks non-updated off-page fields as disowned by this record. The ownership must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed to free the field. 
*/ -UNIV_INTERN void btr_cur_disown_inherited_fields( /*============================*/ @@ -4372,7 +6619,7 @@ btr_cur_disown_inherited_fields( for (i = 0; i < rec_offs_n_fields(offsets); i++) { if (rec_offs_nth_extern(offsets, i) - && !upd_get_field_by_field_no(update, i)) { + && !upd_get_field_by_field_no(update, i, false)) { btr_cur_set_ownership_of_extern_field( page_zip, rec, index, offsets, i, FALSE, mtr); } @@ -4418,8 +6665,7 @@ btr_cur_unmark_extern_fields( Flags the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must mark as extern storage in a record inserted for an update. -@return number of flagged external columns */ -UNIV_INTERN +@return number of flagged external columns */ ulint btr_push_update_extern_fields( /*==========================*/ @@ -4459,7 +6705,8 @@ btr_push_update_extern_fields( InnoDB writes a longer prefix of externally stored columns, so that column prefixes in secondary indexes can be reconstructed. */ - dfield_set_data(field, (byte*) dfield_get_data(field) + dfield_set_data(field, + (byte*) dfield_get_data(field) + dfield_get_len(field) - BTR_EXTERN_FIELD_REF_SIZE, BTR_EXTERN_FIELD_REF_SIZE); @@ -4497,7 +6744,7 @@ btr_push_update_extern_fields( /*******************************************************************//** Returns the length of a BLOB part stored on the header page. -@return part length */ +@return part length */ static ulint btr_blob_get_part_len( @@ -4509,7 +6756,7 @@ btr_blob_get_part_len( /*******************************************************************//** Returns the page number where the next BLOB part is stored. 
-@return page number or FIL_NULL if no more pages */ +@return page number or FIL_NULL if no more pages */ static ulint btr_blob_get_next_page_no( @@ -4525,16 +6772,17 @@ static void btr_blob_free( /*==========*/ + dict_index_t* index, /*!< in: index */ buf_block_t* block, /*!< in: buffer block */ ibool all, /*!< in: TRUE=remove also the compressed page if there is one */ mtr_t* mtr) /*!< in: mini-transaction to commit */ { buf_pool_t* buf_pool = buf_pool_from_block(block); - ulint space = buf_block_get_space(block); - ulint page_no = buf_block_get_page_no(block); + ulint space = block->page.id.space(); + ulint page_no = block->page.id.page_no(); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); mtr_commit(mtr); @@ -4545,8 +6793,8 @@ btr_blob_free( if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE - && buf_block_get_space(block) == space - && buf_block_get_page_no(block) == page_no) { + && block->page.id.space() == space + && block->page.id.page_no() == page_no) { if (!buf_LRU_free_page(&block->page, all) && all && block->page.zip.data) { @@ -4560,28 +6808,151 @@ btr_blob_free( buf_pool_mutex_exit(buf_pool); } +/** Helper class used while writing blob pages, during insert or update. */ +struct btr_blob_log_check_t { + /** Persistent cursor on a clusterex index record with blobs. */ + btr_pcur_t* m_pcur; + /** Mini transaction holding the latches for m_pcur */ + mtr_t* m_mtr; + /** rec_get_offsets(rec, index); offset of clust_rec */ + const ulint* m_offsets; + /** The block containing clustered record */ + buf_block_t** m_block; + /** The clustered record pointer */ + rec_t** m_rec; + /** The blob operation code */ + enum blob_op m_op; + + /** Constructor + @param[in] pcur persistent cursor on a clustered + index record with blobs. + @param[in] mtr mini-transaction holding latches for + pcur. 
+ @param[in] offsets offsets of the clust_rec + @param[in,out] block record block containing pcur record + @param[in,out] rec the clustered record pointer + @param[in] op the blob operation code */ + btr_blob_log_check_t( + btr_pcur_t* pcur, + mtr_t* mtr, + const ulint* offsets, + buf_block_t** block, + rec_t** rec, + enum blob_op op) + : m_pcur(pcur), + m_mtr(mtr), + m_offsets(offsets), + m_block(block), + m_rec(rec), + m_op(op) + { + ut_ad(rec_offs_validate(*m_rec, m_pcur->index(), m_offsets)); + ut_ad((*m_block)->frame == page_align(*m_rec)); + ut_ad(*m_rec == btr_pcur_get_rec(m_pcur)); + } + + /** Check if there is enough space in log file. Commit and re-start the + mini transaction. */ + void check() + { + dict_index_t* index = m_pcur->index(); + ulint offs = 0; + ulint page_no = ULINT_UNDEFINED; + FlushObserver* observer = m_mtr->get_flush_observer(); + + if (m_op == BTR_STORE_INSERT_BULK) { + offs = page_offset(*m_rec); + page_no = page_get_page_no( + buf_block_get_frame(*m_block)); + + buf_block_buf_fix_inc(*m_block, __FILE__, __LINE__); + } else { + btr_pcur_store_position(m_pcur, m_mtr); + } + m_mtr->commit(); + + DEBUG_SYNC_C("blob_write_middle"); + + log_free_check(); + + DEBUG_SYNC_C("blob_write_middle_after_check"); + + const mtr_log_t log_mode = m_mtr->get_log_mode(); + m_mtr->start(); + m_mtr->set_log_mode(log_mode); + m_mtr->set_named_space(index->space); + m_mtr->set_flush_observer(observer); + + if (m_op == BTR_STORE_INSERT_BULK) { + page_id_t page_id(dict_index_get_space(index), + page_no); + page_size_t page_size(dict_table_page_size( + index->table)); + page_cur_t* page_cur = &m_pcur->btr_cur.page_cur; + + mtr_x_lock(dict_index_get_lock(index), m_mtr); + page_cur->block = btr_block_get( + page_id, page_size, RW_X_LATCH, index, m_mtr); + page_cur->rec = buf_block_get_frame(page_cur->block) + + offs; + + buf_block_buf_fix_dec(page_cur->block); + } else { + ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); + bool ret = btr_pcur_restore_position( + 
BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL, + m_pcur, m_mtr); + + ut_a(ret); + } + + *m_block = btr_pcur_get_block(m_pcur); + *m_rec = btr_pcur_get_rec(m_pcur); + + ut_d(rec_offs_make_valid( + *m_rec, index, const_cast(m_offsets))); + + ut_ad(m_mtr->memo_contains_page_flagged( + *m_rec, + MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) + || dict_table_is_intrinsic(index->table)); + + ut_ad(mtr_memo_contains_flagged(m_mtr, + dict_index_get_lock(index), + MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK) + || dict_table_is_intrinsic(index->table)); + } +}; + + /*******************************************************************//** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE or DB_TOO_BIG_FOR_REDO */ -UNIV_INTERN + +TODO: If the allocation extends the tablespace, it will not be redo logged, in +any mini-transaction. Tablespace extension should be redo-logged, so that +recovery will not fail when the big_rec was written to the extended portion of +the file, in case the file was somehow truncated in the crash. + +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ dberr_t btr_store_big_rec_extern_fields( /*============================*/ - dict_index_t* index, /*!< in: index of rec; the index tree - MUST be X-latched */ - buf_block_t* rec_block, /*!< in/out: block containing rec */ - rec_t* rec, /*!< in/out: record */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index); - the "external storage" flags in offsets - will not correspond to rec when - this function returns */ + btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if + btr_mtr is restarted, then this can + be repositioned. */ + const upd_t* upd, /*!< in: update vector */ + ulint* offsets, /*!< in/out: rec_get_offsets() on + pcur. 
the "external storage" flags + in offsets will correctly correspond + to rec when this function returns */ const big_rec_t*big_rec_vec, /*!< in: vector containing fields to be stored externally */ - mtr_t* btr_mtr, /*!< in: mtr containing the - latches to the clustered index */ + mtr_t* btr_mtr, /*!< in/out: mtr containing the + latches to the clustered index. can be + committed and restarted. */ enum blob_op op) /*! in: operation code */ { ulint rec_page_no; @@ -4590,43 +6961,41 @@ btr_store_big_rec_extern_fields( ulint store_len; ulint page_no; ulint space_id; - ulint zip_size; ulint prev_page_no; ulint hint_page_no; ulint i; mtr_t mtr; - mtr_t* alloc_mtr; + mtr_t mtr_bulk; mem_heap_t* heap = NULL; page_zip_des_t* page_zip; z_stream c_stream; - buf_block_t** freed_pages = NULL; - ulint n_freed_pages = 0; dberr_t error = DB_SUCCESS; + dict_index_t* index = pcur->index(); + buf_block_t* rec_block = btr_pcur_get_block(pcur); + rec_t* rec = btr_pcur_get_rec(pcur); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); ut_ad(btr_mtr); - ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_is_block_fix( + btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(buf_block_get_frame(rec_block) == page_align(rec)); ut_a(dict_index_is_clust(index)); + ut_a(dict_table_page_size(index->table) + .equals_to(rec_block->page.size)); + + btr_blob_log_check_t redo_log(pcur, btr_mtr, offsets, &rec_block, + &rec, op); page_zip = buf_block_get_page_zip(rec_block); - ut_a(dict_table_zip_size(index->table) - == buf_block_get_zip_size(rec_block)); - - space_id = buf_block_get_space(rec_block); - zip_size = buf_block_get_zip_size(rec_block); - rec_page_no = buf_block_get_page_no(rec_block); - 
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); - - error = btr_check_blob_limit(big_rec_vec); - - if (error != DB_SUCCESS) { - ut_ad(op == BTR_STORE_INSERT); - return(error); - } + space_id = rec_block->page.id.space(); + rec_page_no = rec_block->page.id.page_no(); + ut_a(fil_page_index_page_check(page_align(rec)) + || op == BTR_STORE_INSERT_BULK); if (page_zip) { int err; @@ -4644,52 +7013,13 @@ btr_store_big_rec_extern_fields( ut_a(err == Z_OK); } - if (btr_blob_op_is_update(op)) { - /* Avoid reusing pages that have been previously freed - in btr_mtr. */ - if (btr_mtr->n_freed_pages) { - if (heap == NULL) { - heap = mem_heap_create( - btr_mtr->n_freed_pages - * sizeof *freed_pages); - } - - freed_pages = static_cast( - mem_heap_alloc( - heap, - btr_mtr->n_freed_pages - * sizeof *freed_pages)); - n_freed_pages = 0; - } - - /* Because btr_mtr will be committed after mtr, it is - possible that the tablespace has been extended when - the B-tree record was updated or inserted, or it will - be extended while allocating pages for big_rec. - - TODO: In mtr (not btr_mtr), write a redo log record - about extending the tablespace to its current size, - and remember the current size. Whenever the tablespace - grows as pages are allocated, write further redo log - records to mtr. (Currently tablespace extension is not - covered by the redo log. If it were, the record would - only be written to btr_mtr, which is committed after - mtr.) */ - alloc_mtr = btr_mtr; - } else { - /* Use the local mtr for allocations. */ - alloc_mtr = &mtr; - } - #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /* All pointers to externally stored columns in the record must either be zero or they must be pointers to inherited columns, owned by this record or an earlier record version. 
*/ - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (!rec_offs_nth_extern(offsets, i)) { - continue; - } - field_ref = btr_rec_get_field_ref(rec, offsets, i); + for (i = 0; i < big_rec_vec->n_fields; i++) { + field_ref = btr_rec_get_field_ref( + rec, offsets, big_rec_vec->fields[i].field_no); ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)); /* Either this must be an update in place, @@ -4701,12 +7031,24 @@ btr_store_big_rec_extern_fields( BTR_EXTERN_FIELD_REF_SIZE)); } #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + + const page_size_t page_size(dict_table_page_size(index->table)); + + /* Space available in compressed page to carry blob data */ + const ulint payload_size_zip = page_size.physical() + - FIL_PAGE_DATA; + + /* Space available in uncompressed page to carry blob data */ + const ulint payload_size = page_size.physical() + - FIL_PAGE_DATA - BTR_BLOB_HDR_SIZE - FIL_PAGE_DATA_END; + /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ for (i = 0; i < big_rec_vec->n_fields; i++) { - field_ref = btr_rec_get_field_ref( - rec, offsets, big_rec_vec->fields[i].field_no); + const ulint field_no = big_rec_vec->fields[i].field_no; + + field_ref = btr_rec_get_field_ref(rec, offsets, field_no); #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /* A zero BLOB pointer should have been initially inserted. 
*/ ut_a(!memcmp(field_ref, field_ref_zero, @@ -4729,11 +7071,32 @@ btr_store_big_rec_extern_fields( c_stream.avail_in = static_cast(extern_len); } - for (;;) { + for (ulint blob_npages = 0;; ++blob_npages) { buf_block_t* block; page_t* page; + const ulint commit_freq = 4; + ulint r_extents; + + ut_ad(page_align(field_ref) == page_align(rec)); + + if (!(blob_npages % commit_freq)) { + + redo_log.check(); + + field_ref = btr_rec_get_field_ref( + rec, offsets, field_no); + + page_zip = buf_block_get_page_zip(rec_block); + rec_page_no = rec_block->page.id.page_no(); + } mtr_start(&mtr); + mtr.set_named_space(index->space); + mtr.set_log_mode(btr_mtr->get_log_mode()); + mtr.set_flush_observer(btr_mtr->get_flush_observer()); + + buf_page_get(rec_block->page.id, + rec_block->page.size, RW_X_LATCH, &mtr); if (prev_page_no == FIL_NULL) { hint_page_no = 1 + rec_page_no; @@ -4741,36 +7104,48 @@ btr_store_big_rec_extern_fields( hint_page_no = prev_page_no + 1; } -alloc_another: - block = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, alloc_mtr, &mtr); - if (UNIV_UNLIKELY(block == NULL)) { - mtr_commit(&mtr); + mtr_t *alloc_mtr; + + if (op == BTR_STORE_INSERT_BULK) { + mtr_start(&mtr_bulk); + mtr_bulk.set_spaces(mtr); + alloc_mtr = &mtr_bulk; + } else { + alloc_mtr = &mtr; + } + + if (!fsp_reserve_free_extents(&r_extents, space_id, 1, + FSP_BLOB, alloc_mtr, + 1)) { + + mtr_commit(alloc_mtr); error = DB_OUT_OF_FILE_SPACE; goto func_exit; } - if (rw_lock_get_x_lock_count(&block->lock) > 1) { - /* This page must have been freed in - btr_mtr previously. Put it aside, and - allocate another page for the BLOB data. 
*/ - ut_ad(alloc_mtr == btr_mtr); - ut_ad(btr_blob_op_is_update(op)); - ut_ad(n_freed_pages < btr_mtr->n_freed_pages); - freed_pages[n_freed_pages++] = block; - goto alloc_another; + block = btr_page_alloc(index, hint_page_no, FSP_NO_DIR, + 0, alloc_mtr, &mtr); + + alloc_mtr->release_free_extents(r_extents); + + if (op == BTR_STORE_INSERT_BULK) { + mtr_commit(&mtr_bulk); } - page_no = buf_block_get_page_no(block); + ut_a(block != NULL); + + page_no = block->page.id.page_no(); page = buf_block_get_frame(block); if (prev_page_no != FIL_NULL) { buf_block_t* prev_block; page_t* prev_page; - prev_block = buf_page_get(space_id, zip_size, - prev_page_no, - RW_X_LATCH, &mtr); + prev_block = buf_page_get( + page_id_t(space_id, prev_page_no), + rec_block->page.size, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(prev_block, SYNC_EXTERN_STORAGE); prev_page = buf_block_get_frame(prev_block); @@ -4816,9 +7191,8 @@ alloc_another: c_stream.next_out = page + FIL_PAGE_DATA; - c_stream.avail_out - = static_cast(page_zip_get_size(page_zip)) - - FIL_PAGE_DATA; + c_stream.avail_out = static_cast( + payload_size_zip); err = deflate(&c_stream, Z_FINISH); ut_a(err == Z_OK || err == Z_STREAM_END); @@ -4844,7 +7218,12 @@ alloc_another: btr_page_reorganize(). However, also the page number of the record may change when B-tree nodes are split or - merged. */ + merged. 
+ NOTE: FIL_PAGE_FILE_FLUSH_LSN space is + used by R-tree index for a Split Sequence + Number */ + ut_ad(!dict_index_is_spatial(index)); + mlog_write_ulint(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, space_id, @@ -4878,16 +7257,6 @@ alloc_another: goto next_zip_page; } - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } - if (err == Z_STREAM_END) { mach_write_to_4(field_ref + BTR_EXTERN_LEN, 0); @@ -4900,11 +7269,7 @@ alloc_another: } if (prev_page_no == FIL_NULL) { - btr_blob_dbg_add_blob( - rec, big_rec_vec->fields[i] - .field_no, page_no, index, - "store"); - + ut_ad(blob_npages == 0); mach_write_to_4(field_ref + BTR_EXTERN_SPACE_ID, space_id); @@ -4918,17 +7283,19 @@ alloc_another: FIL_PAGE_NEXT); } - page_zip_write_blob_ptr( - page_zip, rec, index, offsets, - big_rec_vec->fields[i].field_no, - alloc_mtr); + /* We compress a page when finish bulk insert.*/ + if (op != BTR_STORE_INSERT_BULK) { + page_zip_write_blob_ptr( + page_zip, rec, index, offsets, + field_no, &mtr); + } next_zip_page: prev_page_no = page_no; /* Commit mtr and release the uncompressed page frame to save memory. 
*/ - btr_blob_free(block, FALSE, &mtr); + btr_blob_free(index, block, FALSE, &mtr); if (err == Z_STREAM_END) { break; @@ -4938,14 +7305,8 @@ next_zip_page: FIL_PAGE_TYPE_BLOB, MLOG_2BYTES, &mtr); - if (extern_len > (UNIV_PAGE_SIZE - - FIL_PAGE_DATA - - BTR_BLOB_HDR_SIZE - - FIL_PAGE_DATA_END)) { - store_len = UNIV_PAGE_SIZE - - FIL_PAGE_DATA - - BTR_BLOB_HDR_SIZE - - FIL_PAGE_DATA_END; + if (extern_len > payload_size) { + store_len = payload_size; } else { store_len = extern_len; } @@ -4966,45 +7327,31 @@ next_zip_page: extern_len -= store_len; - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } - mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); if (prev_page_no == FIL_NULL) { - btr_blob_dbg_add_blob( - rec, big_rec_vec->fields[i] - .field_no, page_no, index, - "store"); - + ut_ad(blob_npages == 0); mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, space_id, MLOG_4BYTES, - alloc_mtr); + &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, page_no, MLOG_4BYTES, - alloc_mtr); + &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, MLOG_4BYTES, - alloc_mtr); + &mtr); } prev_page_no = page_no; @@ -5020,6 +7367,8 @@ next_zip_page: DBUG_EXECUTE_IF("btr_store_big_rec_extern", error = DB_OUT_OF_FILE_SPACE; goto func_exit;); + + rec_offs_make_nth_extern(offsets, field_no); } func_exit: @@ -5027,21 +7376,6 @@ func_exit: deflateEnd(&c_stream); } - if (n_freed_pages) { - ulint i; - - ut_ad(alloc_mtr == btr_mtr); - ut_ad(btr_blob_op_is_update(op)); - - for (i = 0; i < n_freed_pages; i++) { - btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr); - } - - DBUG_EXECUTE_IF("btr_store_big_rec_extern", - error = DB_OUT_OF_FILE_SPACE; - goto 
func_exit;); - } - if (heap != NULL) { mem_heap_free(heap); } @@ -5097,13 +7431,10 @@ btr_check_blob_fil_page_type( } #endif /* !UNIV_DEBUG */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: FIL_PAGE_TYPE=%lu" - " on BLOB %s space %lu page %lu flags %lx\n", - (ulong) type, read ? "read" : "purge", - (ulong) space_id, (ulong) page_no, (ulong) flags); - ut_error; + ib::fatal() << "FIL_PAGE_TYPE=" << type + << " on BLOB " << (read ? "read" : "purge") + << " space " << space_id << " page " << page_no + << " flags " << flags; } } @@ -5112,7 +7443,6 @@ Frees the space in an externally stored field to the file space management if the field in data is owned by the externally stored field, in a rollback we may have the additional condition that the field must not be inherited. */ -UNIV_INTERN void btr_free_externally_stored_field( /*=============================*/ @@ -5133,8 +7463,8 @@ btr_free_externally_stored_field( to rec, or NULL if rec == NULL */ ulint i, /*!< in: field number of field_ref; ignored if rec == NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* local_mtr MY_ATTRIBUTE((unused))) /*!< in: mtr + bool rollback, /*!< in: performing rollback? 
*/ + mtr_t* local_mtr) /*!< in: mtr containing the latch to data an an X-latch to the index tree */ { @@ -5143,110 +7473,68 @@ btr_free_externally_stored_field( field_ref + BTR_EXTERN_SPACE_ID); const ulint start_page = mach_read_from_4( field_ref + BTR_EXTERN_PAGE_NO); - ulint rec_zip_size = dict_table_zip_size(index->table); - ulint ext_zip_size; ulint page_no; ulint next_page_no; mtr_t mtr; ut_ad(dict_index_is_clust(index)); - ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains_page(local_mtr, field_ref, - MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(local_mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_is_page_fix( + local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i)); + ut_ad(local_mtr->is_named_space( + page_get_space_id(page_align(field_ref)))); if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { /* In the rollback, we may encounter a clustered index record with some unwritten off-page columns. There is nothing to free then. */ - if (rb_ctx == RB_NONE) { - char buf[3 * 512]; - char *bufend; - ulint ispace = dict_index_get_space(index); - bufend = innobase_convert_name(buf, sizeof buf, - index->name, strlen(index->name), - NULL, - FALSE); - buf[bufend - buf]='\0'; - ib_logf(IB_LOG_LEVEL_ERROR, "Unwritten off-page columns in " - "rollback context %d. 
Table %s index %s space_id %lu " - "index space %lu.", - rb_ctx, index->table->name, buf, space_id, ispace); - } - - ut_a(rb_ctx != RB_NONE); + ut_a(rollback); return; } + ut_ad(!(mach_read_from_4(field_ref + BTR_EXTERN_LEN) + & ~((BTR_EXTERN_OWNER_FLAG + | BTR_EXTERN_INHERITED_FLAG) << 24))); ut_ad(space_id == index->space); - if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) { - ext_zip_size = fil_space_get_zip_size(space_id); - /* This must be an undo log record in the system tablespace, - that is, in row_purge_upd_exist_or_extern(). - Currently, externally stored records are stored in the - same tablespace as the referring records. */ - ut_ad(!page_get_space_id(page_align(field_ref))); - ut_ad(!rec); - ut_ad(!page_zip); - } else { - ext_zip_size = rec_zip_size; - } - - if (!rec) { + const page_size_t ext_page_size(dict_table_page_size(index->table)); + const page_size_t& rec_page_size(rec == NULL + ? univ_page_size + : ext_page_size); + if (rec == NULL) { /* This is a call from row_purge_upd_exist_or_extern(). */ ut_ad(!page_zip); - rec_zip_size = 0; } -#ifdef UNIV_BLOB_DEBUG - if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) - && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) - && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) { - /* This off-page column will be freed. - Check that no references remain. */ - - btr_blob_dbg_t b; - - b.blob_page_no = start_page; - - if (rec) { - /* Remove the reference from the record to the - BLOB. If the BLOB were not freed, the - reference would be removed when the record is - removed. Freeing the BLOB will overwrite the - BTR_EXTERN_PAGE_NO in the field_ref of the - record with FIL_NULL, which would make the - btr_blob_dbg information inconsistent with the - record. 
*/ - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = i; - btr_blob_dbg_rbt_delete(index, &b, "free"); - } - - btr_blob_dbg_assert_empty(index, b.blob_page_no); - } -#endif /* UNIV_BLOB_DEBUG */ - for (;;) { -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG buf_block_t* rec_block; -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ buf_block_t* ext_block; mtr_start(&mtr); + mtr.set_spaces(*local_mtr); + mtr.set_log_mode(local_mtr->get_log_mode()); -#ifdef UNIV_SYNC_DEBUG + ut_ad(!dict_table_is_temporary(index->table) + || local_mtr->get_log_mode() == MTR_LOG_NO_REDO); + + const page_t* p = page_align(field_ref); + + const page_id_t page_id(page_get_space_id(p), + page_get_page_no(p)); + +#ifdef UNIV_DEBUG rec_block = -#endif /* UNIV_SYNC_DEBUG */ - buf_page_get(page_get_space_id(page_align(field_ref)), - rec_zip_size, - page_get_page_no(page_align(field_ref)), - RW_X_LATCH, &mtr); +#endif /* UNIV_DEBUG */ + buf_page_get(page_id, rec_page_size, RW_X_LATCH, &mtr); + buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK); page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO); @@ -5256,7 +7544,7 @@ btr_free_externally_stored_field( || (mach_read_from_1(field_ref + BTR_EXTERN_LEN) & BTR_EXTERN_OWNER_FLAG) /* Rollback and inherited field */ - || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY) + || (rollback && (mach_read_from_1(field_ref + BTR_EXTERN_LEN) & BTR_EXTERN_INHERITED_FLAG))) { @@ -5270,12 +7558,14 @@ btr_free_externally_stored_field( row_log_table_blob_free(index, start_page); } - ext_block = buf_page_get(space_id, ext_zip_size, page_no, - RW_X_LATCH, &mtr); + ext_block = buf_page_get( + page_id_t(space_id, page_no), ext_page_size, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE); page = buf_block_get_frame(ext_block); - if (ext_zip_size) { + if (ext_page_size.is_compressed()) { /* Note that page_zip will be NULL in row_purge_upd_exist_or_extern(). 
*/ switch (fil_page_get_type(page)) { @@ -5287,7 +7577,8 @@ btr_free_externally_stored_field( } next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); - btr_page_free_low(index, ext_block, 0, true, &mtr); + btr_page_free_low(index, ext_block, 0, + true, &mtr); if (page_zip != NULL) { mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO, @@ -5317,8 +7608,8 @@ btr_free_externally_stored_field( /* We must supply the page level (= 0) as an argument because we did not store it on the page (we save the space overhead from an index page header. */ - - btr_page_free_low(index, ext_block, 0, true, &mtr); + btr_page_free_low(index, ext_block, 0, + true, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, next_page_no, @@ -5334,7 +7625,7 @@ btr_free_externally_stored_field( } /* Commit mtr and release the BLOB block to save memory. */ - btr_blob_free(ext_block, TRUE, &mtr); + btr_blob_free(index, ext_block, TRUE, &mtr); } } @@ -5350,7 +7641,7 @@ btr_rec_free_externally_stored_fields( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed part will be updated, or NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? 
*/ mtr_t* mtr) /*!< in: mini-transaction handle which contains an X-latch to record page and to the index tree */ @@ -5359,7 +7650,7 @@ btr_rec_free_externally_stored_fields( ulint i; ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table)); /* Free possible externally stored fields in the record */ ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets)); @@ -5369,7 +7660,7 @@ btr_rec_free_externally_stored_fields( if (rec_offs_nth_extern(offsets, i)) { btr_free_externally_stored_field( index, btr_rec_get_field_ref(rec, offsets, i), - rec, offsets, page_zip, i, rb_ctx, mtr); + rec, offsets, page_zip, i, rollback, mtr); } } } @@ -5388,7 +7679,7 @@ btr_rec_free_updated_extern_fields( part will be updated, or NULL */ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ const upd_t* update, /*!< in: update vector */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? */ mtr_t* mtr) /*!< in: mini-transaction handle which contains an X-latch to record page and to the tree */ { @@ -5396,7 +7687,7 @@ btr_rec_free_updated_extern_fields( ulint i; ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table)); /* Free possible externally stored fields in the record */ @@ -5414,7 +7705,7 @@ btr_rec_free_updated_extern_fields( btr_free_externally_stored_field( index, data + len - BTR_EXTERN_FIELD_REF_SIZE, rec, offsets, page_zip, - ufield->field_no, rb_ctx, mtr); + ufield->field_no, rollback, mtr); } } } @@ -5422,7 +7713,7 @@ btr_rec_free_updated_extern_fields( /*******************************************************************//** Copies the prefix of an uncompressed BLOB. The clustered index record that points to this BLOB must be protected by a lock or a page latch. 
-@return number of bytes written to buf */ +@return number of bytes written to buf */ static ulint btr_copy_blob_prefix( @@ -5432,8 +7723,7 @@ btr_copy_blob_prefix( ulint len, /*!< in: length of buf, in bytes */ ulint space_id,/*!< in: space id of the BLOB pages */ ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset, /*!< in: offset on the first BLOB page */ - trx_t* trx) /*!< in: transaction handle */ + ulint offset) /*!< in: offset on the first BLOB page */ { ulint copied_len = 0; @@ -5445,9 +7735,10 @@ btr_copy_blob_prefix( ulint part_len; ulint copy_len; - mtr_start_trx(&mtr, trx); + mtr_start(&mtr); - block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr); + block = buf_page_get(page_id_t(space_id, page_no), + univ_page_size, RW_S_LATCH, &mtr); buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE); page = buf_block_get_frame(block); @@ -5479,21 +7770,25 @@ btr_copy_blob_prefix( } } -/*******************************************************************//** -Copies the prefix of a compressed BLOB. The clustered index record -that points to this BLOB must be protected by a lock or a page latch. -@return number of bytes written to buf */ +/** Copies the prefix of a compressed BLOB. +The clustered index record that points to this BLOB must be protected +by a lock or a page latch. 
+@param[out] buf the externally stored part of the field, +or a prefix of it +@param[in] len length of buf, in bytes +@param[in] page_size compressed BLOB page size +@param[in] space_id space id of the BLOB pages +@param[in] offset offset on the first BLOB page +@return number of bytes written to buf */ static ulint btr_copy_zblob_prefix( -/*==================*/ - byte* buf, /*!< out: the externally stored part of - the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: compressed BLOB page size */ - ulint space_id,/*!< in: space id of the BLOB pages */ - ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset) /*!< in: offset on the first BLOB page */ + byte* buf, + ulint len, + const page_size_t& page_size, + ulint space_id, + ulint page_no, + ulint offset) { ulint page_type = FIL_PAGE_TYPE_ZBLOB; mem_heap_t* heap; @@ -5510,9 +7805,7 @@ btr_copy_zblob_prefix( heap = mem_heap_create(40000); page_zip_set_alloc(&d_stream, heap); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size >= UNIV_ZIP_SIZE_MIN); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); + ut_ad(page_size.is_compressed()); ut_ad(space_id); err = inflateInit(&d_stream); @@ -5526,27 +7819,23 @@ btr_copy_zblob_prefix( bpage is protected by the B-tree page latch that is being held on the clustered index record, or, in row_merge_copy_blobs(), by an exclusive table lock. 
*/ - bpage = buf_page_get_zip(space_id, zip_size, page_no); + bpage = buf_page_get_zip(page_id_t(space_id, page_no), + page_size); if (UNIV_UNLIKELY(!bpage)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot load" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) page_no, (ulong) space_id); + ib::error() << "Cannot load compressed BLOB " + << page_id_t(space_id, page_no); goto func_exit; } if (UNIV_UNLIKELY (fil_page_get_type(bpage->zip.data) != page_type)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Unexpected type %lu of" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) fil_page_get_type(bpage->zip.data), - (ulong) page_no, (ulong) space_id); + + ib::error() << "Unexpected type " + << fil_page_get_type(bpage->zip.data) + << " of compressed BLOB page " + << page_id_t(space_id, page_no); + ut_ad(0); goto end_of_blob; } @@ -5563,7 +7852,8 @@ btr_copy_zblob_prefix( } d_stream.next_in = bpage->zip.data + offset; - d_stream.avail_in = static_cast(zip_size - offset); + d_stream.avail_in = static_cast(page_size.physical() + - offset); err = inflate(&d_stream, Z_NO_FLUSH); switch (err) { @@ -5579,26 +7869,21 @@ btr_copy_zblob_prefix( /* fall through */ default: inflate_error: - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: inflate() of" - " compressed BLOB" - " page %lu space %lu returned %d (%s)\n", - (ulong) page_no, (ulong) space_id, - err, d_stream.msg); + ib::error() << "inflate() of compressed BLOB page " + << page_id_t(space_id, page_no) + << " returned " << err + << " (" << d_stream.msg << ")"; + case Z_BUF_ERROR: goto end_of_blob; } if (next_page_no == FIL_NULL) { if (!d_stream.avail_in) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unexpected end of" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) page_no, - (ulong) space_id); + ib::error() + << "Unexpected end of compressed " + << "BLOB page " + << page_id_t(space_id, page_no); } else { err = inflate(&d_stream, 
Z_FINISH); switch (err) { @@ -5632,57 +7917,59 @@ func_exit: return(d_stream.total_out); } -/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record that points to this BLOB must be protected by a -lock or a page latch. -@return number of bytes written to buf */ +/** Copies the prefix of an externally stored field of a record. +The clustered index record that points to this BLOB must be protected +by a lock or a page latch. +@param[out] buf the externally stored part of the +field, or a prefix of it +@param[in] len length of buf, in bytes +@param[in] page_size BLOB page size +@param[in] space_id space id of the first BLOB page +@param[in] page_no page number of the first BLOB page +@param[in] offset offset on the first BLOB page +@return number of bytes written to buf */ static ulint btr_copy_externally_stored_field_prefix_low( -/*========================================*/ - byte* buf, /*!< out: the externally stored part of - the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint space_id,/*!< in: space id of the first BLOB page */ - ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset, /*!< in: offset on the first BLOB page */ - trx_t* trx) /*!< in: transaction handle */ + byte* buf, + ulint len, + const page_size_t& page_size, + ulint space_id, + ulint page_no, + ulint offset) { - if (UNIV_UNLIKELY(len == 0)) { + if (len == 0) { return(0); } - if (zip_size) { - return(btr_copy_zblob_prefix(buf, len, zip_size, + if (page_size.is_compressed()) { + return(btr_copy_zblob_prefix(buf, len, page_size, space_id, page_no, offset)); } else { + ut_ad(page_size.equals_to(univ_page_size)); return(btr_copy_blob_prefix(buf, len, space_id, - page_no, offset, trx)); + page_no, offset)); } } 
-/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record must be protected by a lock or a page latch. +/** Copies the prefix of an externally stored field of a record. +The clustered index record must be protected by a lock or a page latch. +@param[out] buf the field, or a prefix of it +@param[in] len length of buf, in bytes +@param[in] page_size BLOB page size +@param[in] data 'internally' stored part of the field +containing also the reference to the external part; must be protected by +a lock or a page latch +@param[in] local_len length of data, in bytes @return the length of the copied field, or 0 if the column was being or has been deleted */ -UNIV_INTERN ulint btr_copy_externally_stored_field_prefix( -/*====================================*/ - byte* buf, /*!< out: the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint local_len,/*!< in: length of data, in bytes */ - trx_t* trx) /*!< in: transaction handle */ + byte* buf, + ulint len, + const page_size_t& page_size, + const byte* data, + ulint local_len) { ulint space_id; ulint page_no; @@ -5719,29 +8006,28 @@ btr_copy_externally_stored_field_prefix( return(local_len + btr_copy_externally_stored_field_prefix_low(buf + local_len, len - local_len, - zip_size, + page_size, space_id, page_no, - offset, trx)); + offset)); } -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. The -clustered index record must be protected by a lock or a page latch. 
-@return the whole field copied to heap */ -UNIV_INTERN +/** Copies an externally stored field of a record to mem heap. +The clustered index record must be protected by a lock or a page latch. +@param[out] len length of the whole field +@param[in] data 'internally' stored part of the field +containing also the reference to the external part; must be protected by +a lock or a page latch +@param[in] page_size BLOB page size +@param[in] local_len length of data +@param[in,out] heap mem heap +@return the whole field copied to heap */ byte* btr_copy_externally_stored_field( -/*=============================*/ - ulint* len, /*!< out: length of the whole field */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint local_len,/*!< in: length of data */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx) /*!< in: transaction handle */ + ulint* len, + const byte* data, + const page_size_t& page_size, + ulint local_len, + mem_heap_t* heap) { ulint space_id; ulint page_no; @@ -5770,30 +8056,30 @@ btr_copy_externally_stored_field( *len = local_len + btr_copy_externally_stored_field_prefix_low(buf + local_len, extern_len, - zip_size, + page_size, space_id, - page_no, offset, - trx); + page_no, offset); return(buf); } -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. -@return the field copied to heap, or NULL if the field is incomplete */ -UNIV_INTERN +/** Copies an externally stored field of a record to mem heap. 
+@param[in] rec record in a clustered index; must be +protected by a lock or a page latch +@param[in] offset array returned by rec_get_offsets() +@param[in] page_size BLOB page size +@param[in] no field number +@param[out] len length of the field +@param[in,out] heap mem heap +@return the field copied to heap, or NULL if the field is incomplete */ byte* btr_rec_copy_externally_stored_field( -/*=================================*/ - const rec_t* rec, /*!< in: record in a clustered index; - must be protected by a lock or a page latch */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint no, /*!< in: field number */ - ulint* len, /*!< out: length of the field */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx) /*!< in: transaction handle */ + const rec_t* rec, + const ulint* offsets, + const page_size_t& page_size, + ulint no, + ulint* len, + mem_heap_t* heap) { ulint local_len; const byte* data; @@ -5824,7 +8110,6 @@ btr_rec_copy_externally_stored_field( } return(btr_copy_externally_stored_field(len, data, - zip_size, local_len, heap, - trx)); + page_size, local_len, heap)); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 4d9eab8f2bd..a62351f2954 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2014, 2016, MariaDB Corporation. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,11 +27,13 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com #include "btr0defragment.h" #ifndef UNIV_HOTBACKUP +#include "btr0btr.h" #include "btr0cur.h" #include "btr0sea.h" #include "btr0pcur.h" #include "dict0stats.h" #include "dict0stats_bg.h" +#include "dict0defrag_bg.h" #include "ibuf0ibuf.h" #include "lock0lock.h" #include "srv0start.h" @@ -152,8 +154,7 @@ btr_defragment_init() { srv_defragment_interval = ut_microseconds_to_timer( (ulonglong) (1000000.0 / srv_defragment_frequency)); - mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex, - SYNC_ANY_LATCH); + mutex_create(LATCH_ID_BTR_DEFRAGMENT_MUTEX, &btr_defragment_mutex); os_thread_create(btr_defragment_thread, NULL, NULL); } @@ -163,7 +164,7 @@ void btr_defragment_shutdown() { mutex_enter(&btr_defragment_mutex); - list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); while(iter != btr_defragment_wq.end()) { btr_defragment_item_t* item = *iter; iter = btr_defragment_wq.erase(iter); @@ -185,7 +186,7 @@ btr_defragment_find_index( dict_index_t* index) /*!< Index to find. */ { mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); iter != btr_defragment_wq.end(); ++iter) { btr_defragment_item_t* item = *iter; @@ -213,14 +214,14 @@ btr_defragment_add_index( dberr_t* err) /*!< out: error code */ { mtr_t mtr; - ulint space = dict_index_get_space(index); - ulint zip_size = dict_table_zip_size(index->table); ulint page_no = dict_index_get_page(index); *err = DB_SUCCESS; mtr_start(&mtr); // Load index rood page. 
- buf_block_t* block = btr_block_get(space, zip_size, page_no, RW_NO_LATCH, index, &mtr); + const page_id_t page_id(dict_index_get_space(index), page_no); + const page_size_t page_size(dict_table_page_size(index->table)); + buf_block_t* block = btr_block_get(page_id, page_size, RW_NO_LATCH, index, &mtr); page_t* page = NULL; if (block) { @@ -241,7 +242,7 @@ btr_defragment_add_index( btr_pcur_t* pcur = btr_pcur_create_for_mysql(); os_event_t event = NULL; if (!async) { - event = os_event_create(); + event = os_event_create(0); } btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur, true, 0, &mtr); @@ -265,7 +266,7 @@ btr_defragment_remove_table( dict_table_t* table) /*!< Index to be removed. */ { mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); iter != btr_defragment_wq.end(); ++iter) { btr_defragment_item_t* item = *iter; @@ -287,7 +288,7 @@ btr_defragment_remove_index( dict_index_t* index) /*!< Index to be removed. */ { mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); iter != btr_defragment_wq.end(); ++iter) { btr_defragment_item_t* item = *iter; @@ -316,7 +317,7 @@ btr_defragment_remove_item( btr_defragment_item_t* item) /*!< Item to be removed. 
*/ { mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + for (std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); iter != btr_defragment_wq.end(); ++iter) { if (item == *iter) { @@ -345,7 +346,7 @@ btr_defragment_get_item() //return nullptr; } mutex_enter(&btr_defragment_mutex); - list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); + std::list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); if (iter == btr_defragment_wq.end()) { iter = btr_defragment_wq.begin(); } @@ -425,7 +426,7 @@ btr_defragment_merge_pages( dict_index_t* index, /*!< in: index tree */ buf_block_t* from_block, /*!< in: origin of merge */ buf_block_t* to_block, /*!< in: destination of merge */ - ulint zip_size, /*!< in: zip size of the block */ + const page_size_t page_size, /*!< in: page size of the block */ ulint reserved_space, /*!< in: space reserved for future insert to avoid immediate page split */ ulint* max_data_size, /*!< in/out: max data size to @@ -454,7 +455,7 @@ btr_defragment_merge_pages( // Estimate how many records can be moved from the from_page to // the to_page. - if (zip_size) { + if (page_size.is_compressed()) { ulint page_diff = UNIV_PAGE_SIZE - *max_data_size; max_ins_size_to_use = (max_ins_size_to_use > page_diff) ? max_ins_size_to_use - page_diff : 0; @@ -523,7 +524,7 @@ btr_defragment_merge_pages( // Set ibuf free bits if necessary. 
if (!dict_index_is_clust(index) && page_is_leaf(to_page)) { - if (zip_size) { + if (page_size.is_compressed()) { ibuf_reset_free_bits(to_block); } else { ibuf_update_free_bits_if_full( @@ -538,11 +539,10 @@ btr_defragment_merge_pages( lock_update_merge_left(to_block, orig_pred, from_block); btr_search_drop_page_hash_index(from_block); - btr_level_list_remove(space, zip_size, from_page, - index, mtr); + btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr); btr_node_ptr_delete(index, from_block, mtr); - btr_blob_dbg_remove(from_page, index, - "btr_defragment_n_pages"); + /* btr_blob_dbg_remove(from_page, index, + "btr_defragment_n_pages"); */ btr_page_free(index, from_block, mtr); } else { // There are still records left on the page, so @@ -591,7 +591,6 @@ btr_defragment_n_pages( mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint space; - ulint zip_size; /* We will need to load the n+1 block because if the last page is freed and we need to modify the prev_page_no of that block. */ buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1]; @@ -612,8 +611,6 @@ btr_defragment_n_pages( /* It doesn't make sense to call this function with n_pages = 1. */ ut_ad(n_pages > 1); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); space = dict_index_get_space(index); if (space == 0) { /* Ignore space 0. 
*/ @@ -624,9 +621,9 @@ btr_defragment_n_pages( n_pages = BTR_DEFRAGMENT_MAX_N_PAGES; } - zip_size = dict_table_zip_size(index->table); first_page = buf_block_get_frame(block); level = btr_page_get_level(first_page, mtr); + const page_size_t page_size(dict_table_page_size(index->table)); if (level != 0) { return NULL; @@ -644,7 +641,10 @@ btr_defragment_n_pages( end_of_index = TRUE; break; } - blocks[i] = btr_block_get(space, zip_size, page_no, + + const page_id_t page_id(dict_index_get_space(index), page_no); + + blocks[i] = btr_block_get(page_id, page_size, RW_X_LATCH, index, mtr); } @@ -670,7 +670,7 @@ btr_defragment_n_pages( optimal_page_size = page_get_free_space_of_empty( page_is_comp(first_page)); // For compressed pages, we take compression failures into account. - if (zip_size) { + if (page_size.is_compressed()) { ulint size = 0; int i = 0; // We estimate the optimal data size of the index use samples of @@ -687,12 +687,12 @@ btr_defragment_n_pages( } if (i != 0) { size = size / i; - optimal_page_size = min(optimal_page_size, size); + optimal_page_size = ut_min(optimal_page_size, size); } max_data_size = optimal_page_size; } - reserved_space = min((ulint)(optimal_page_size + reserved_space = ut_min((ulint)(optimal_page_size * (1 - srv_defragment_fill_factor)), (data_size_per_rec * srv_defragment_fill_factor_n_recs)); @@ -713,7 +713,7 @@ btr_defragment_n_pages( // Start from the second page. 
for (uint i = 1; i < n_pages; i ++) { buf_block_t* new_block = btr_defragment_merge_pages( - index, blocks[i], current_block, zip_size, + index, blocks[i], current_block, page_size, reserved_space, &max_data_size, heap, mtr); if (new_block != current_block) { n_defragmented ++; @@ -799,6 +799,8 @@ DECLARE_THREAD(btr_defragment_thread)( cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); first_block = btr_cur_get_block(cursor); + mtr.set_named_space(index->space); + last_block = btr_defragment_n_pages(first_block, index, srv_defragment_n_pages, &mtr); @@ -817,16 +819,32 @@ DECLARE_THREAD(btr_defragment_thread)( /* Update the last_processed time of this index. */ item->last_processed = now; } else { + dberr_t err = DB_SUCCESS; mtr_commit(&mtr); /* Reaching the end of the index. */ dict_stats_empty_defrag_stats(index); - dict_stats_save_defrag_stats(index); - dict_stats_save_defrag_summary(index); + err = dict_stats_save_defrag_stats(index); + if (err != DB_SUCCESS) { + ib::error() << "Saving defragmentation stats for table " + << index->table->name.m_name + << " index " << index->name() + << " failed with error " << err; + } else { + err = dict_stats_save_defrag_summary(index); + + if (err != DB_SUCCESS) { + ib::error() << "Saving defragmentation summary for table " + << index->table->name.m_name + << " index " << index->name() + << " failed with error " << err; + } + } + btr_defragment_remove_item(item); } } btr_defragment_shutdown(); - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 01d2e1bb8e2..a5da1b9fb0c 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -35,43 +35,38 @@ Created 2/23/1996 Heikki Tuuri /**************************************************************//** Allocates memory for a persistent cursor object and initializes the cursor. -@return own: persistent cursor */ -UNIV_INTERN +@return own: persistent cursor */ btr_pcur_t* btr_pcur_create_for_mysql(void) /*============================*/ { btr_pcur_t* pcur; + DBUG_ENTER("btr_pcur_create_for_mysql"); - pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t)); + pcur = (btr_pcur_t*) ut_malloc_nokey(sizeof(btr_pcur_t)); pcur->btr_cur.index = NULL; btr_pcur_init(pcur); - return(pcur); + DBUG_PRINT("btr_pcur_create_for_mysql", ("pcur: %p", pcur)); + DBUG_RETURN(pcur); } /**************************************************************//** Resets a persistent cursor object, freeing ::old_rec_buf if it is allocated and resetting the other members to their initial values. */ -UNIV_INTERN void btr_pcur_reset( /*===========*/ btr_pcur_t* cursor) /*!< in, out: persistent cursor */ { - if (cursor->old_rec_buf != NULL) { - - mem_free(cursor->old_rec_buf); - - cursor->old_rec_buf = NULL; - } - + btr_pcur_free(cursor); + cursor->old_rec_buf = NULL; cursor->btr_cur.index = NULL; cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; cursor->old_n_fields = 0; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; cursor->latch_mode = BTR_NO_LATCHES; cursor->pos_state = BTR_PCUR_NOT_POSITIONED; @@ -79,14 +74,17 @@ btr_pcur_reset( /**************************************************************//** Frees the memory for a persistent cursor object. 
*/ -UNIV_INTERN void btr_pcur_free_for_mysql( /*====================*/ btr_pcur_t* cursor) /*!< in, own: persistent cursor */ { - btr_pcur_reset(cursor); - mem_free(cursor); + DBUG_ENTER("btr_pcur_free_for_mysql"); + DBUG_PRINT("btr_pcur_free_for_mysql", ("pcur: %p", cursor)); + + btr_pcur_free(cursor); + ut_free(cursor); + DBUG_VOID_RETURN; } /**************************************************************//** @@ -96,7 +94,6 @@ cursor data structure, or just setting a flag if the cursor id before the first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the page where the cursor is positioned must not be empty if the index tree is not totally empty! */ -UNIV_INTERN void btr_pcur_store_position( /*====================*/ @@ -122,8 +119,23 @@ btr_pcur_store_position( page = page_align(rec); offs = page_offset(rec); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); +#ifdef UNIV_DEBUG + if (dict_index_is_spatial(index)) { + /* For spatial index, when we do positioning on parent + buffer if necessary, it might not hold latches, but the + tree must be locked to prevent change on the page */ + ut_ad((mtr_memo_contains_flagged( + mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK) + || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX) + || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)) + && (block->page.buf_fix_count > 0)); + } else { + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX) + || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX) + || dict_table_is_intrinsic(index->table)); + } +#endif /* UNIV_DEBUG */ if (page_is_empty(page)) { /* It must be an empty index tree; NOTE that in this case @@ -135,7 +147,7 @@ btr_pcur_store_position( ut_ad(page_is_leaf(page)); ut_ad(page_get_page_no(page) == index->page); - cursor->old_stored = BTR_PCUR_OLD_STORED; + cursor->old_stored = true; if (page_rec_is_supremum_low(offs)) { @@ -162,18 +174,20 @@ 
btr_pcur_store_position( cursor->rel_pos = BTR_PCUR_ON; } - cursor->old_stored = BTR_PCUR_OLD_STORED; + cursor->old_stored = true; cursor->old_rec = dict_index_copy_rec_order_prefix( index, rec, &cursor->old_n_fields, &cursor->old_rec_buf, &cursor->buf_size); cursor->block_when_stored = block; + + /* Function try to check if block is S/X latch. */ cursor->modify_clock = buf_block_get_modify_clock(block); + cursor->withdraw_clock = buf_withdraw_clock; } /**************************************************************//** Copies the stored position of a pcur to another pcur. */ -UNIV_INTERN void btr_pcur_copy_stored_position( /*==========================*/ @@ -182,16 +196,13 @@ btr_pcur_copy_stored_position( btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is copied */ { - if (pcur_receive->old_rec_buf) { - mem_free(pcur_receive->old_rec_buf); - } - + ut_free(pcur_receive->old_rec_buf); ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t)); if (pcur_donate->old_rec_buf) { pcur_receive->old_rec_buf = (byte*) - mem_alloc(pcur_donate->buf_size); + ut_malloc_nokey(pcur_donate->buf_size); ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, pcur_donate->buf_size); @@ -217,7 +228,6 @@ restores to before first or after the last in the tree. 
@return TRUE if the cursor position was stored when it was on a user record and it can be restored on a user record whose ordering fields are identical to the ones of the original user record */ -UNIV_INTERN ibool btr_pcur_restore_position_func( /*===========================*/ @@ -229,13 +239,12 @@ btr_pcur_restore_position_func( { dict_index_t* index; dtuple_t* tuple; - ulint mode; - ulint old_mode; + page_cur_mode_t mode; + page_cur_mode_t old_mode; mem_heap_t* heap; - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); + ut_ad(mtr->is_active()); + //ut_ad(cursor->old_stored); ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED || cursor->pos_state == BTR_PCUR_IS_POSITIONED); @@ -244,16 +253,27 @@ btr_pcur_restore_position_func( if (UNIV_UNLIKELY (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { + dberr_t err = DB_SUCCESS; /* In these cases we do not try an optimistic restoration, but always do a search */ - btr_cur_open_at_index_side( + err = btr_cur_open_at_index_side( cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, index, latch_mode, btr_pcur_get_btr_cur(cursor), 0, mtr); - cursor->latch_mode = latch_mode; + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " btr_pcur_restore_position_func " + << " called from file: " + << file << " line: " << line + << " table: " << index->table->name + << " index: " << index->name; + } + + cursor->latch_mode = + BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->block_when_stored = btr_pcur_get_block(cursor); @@ -263,14 +283,21 @@ btr_pcur_restore_position_func( ut_a(cursor->old_rec); ut_a(cursor->old_n_fields); - if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF) - || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) { + /* Optimistic latching involves S/X latch not required for + intrinsic table instead we would prefer to search fresh. 
*/ + if ((latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF + || latch_mode == BTR_SEARCH_PREV + || latch_mode == BTR_MODIFY_PREV) + && !dict_table_is_intrinsic(cursor->btr_cur.index->table)) { /* Try optimistic restoration. */ - if (buf_page_optimistic_get(latch_mode, - cursor->block_when_stored, - cursor->modify_clock, - file, line, mtr)) { + if (!buf_pool_is_obsolete(cursor->withdraw_clock) + && btr_cur_optimistic_latch_leaves( + cursor->block_when_stored, cursor->modify_clock, + &latch_mode, btr_pcur_get_btr_cur(cursor), + file, line, mtr)) { + cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->latch_mode = latch_mode; @@ -334,7 +361,7 @@ btr_pcur_restore_position_func( break; default: ut_error; - mode = 0; + mode = PAGE_CUR_UNSUPP; } btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode, @@ -343,39 +370,28 @@ btr_pcur_restore_position_func( /* Restore the old search mode */ cursor->search_mode = old_mode; - switch (cursor->rel_pos) { - case BTR_PCUR_ON: - if (btr_pcur_is_on_user_rec(cursor) - && !cmp_dtuple_rec( - tuple, btr_pcur_get_rec(cursor), - rec_get_offsets(btr_pcur_get_rec(cursor), - index, NULL, - ULINT_UNDEFINED, &heap))) { + ut_ad(cursor->rel_pos == BTR_PCUR_ON + || cursor->rel_pos == BTR_PCUR_BEFORE + || cursor->rel_pos == BTR_PCUR_AFTER); + if (cursor->rel_pos == BTR_PCUR_ON + && btr_pcur_is_on_user_rec(cursor) + && !cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + index, NULL, ULINT_UNDEFINED, &heap))) { - /* We have to store the NEW value for - the modify clock, since the cursor can - now be on a different page! But we can - retain the value of old_rec */ + /* We have to store the NEW value for the modify clock, + since the cursor can now be on a different page! 
+ But we can retain the value of old_rec */ - cursor->block_when_stored = - btr_pcur_get_block(cursor); - cursor->modify_clock = - buf_block_get_modify_clock( - cursor->block_when_stored); - cursor->old_stored = BTR_PCUR_OLD_STORED; + cursor->block_when_stored = btr_pcur_get_block(cursor); + cursor->modify_clock = buf_block_get_modify_clock( + cursor->block_when_stored); + cursor->old_stored = true; + cursor->withdraw_clock = buf_withdraw_clock; - mem_heap_free(heap); + mem_heap_free(heap); - return(TRUE); - } -#ifdef UNIV_DEBUG - /* fall through */ - case BTR_PCUR_BEFORE: - case BTR_PCUR_AFTER: - break; - default: - ut_error; -#endif /* UNIV_DEBUG */ + return(TRUE); } mem_heap_free(heap); @@ -394,7 +410,6 @@ Moves the persistent cursor to the first record on the next page. Releases the latch on the current page, and bufferunfixes it. Note that there must not be modifications on the current page, as then the x-latch can be released only in mtr_commit. */ -UNIV_INTERN void btr_pcur_move_to_next_page( /*=======================*/ @@ -403,42 +418,57 @@ btr_pcur_move_to_next_page( mtr_t* mtr) /*!< in: mtr */ { ulint next_page_no; - ulint space; - ulint zip_size; page_t* page; buf_block_t* next_block; page_t* next_page; + ulint mode; + dict_table_t* table = btr_pcur_get_btr_cur(cursor)->index->table; ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); ut_ad(btr_pcur_is_after_last_on_page(cursor)); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; page = btr_pcur_get_page(cursor); next_page_no = btr_page_get_next(page, mtr); - space = buf_block_get_space(btr_pcur_get_block(cursor)); - zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor)); ut_ad(next_page_no != FIL_NULL); - next_block = btr_block_get(space, zip_size, next_page_no, - cursor->latch_mode, - btr_pcur_get_btr_cur(cursor)->index, mtr); + mode = cursor->latch_mode; + switch (mode) { + case BTR_SEARCH_TREE: + mode = BTR_SEARCH_LEAF; + 
break; + case BTR_MODIFY_TREE: + mode = BTR_MODIFY_LEAF; + } + + /* For intrinsic tables we avoid taking any latches as table is + accessed by only one thread at any given time. */ + if (dict_table_is_intrinsic(table)) { + mode = BTR_NO_LATCHES; + } + + buf_block_t* block = btr_pcur_get_block(cursor); + + next_block = btr_block_get( + page_id_t(block->page.id.space(), next_page_no), + block->page.size, mode, + btr_pcur_get_btr_cur(cursor)->index, mtr); + next_page = buf_block_get_frame(next_block); #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(next_page) == page_is_comp(page)); ut_a(btr_page_get_prev(next_page, mtr) - == buf_block_get_page_no(btr_pcur_get_block(cursor))); + == btr_pcur_get_block(cursor)->page.id.page_no()); #endif /* UNIV_BTR_DEBUG */ - next_block->check_index_page_at_flush = TRUE; - btr_leaf_page_release(btr_pcur_get_block(cursor), - cursor->latch_mode, mtr); + btr_leaf_page_release(btr_pcur_get_block(cursor), mode, mtr); page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor)); - page_check_dir(next_page); + ut_d(page_check_dir(next_page)); } /*********************************************************//** @@ -450,7 +480,6 @@ alphabetical position of the cursor is guaranteed to be sensible on return, but it may happen that the cursor is not positioned on the last record of any page, because the structure of the tree may have changed during the time when the cursor had no latches. 
*/ -UNIV_INTERN void btr_pcur_move_backward_from_page( /*=============================*/ @@ -486,7 +515,7 @@ btr_pcur_move_backward_from_page( mtr_commit(mtr); - mtr_start_trx(mtr, mtr->trx); + mtr_start(mtr); btr_pcur_restore_position(latch_mode2, cursor, mtr); @@ -494,37 +523,42 @@ btr_pcur_move_backward_from_page( prev_page_no = btr_page_get_prev(page, mtr); - if (prev_page_no == FIL_NULL) { - } else if (btr_pcur_is_before_first_on_page(cursor)) { + /* For intrinsic table we don't do optimistic restore and so there is + no left block that is pinned that needs to be released. */ + if (!dict_table_is_intrinsic( + btr_cur_get_index(btr_pcur_get_btr_cur(cursor))->table)) { - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; + if (prev_page_no == FIL_NULL) { + } else if (btr_pcur_is_before_first_on_page(cursor)) { - btr_leaf_page_release(btr_pcur_get_block(cursor), - latch_mode, mtr); + prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - page_cur_set_after_last(prev_block, + btr_leaf_page_release(btr_pcur_get_block(cursor), + latch_mode, mtr); + + page_cur_set_after_last(prev_block, btr_pcur_get_page_cur(cursor)); - } else { + } else { - /* The repositioned cursor did not end on an infimum record on - a page. Cursor repositioning acquired a latch also on the - previous page, but we do not need the latch: release it. */ + /* The repositioned cursor did not end on an infimum + record on a page. Cursor repositioning acquired a latch + also on the previous page, but we do not need the latch: + release it. 
*/ - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; + prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - btr_leaf_page_release(prev_block, latch_mode, mtr); + btr_leaf_page_release(prev_block, latch_mode, mtr); + } } cursor->latch_mode = latch_mode; - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; } /*********************************************************//** Moves the persistent cursor to the previous record in the tree. If no records are left, the cursor stays 'before first in tree'. -@return TRUE if the cursor was not before first in tree */ -UNIV_INTERN +@return TRUE if the cursor was not before first in tree */ ibool btr_pcur_move_to_prev( /*==================*/ @@ -535,7 +569,7 @@ btr_pcur_move_to_prev( ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; if (btr_pcur_is_before_first_on_page(cursor)) { @@ -561,13 +595,12 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then in the first case sets the cursor after last in tree, and in the latter case before first in tree. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */ -UNIV_INTERN void btr_pcur_open_on_user_rec_func( /*===========================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... 
*/ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc index e6acb7802f1..8ed0117b36e 100644 --- a/storage/innobase/btr/btr0scrub.cc +++ b/storage/innobase/btr/btr0scrub.cc @@ -77,6 +77,7 @@ static void log_scrub_failure( /*===============*/ + dict_index_t* index, /*!< in: index */ btr_scrub_t* scrub_data, /*!< in: data to store statistics on */ buf_block_t* block, /*!< in: block */ dberr_t err) /*!< in: error */ @@ -100,10 +101,16 @@ log_scrub_failure( reason = "unknown"; scrub_data->scrub_stat.page_split_failures_unknown++; } + + buf_frame_t* buf = buf_block_get_frame(block); + const ulint space_id = mach_read_from_4(buf + FIL_PAGE_SPACE_ID); + const ulint page_no = mach_read_from_4(buf + FIL_PAGE_OFFSET); fprintf(stderr, - "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n", - buf_block_get_page_no(block), - buf_block_get_space(block), + "InnoDB: Warning: Failed to scrub index %s table %s page %lu in space %lu : %s\n", + index->name(), + index->table->name.m_name, + page_no, + space_id, reason); } @@ -117,7 +124,7 @@ btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table, uint start = time(0); uint last = start; - while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) { + while (mutex_enter_nowait(&(dict_sys->mutex))) { /* if we lock to close a table, we wait forever * if we don't lock to close a table, we check if space * is closing, and then instead give up @@ -344,16 +351,7 @@ btr_optimistic_scrub( page_get_n_recs(buf_block_get_frame(block)) > 2 && (rand() % 100) < test_pessimistic_scrub_pct) { - fprintf(stderr, - "scrub: simulate btr_page_reorganize failed %lu:%lu " - " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n", - buf_block_get_space(block), - buf_block_get_page_no(block), - (ulonglong)scrub_data->current_table->id, - scrub_data->current_table->name, - 
(ulonglong)scrub_data->current_index->id, - scrub_data->current_index->name, - page_get_n_recs(buf_block_get_frame(block))); + log_scrub_failure(index, scrub_data, block, DB_OVERFLOW); return DB_OVERFLOW; } #endif @@ -392,11 +390,12 @@ btr_pessimistic_scrub( mtr_t* mtr) /*!< in: mtr */ { page_t* page = buf_block_get_frame(block); + if (page_get_n_recs(page) < 2) { /** * There is no way we can split a page with < 2 records */ - log_scrub_failure(scrub_data, block, DB_UNDERFLOW); + log_scrub_failure(index, scrub_data, block, DB_UNDERFLOW); return DB_UNDERFLOW; } @@ -407,17 +406,19 @@ btr_pessimistic_scrub( ulint n_reserved = 0; if (!fsp_reserve_free_extents(&n_reserved, index->space, n_extents, FSP_NORMAL, mtr)) { - log_scrub_failure(scrub_data, block, + log_scrub_failure(index, scrub_data, block, DB_OUT_OF_FILE_SPACE); return DB_OUT_OF_FILE_SPACE; } /* read block variables */ - ulint space = buf_block_get_space(block); - ulint page_no = buf_block_get_page_no(block); - ulint zip_size = buf_block_get_zip_size(block); - ulint left_page_no = btr_page_get_prev(page, mtr); - ulint right_page_no = btr_page_get_next(page, mtr); + const ulint page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + const page_id_t page_id(dict_index_get_space(index), page_no); + const ulint left_page_no = btr_page_get_prev(page, mtr); + const ulint right_page_no = btr_page_get_next(page, mtr); + const page_id_t lpage_id(dict_index_get_space(index), left_page_no); + const page_id_t rpage_id(dict_index_get_space(index), right_page_no); + const page_size_t page_size(dict_table_page_size(index->table)); /** * When splitting page, we need X-latches on left/right brothers @@ -430,19 +431,17 @@ btr_pessimistic_scrub( * and re-lock. 
We still have x-lock on index * so this should be safe */ - mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint, - block); + mtr->release_block_at_savepoint(scrub_data->savepoint, block); - buf_block_t* get_block = btr_block_get( - space, zip_size, left_page_no, + buf_block_t* get_block __attribute__((unused)) = btr_block_get( + lpage_id, page_size, RW_X_LATCH, index, mtr); - get_block->check_index_page_at_flush = TRUE; /** * Refetch block and re-initialize page */ block = btr_block_get( - space, zip_size, page_no, + page_id, page_size, RW_X_LATCH, index, mtr); page = buf_block_get_frame(block); @@ -455,10 +454,9 @@ btr_pessimistic_scrub( } if (right_page_no != FIL_NULL) { - buf_block_t* get_block = btr_block_get( - space, zip_size, right_page_no, + buf_block_t* get_block __attribute__((unused))= btr_block_get( + rpage_id, page_size, RW_X_LATCH, index, mtr); - get_block->check_index_page_at_flush = TRUE; } /* arguments to btr_page_split_and_insert */ @@ -478,7 +476,7 @@ btr_pessimistic_scrub( /** * call split page with NULL as argument for entry to insert */ - if (dict_index_get_page(index) == buf_block_get_page_no(block)) { + if (dict_index_get_page(index) == page_no) { /* The page is the root page * NOTE: ibuf_reset_free_bits is called inside * btr_root_raise_and_insert */ @@ -659,8 +657,9 @@ btr_scrub_free_page( FIL_PAGE_TYPE_ALLOCATED); } - ulint compact = 1; - page_create(block, mtr, compact); + page_create(block, mtr, + dict_table_is_comp(scrub_data->current_table), + dict_index_is_spatial(scrub_data->current_index)); mtr_commit(mtr); @@ -828,11 +827,13 @@ btr_scrub_start_space( ulint space, /*!< in: space */ btr_scrub_t* scrub_data) /*!< in/out: scrub data */ { + bool found; scrub_data->space = space; scrub_data->current_table = NULL; scrub_data->current_index = NULL; + const page_size_t page_size = fil_space_get_page_size(space, &found); - scrub_data->compressed = fil_space_get_zip_size(space) > 0; + scrub_data->compressed = 
page_size.is_compressed(); scrub_data->scrubbing = check_scrub_setting(scrub_data); return scrub_data->scrubbing; } @@ -891,8 +892,7 @@ UNIV_INTERN void btr_scrub_init() { - mutex_create(scrub_stat_mutex_key, - &scrub_stat_mutex, SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_SCRUB_STAT_MUTEX, &scrub_stat_mutex); memset(&scrub_stat, 0, sizeof(scrub_stat)); } @@ -905,3 +905,4 @@ btr_scrub_cleanup() { mutex_free(&scrub_stat_mutex); } + diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index d1263969ce9..4489775d46c 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -33,7 +33,7 @@ Created 2/17/1996 Heikki Tuuri #include "btr0sea.h" #ifdef UNIV_NONINL #include "btr0sea.ic" -#endif +#endif /* UNIV_NOINL */ #include "buf0buf.h" #include "page0page.h" @@ -42,40 +42,42 @@ Created 2/17/1996 Heikki Tuuri #include "btr0pcur.h" #include "btr0btr.h" #include "ha0ha.h" +#include "srv0mon.h" +#include "sync0sync.h" -/** Flag: has the search system been enabled? -Protected by btr_search_latch. */ -UNIV_INTERN char btr_search_enabled = TRUE; +/** Is search system enabled. +Search system is protected by array of latches. */ +char btr_search_enabled = true; -/** A dummy variable to fool the compiler */ -UNIV_INTERN ulint btr_search_this_is_zero = 0; +/** Number of adaptive hash index partition. 
*/ +ulong btr_ahi_parts = 8; + +#ifdef UNIV_SEARCH_PERF_STAT +/** Number of successful adaptive hash index lookups */ +ulint btr_search_n_succ = 0; +/** Number of failed adaptive hash index lookups */ +ulint btr_search_n_hash_fail = 0; +#endif /* UNIV_SEARCH_PERF_STAT */ /** padding to prevent other memory update hotspots from residing on the same memory -cache line as btr_search_latch */ +cache line as btr_search_latches */ UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE]; -/** The latch protecting the adaptive search system: this latch protects the +/** The latches protecting the adaptive search system: this latches protects the (1) positions of records on those pages where a hash index has been built. NOTE: It does not protect values of non-ordering fields within a record from being updated in-place! We can use fact (1) to perform unique searches to -indexes. */ - -/* We will allocate the latch from dynamic memory to get it to the +indexes. We will allocate the latches from dynamic memory to get it to the same DRAM page as other hotspot semaphores */ -UNIV_INTERN rw_lock_t* btr_search_latch_temp; +rw_lock_t** btr_search_latches; /** padding to prevent other memory update hotspots from residing on the same memory cache line */ UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE]; /** The adaptive hash index */ -UNIV_INTERN btr_search_sys_t* btr_search_sys; - -#ifdef UNIV_PFS_RWLOCK -/* Key to register btr_search_sys with performance schema */ -UNIV_INTERN mysql_pfs_key_t btr_search_latch_key; -#endif /* UNIV_PFS_RWLOCK */ +btr_search_sys_t* btr_search_sys; /** If the number of records on the page divided by this parameter would have been successfully accessed using a hash index, the index @@ -86,6 +88,30 @@ is then built on the page, assuming the global limit has been reached */ before hash index building is started */ #define BTR_SEARCH_BUILD_LIMIT 100 +/** Determine the number of accessed key fields. 
+@param[in] n_fields number of complete fields +@param[in] n_bytes number of bytes in an incomplete last field +@return number of complete or incomplete fields */ +inline MY_ATTRIBUTE((warn_unused_result)) +ulint +btr_search_get_n_fields( + ulint n_fields, + ulint n_bytes) +{ + return(n_fields + (n_bytes > 0 ? 1 : 0)); +} + +/** Determine the number of accessed key fields. +@param[in] cursor b-tree cursor +@return number of complete or incomplete fields */ +inline MY_ATTRIBUTE((warn_unused_result)) +ulint +btr_search_get_n_fields( + const btr_cur_t* cursor) +{ + return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes)); +} + /********************************************************************//** Builds a hash index on a page with the given parameters. If the page already has a hash index with different parameters, the old hash index is removed. @@ -103,8 +129,7 @@ btr_search_build_page_hash_index( field */ ibool left_side);/*!< in: hash for searches from left side? */ -/*****************************************************************//** -This function should be called before reserving any btr search mutex, if +/** This function should be called before reserving any btr search mutex, if the intended operation might add nodes to the search system hash table. Because of the latching order, once we have reserved the btr search system latch, we cannot allocate a free frame from the buffer pool. Checks that @@ -112,21 +137,19 @@ there is a free buffer frame allocated for hash table heap in the btr search system. If not, allocates a free frames for the heap. This check makes it probable that, when have reserved the btr search system latch and we need to allocate a new node to the hash table, it will succeed. However, the check -will not guarantee success. */ +will not guarantee success. 
+@param[in] index index handler */ static void -btr_search_check_free_space_in_heap(void) -/*=====================================*/ +btr_search_check_free_space_in_heap(dict_index_t* index) { hash_table_t* table; mem_heap_t* heap; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); - table = btr_search_sys->hash_index; + table = btr_get_search_table(index); heap = table->heap; @@ -137,96 +160,174 @@ btr_search_check_free_space_in_heap(void) if (heap->free_block == NULL) { buf_block_t* block = buf_block_alloc(NULL); - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); - if (heap->free_block == NULL) { + if (btr_search_enabled + && heap->free_block == NULL) { heap->free_block = block; } else { buf_block_free(block); } - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(index); } } -/*****************************************************************//** -Creates and initializes the adaptive search system at a database start. */ -UNIV_INTERN +/** Creates and initializes the adaptive search system at a database start. +@param[in] hash_size hash table size. */ void -btr_search_sys_create( -/*==================*/ - ulint hash_size) /*!< in: hash index hash table size */ +btr_search_sys_create(ulint hash_size) { - /* We allocate the search latch from dynamic memory: - see above at the global variable definition */ + /* Search System is divided into n parts. + Each part controls access to distinct set of hash buckets from + hash table through its own latch. */ - btr_search_latch_temp = (rw_lock_t*) mem_alloc(sizeof(rw_lock_t)); + /* Step-1: Allocate latches (1 per part). 
*/ + btr_search_latches = reinterpret_cast( + ut_malloc(sizeof(rw_lock_t*) * btr_ahi_parts, mem_key_ahi)); - rw_lock_create(btr_search_latch_key, &btr_search_latch, - SYNC_SEARCH_SYS); + for (ulint i = 0; i < btr_ahi_parts; ++i) { - btr_search_sys = (btr_search_sys_t*) - mem_alloc(sizeof(btr_search_sys_t)); + btr_search_latches[i] = reinterpret_cast( + ut_malloc(sizeof(rw_lock_t), mem_key_ahi)); + + rw_lock_create(btr_search_latch_key, + btr_search_latches[i], SYNC_SEARCH_SYS); + } + + /* Step-2: Allocate hash tablees. */ + btr_search_sys = reinterpret_cast( + ut_malloc(sizeof(btr_search_sys_t), mem_key_ahi)); + + btr_search_sys->hash_tables = reinterpret_cast( + ut_malloc(sizeof(hash_table_t*) * btr_ahi_parts, mem_key_ahi)); + + for (ulint i = 0; i < btr_ahi_parts; ++i) { + + btr_search_sys->hash_tables[i] = + ib_create((hash_size / btr_ahi_parts), + LATCH_ID_HASH_TABLE_MUTEX, + 0, MEM_HEAP_FOR_BTR_SEARCH); - btr_search_sys->hash_index = ha_create(hash_size, 0, - MEM_HEAP_FOR_BTR_SEARCH, 0); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - btr_search_sys->hash_index->adaptive = TRUE; + btr_search_sys->hash_tables[i]->adaptive = TRUE; #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - + } } -/*****************************************************************//** -Frees the adaptive search system at a database shutdown. */ -UNIV_INTERN +/** Resize hash index hash table. +@param[in] hash_size hash index hash table size */ void -btr_search_sys_free(void) -/*=====================*/ +btr_search_sys_resize(ulint hash_size) { - rw_lock_free(&btr_search_latch); - mem_free(btr_search_latch_temp); - btr_search_latch_temp = NULL; - mem_heap_free(btr_search_sys->hash_index->heap); - hash_table_free(btr_search_sys->hash_index); - mem_free(btr_search_sys); - btr_search_sys = NULL; + /* Step-1: Lock all search latches in exclusive mode. 
*/ + btr_search_x_lock_all(); + + if (btr_search_enabled) { + + btr_search_x_unlock_all(); + + ib::error() << "btr_search_sys_resize failed because" + " hash index hash table is not empty."; + ut_ad(0); + return; + } + + /* Step-2: Recreate hash tables with new size. */ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + + mem_heap_free(btr_search_sys->hash_tables[i]->heap); + hash_table_free(btr_search_sys->hash_tables[i]); + + btr_search_sys->hash_tables[i] = + ib_create((hash_size / btr_ahi_parts), + LATCH_ID_HASH_TABLE_MUTEX, + 0, MEM_HEAP_FOR_BTR_SEARCH); + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + btr_search_sys->hash_tables[i]->adaptive = TRUE; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + } + + /* Step-3: Unlock all search latches from exclusive mode. */ + btr_search_x_unlock_all(); } -/********************************************************************//** -Set index->ref_count = 0 on all indexes of a table. */ +/** Frees the adaptive search system at a database shutdown. */ +void +btr_search_sys_free() +{ + ut_ad(btr_search_sys != NULL && btr_search_latches != NULL); + + /* Step-1: Release the hash tables. */ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + + mem_heap_free(btr_search_sys->hash_tables[i]->heap); + hash_table_free(btr_search_sys->hash_tables[i]); + + } + + ut_free(btr_search_sys->hash_tables); + ut_free(btr_search_sys); + btr_search_sys = NULL; + + /* Step-2: Release all allocates latches. */ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + + rw_lock_free(btr_search_latches[i]); + ut_free(btr_search_latches[i]); + } + + ut_free(btr_search_latches); + btr_search_latches = NULL; +} + +/** Set index->ref_count = 0 on all indexes of a table. 
+@param[in,out] table table handler */ static void btr_search_disable_ref_count( -/*=========================*/ - dict_table_t* table) /*!< in/out: table */ + dict_table_t* table) { dict_index_t* index; ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - for (index = dict_table_get_first_index(table); index; + for (index = dict_table_get_first_index(table); + index != NULL; index = dict_table_get_next_index(index)) { + ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); + index->search_info->ref_count = 0; } } -/********************************************************************//** -Disable the adaptive hash search system and empty the index. */ -UNIV_INTERN +/** Disable the adaptive hash search system and empty the index. +@param[in] need_mutex need to acquire dict_sys->mutex */ void -btr_search_disable(void) -/*====================*/ +btr_search_disable( + bool need_mutex) { dict_table_t* table; - mutex_enter(&dict_sys->mutex); - rw_lock_x_lock(&btr_search_latch); + if (need_mutex) { + mutex_enter(&dict_sys->mutex); + } - btr_search_enabled = FALSE; + ut_ad(mutex_own(&dict_sys->mutex)); + btr_search_x_lock_all(); + + if (!btr_search_enabled) { + if (need_mutex) { + mutex_exit(&dict_sys->mutex); + } + + btr_search_x_unlock_all(); + return; + } + + btr_search_enabled = false; /* Clear the index->search_info->ref_count of every index in the data dictionary cache. */ @@ -242,51 +343,53 @@ btr_search_disable(void) btr_search_disable_ref_count(table); } - mutex_exit(&dict_sys->mutex); + if (need_mutex) { + mutex_exit(&dict_sys->mutex); + } /* Set all block->index = NULL. */ buf_pool_clear_hash_index(); /* Clear the adaptive hash index. 
*/ - hash_table_clear(btr_search_sys->hash_index); - mem_heap_empty(btr_search_sys->hash_index->heap); + for (ulint i = 0; i < btr_ahi_parts; ++i) { + hash_table_clear(btr_search_sys->hash_tables[i]); + mem_heap_empty(btr_search_sys->hash_tables[i]->heap); + } - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock_all(); } -/********************************************************************//** -Enable the adaptive hash search system. */ -UNIV_INTERN +/** Enable the adaptive hash search system. */ void -btr_search_enable(void) -/*====================*/ +btr_search_enable() { - rw_lock_x_lock(&btr_search_latch); + buf_pool_mutex_enter_all(); + if (srv_buf_pool_old_size != srv_buf_pool_size) { + buf_pool_mutex_exit_all(); + return; + } + buf_pool_mutex_exit_all(); - btr_search_enabled = TRUE; - - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_lock_all(); + btr_search_enabled = true; + btr_search_x_unlock_all(); } -/*****************************************************************//** -Creates and initializes a search info struct. -@return own: search info struct */ -UNIV_INTERN +/** Creates and initializes a search info struct. +@param[in] heap heap where created. +@return own: search info struct */ btr_search_t* -btr_search_info_create( -/*===================*/ - mem_heap_t* heap) /*!< in: heap where created */ +btr_search_info_create(mem_heap_t* heap) { btr_search_t* info; info = (btr_search_t*) mem_heap_alloc(heap, sizeof(btr_search_t)); -#ifdef UNIV_DEBUG - info->magic_n = BTR_SEARCH_MAGIC_N; -#endif /* UNIV_DEBUG */ + ut_d(info->magic_n = BTR_SEARCH_MAGIC_N); info->ref_count = 0; info->root_guess = NULL; + info->withdraw_clock = 0; info->hash_analysis = 0; info->n_hash_potential = 0; @@ -309,53 +412,50 @@ btr_search_info_create( return(info); } -/*****************************************************************//** -Returns the value of ref_count. The value is protected by -btr_search_latch. -@return ref_count value. 
*/ -UNIV_INTERN +/** Returns the value of ref_count. The value is protected by latch. +@param[in] info search info +@param[in] index index identifier +@return ref_count value. */ ulint btr_search_info_get_ref_count( -/*==========================*/ - btr_search_t* info) /*!< in: search info. */ + btr_search_t* info, + dict_index_t* index) { - ulint ret; + ulint ret = 0; + + if (!btr_search_enabled) { + return(ret); + } ut_ad(info); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); - rw_lock_s_lock(&btr_search_latch); + btr_search_s_lock(index); ret = info->ref_count; - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); return(ret); } -/*********************************************************************//** -Updates the search info of an index about hash successes. NOTE that info +/** Updates the search info of an index about hash successes. NOTE that info is NOT protected by any semaphore, to save CPU time! Do not assume its fields -are consistent. */ +are consistent. 
+@param[in,out] info search info +@param[in] cursor cursor which was just positioned */ static void btr_search_info_update_hash( -/*========================*/ - btr_search_t* info, /*!< in/out: search info */ - const btr_cur_t* cursor)/*!< in: cursor which was just positioned */ + btr_search_t* info, + btr_cur_t* cursor) { - dict_index_t* index; + dict_index_t* index = cursor->index; ulint n_unique; int cmp; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = cursor->index; + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); if (dict_index_is_ibuf(index)) { /* So many deletes are performed on an insert buffer tree @@ -441,7 +541,6 @@ set_new_recomm: info->n_fields = n_unique; info->n_bytes = 0; - } else if (cursor->low_match > cursor->up_match) { info->n_fields = cursor->up_match + 1; @@ -455,27 +554,24 @@ set_new_recomm: } } -/*********************************************************************//** -Updates the block search info on hash successes. NOTE that info and -block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any +/** Update the block search info on hash successes. NOTE that info and +block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any semaphore, to save CPU time! Do not assume the fields are consistent. 
-@return TRUE if building a (new) hash index on the block is recommended */ +@return TRUE if building a (new) hash index on the block is recommended +@param[in,out] info search info +@param[in,out] block buffer block +@param[in] cursor cursor */ static ibool btr_search_update_block_hash_info( -/*==============================*/ - btr_search_t* info, /*!< in: search info */ - buf_block_t* block, /*!< in: buffer block */ - btr_cur_t* cursor MY_ATTRIBUTE((unused))) - /*!< in: cursor */ + btr_search_t* info, + buf_block_t* block, + const btr_cur_t* cursor) { -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED) - || rw_lock_own(&block->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(cursor); + ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X)); + ut_ad(rw_lock_own(&block->lock, RW_LOCK_S) + || rw_lock_own(&block->lock, RW_LOCK_X)); info->last_hash_succ = FALSE; @@ -533,32 +629,31 @@ btr_search_update_block_hash_info( return(FALSE); } -/*********************************************************************//** -Updates a hash node reference when it has been unsuccessfully used in a +/** Updates a hash node reference when it has been unsuccessfully used in a search which could have succeeded with the used hash parameters. This can happen because when building a hash index for a page, we do not check what happens at page boundaries, and therefore there can be misleading hash nodes. Also, collisions in the fold value can lead to misleading references. This function lazily fixes these imperfections in the hash -index. */ +index. 
+@param[in] info search info +@param[in] block buffer block where cursor positioned +@param[in] cursor cursor */ static void btr_search_update_hash_ref( -/*=======================*/ - btr_search_t* info, /*!< in: search info */ - buf_block_t* block, /*!< in: buffer block where cursor positioned */ - btr_cur_t* cursor) /*!< in: cursor */ + const btr_search_t* info, + buf_block_t* block, + const btr_cur_t* cursor) { dict_index_t* index; ulint fold; rec_t* rec; ut_ad(cursor->flag == BTR_CUR_HASH_FAIL); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X)); + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S) + || rw_lock_own(&(block->lock), RW_LOCK_X)); ut_ad(page_align(btr_cur_get_rec(cursor)) == buf_block_get_frame(block)); @@ -569,6 +664,7 @@ btr_search_update_hash_ref( return; } + ut_ad(block->page.id.space() == index->space); ut_a(index == cursor->index); ut_a(!dict_index_is_ibuf(index)); @@ -595,35 +691,28 @@ btr_search_update_hash_ref( if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); - ha_insert_for_fold(btr_search_sys->hash_index, fold, + ha_insert_for_fold(btr_get_search_table(index), fold, block, rec); MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); } } -/*********************************************************************//** -Updates the search info. */ -UNIV_INTERN +/** Updates the search info. 
+@param[in,out] info search info +@param[in] cursor cursor which was just positioned */ void btr_search_info_update_slow( -/*========================*/ - btr_search_t* info, /*!< in/out: search info */ - btr_cur_t* cursor) /*!< in: cursor which was just positioned */ + btr_search_t* info, + btr_cur_t* cursor) { buf_block_t* block; ibool build_index; - ulint* params; - ulint* params2; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X)); block = btr_cur_get_block(cursor); @@ -638,74 +727,61 @@ btr_search_info_update_slow( if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) { - btr_search_check_free_space_in_heap(); + btr_search_check_free_space_in_heap(cursor->index); } if (cursor->flag == BTR_CUR_HASH_FAIL) { /* Update the hash node reference, if appropriate */ - rw_lock_x_lock(&btr_search_latch); +#ifdef UNIV_SEARCH_PERF_STAT + btr_search_n_hash_fail++; +#endif /* UNIV_SEARCH_PERF_STAT */ + + btr_search_x_lock(cursor->index); btr_search_update_hash_ref(info, block, cursor); - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(cursor->index); } if (build_index) { /* Note that since we did not protect block->n_fields etc. with any semaphore, the values can be inconsistent. We have - to check inside the function call that they make sense. We - also malloc an array and store the values there to make sure - the compiler does not let the function call parameters change - inside the called function. It might be that the compiler - would optimize the call just to pass pointers to block. 
*/ - - params = (ulint*) mem_alloc(3 * sizeof(ulint)); - params[0] = block->n_fields; - params[1] = block->n_bytes; - params[2] = block->left_side; - - /* Make sure the compiler cannot deduce the values and do - optimizations */ - - params2 = params + btr_search_this_is_zero; - - btr_search_build_page_hash_index(cursor->index, - block, - params2[0], - params2[1], - params2[2]); - mem_free(params); + to check inside the function call that they make sense. */ + btr_search_build_page_hash_index(cursor->index, block, + block->n_fields, + block->n_bytes, + block->left_side); } } -/******************************************************************//** -Checks if a guessed position for a tree cursor is right. Note that if +/** Checks if a guessed position for a tree cursor is right. Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match both have sensible values. -@return TRUE if success */ +@param[in,out] cursor guess cursor position +@param[in] can_only_compare_to_cursor_rec + if we do not have a latch on the page of cursor, + but a latch corresponding search system, then + ONLY the columns of the record UNDER the cursor + are protected, not the next or previous record + in the chain: we cannot look at the next or + previous record to check our guess! +@param[in] tuple data tuple +@param[in] mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE +@param[in] mtr mini transaction +@return TRUE if success */ static ibool btr_search_check_guess( -/*===================*/ - btr_cur_t* cursor, /*!< in: guessed cursor position */ + btr_cur_t* cursor, ibool can_only_compare_to_cursor_rec, - /*!< in: if we do not have a latch on the page - of cursor, but only a latch on - btr_search_latch, then ONLY the columns - of the record UNDER the cursor are - protected, not the next or previous record - in the chain: we cannot look at the next or - previous record to check our guess! 
*/ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - mtr_t* mtr) /*!< in: mtr */ + const dtuple_t* tuple, + ulint mode, + mtr_t* mtr) { rec_t* rec; ulint n_unique; ulint match; - ulint bytes; int cmp; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; @@ -720,15 +796,13 @@ btr_search_check_guess( ut_ad(page_rec_is_user_rec(rec)); match = 0; - bytes = 0; offsets = rec_get_offsets(rec, cursor->index, offsets, n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, - offsets, &match, &bytes); + cmp = cmp_dtuple_rec_with_match(tuple, rec, offsets, &match); if (mode == PAGE_CUR_GE) { - if (cmp == 1) { + if (cmp > 0) { goto exit_func; } @@ -739,18 +813,18 @@ btr_search_check_guess( goto exit_func; } } else if (mode == PAGE_CUR_LE) { - if (cmp == -1) { + if (cmp < 0) { goto exit_func; } cursor->low_match = match; } else if (mode == PAGE_CUR_G) { - if (cmp != -1) { + if (cmp >= 0) { goto exit_func; } } else if (mode == PAGE_CUR_L) { - if (cmp != 1) { + if (cmp <= 0) { goto exit_func; } } @@ -762,7 +836,6 @@ btr_search_check_guess( } match = 0; - bytes = 0; if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) { rec_t* prev_rec; @@ -780,12 +853,12 @@ btr_search_check_guess( offsets = rec_get_offsets(prev_rec, cursor->index, offsets, n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - offsets, &match, &bytes); + cmp = cmp_dtuple_rec_with_match( + tuple, prev_rec, offsets, &match); if (mode == PAGE_CUR_GE) { - success = cmp == 1; + success = cmp > 0; } else { - success = cmp != -1; + success = cmp >= 0; } goto exit_func; @@ -809,13 +882,13 @@ btr_search_check_guess( offsets = rec_get_offsets(next_rec, cursor->index, offsets, n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, - offsets, &match, &bytes); + cmp = cmp_dtuple_rec_with_match( + tuple, next_rec, offsets, &match); if (mode == PAGE_CUR_LE) { - success = cmp == -1; + 
success = cmp < 0; cursor->up_match = match; } else { - success = cmp != 1; + success = cmp <= 0; } } exit_func: @@ -825,34 +898,53 @@ exit_func: return(success); } -/******************************************************************//** -Tries to guess the right search position based on the hash search info +static +void +btr_search_failure(btr_search_t* info, btr_cur_t* cursor) +{ + cursor->flag = BTR_CUR_HASH_FAIL; + +#ifdef UNIV_SEARCH_PERF_STAT + ++info->n_hash_fail; + + if (info->n_hash_succ > 0) { + --info->n_hash_succ; + } +#endif /* UNIV_SEARCH_PERF_STAT */ + + info->last_hash_succ = FALSE; +} + +/** Tries to guess the right search position based on the hash search info of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match both have sensible values. -@return TRUE if succeeded */ -UNIV_INTERN +@param[in,out] index index +@param[in,out] info index search info +@param[in] tuple logical record +@param[in] mode PAGE_CUR_L, .... +@param[in] latch_mode BTR_SEARCH_LEAF, ...; + NOTE that only if has_search_latch is 0, we will + have a latch set on the cursor page, otherwise + we assume the caller uses his search latch + to protect the record! +@param[out] cursor tree cursor +@param[in] has_search_latch + latch mode the caller currently has on + search system: RW_S/X_LATCH or 0 +@param[in] mtr mini transaction +@return TRUE if succeeded */ ibool btr_search_guess_on_hash( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - btr_search_t* info, /*!< in: index search info */ - const dtuple_t* tuple, /*!< in: logical record */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...; - NOTE that only if has_search_latch - is 0, we will have a latch set on - the cursor page, otherwise we assume - the caller uses his search latch - to protect the record! 
*/ - btr_cur_t* cursor, /*!< out: tree cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr) /*!< in: mtr */ + dict_index_t* index, + btr_search_t* info, + const dtuple_t* tuple, + ulint mode, + ulint latch_mode, + btr_cur_t* cursor, + ulint has_search_latch, + mtr_t* mtr) { - buf_pool_t* buf_pool; - buf_block_t* block; const rec_t* rec; ulint fold; index_id_t index_id; @@ -860,15 +952,23 @@ btr_search_guess_on_hash( btr_cur_t cursor2; btr_pcur_t pcur; #endif + + if (!btr_search_enabled) { + return(FALSE); + } + ut_ad(index && info && tuple && cursor && mtr); ut_ad(!dict_index_is_ibuf(index)); ut_ad((latch_mode == BTR_SEARCH_LEAF) || (latch_mode == BTR_MODIFY_LEAF)); + /* Not supported for spatial index */ + ut_ad(!dict_index_is_spatial(index)); + /* Note that, for efficiency, the struct info may not be protected by any latch here! */ - if (UNIV_UNLIKELY(info->n_hash_potential == 0)) { + if (info->n_hash_potential == 0) { return(FALSE); } @@ -876,8 +976,7 @@ btr_search_guess_on_hash( cursor->n_fields = info->n_fields; cursor->n_bytes = info->n_bytes; - if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple) - < cursor->n_fields + (cursor->n_bytes > 0))) { + if (dtuple_get_n_fields(tuple) < btr_search_get_n_fields(cursor)) { return(FALSE); } @@ -892,49 +991,69 @@ btr_search_guess_on_hash( cursor->fold = fold; cursor->flag = BTR_CUR_HASH; - if (UNIV_LIKELY(!has_search_latch)) { - rw_lock_s_lock(&btr_search_latch); + if (!has_search_latch) { + btr_search_s_lock(index); - if (UNIV_UNLIKELY(!btr_search_enabled)) { - goto failure_unlock; + if (!btr_search_enabled) { + btr_search_s_unlock(index); + + btr_search_failure(info, cursor); + + return(FALSE); } } - ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX); - ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0); + ut_ad(rw_lock_get_writer(btr_get_search_latch(index)) != RW_LOCK_X); + 
ut_ad(rw_lock_get_reader_count(btr_get_search_latch(index)) > 0); - rec = (rec_t*) ha_search_and_get_data(btr_search_sys->hash_index, fold); + rec = (rec_t*) ha_search_and_get_data( + btr_get_search_table(index), fold); - if (UNIV_UNLIKELY(!rec)) { - goto failure_unlock; - } + if (rec == NULL) { - block = buf_block_align(rec); - - if (UNIV_LIKELY(!has_search_latch)) { - - if (UNIV_UNLIKELY( - !buf_page_get_known_nowait(latch_mode, block, - BUF_MAKE_YOUNG, - __FILE__, __LINE__, - mtr))) { - goto failure_unlock; + if (!has_search_latch) { + btr_search_s_unlock(index); } - rw_lock_s_unlock(&btr_search_latch); + btr_search_failure(info, cursor); + + return(FALSE); + } + + buf_block_t* block = buf_block_from_ahi(rec); + + if (!has_search_latch) { + + if (!buf_page_get_known_nowait( + latch_mode, block, BUF_MAKE_YOUNG, + __FILE__, __LINE__, mtr)) { + + if (!has_search_latch) { + btr_search_s_unlock(index); + } + + btr_search_failure(info, cursor); + + return(FALSE); + } + + btr_search_s_unlock(index); buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); } - if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) { + if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { + ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH); - if (UNIV_LIKELY(!has_search_latch)) { + if (!has_search_latch) { btr_leaf_page_release(block, latch_mode, mtr); } - goto failure; + btr_search_failure(info, cursor); + + return(FALSE); } ut_ad(page_rec_is_user_rec(rec)); @@ -943,23 +1062,26 @@ btr_search_guess_on_hash( /* Check the validity of the guess within the page */ - /* If we only have the latch on btr_search_latch, not on the + /* If we only have the latch on search system, not on the page, it only protects the columns of the record the cursor is positioned on. We cannot look at the next of the previous record to determine if our guess for the cursor position is right. 
*/ - if (UNIV_UNLIKELY(index_id != btr_page_get_index_id(block->frame)) + if (index_id != btr_page_get_index_id(block->frame) || !btr_search_check_guess(cursor, has_search_latch, tuple, mode, mtr)) { - if (UNIV_LIKELY(!has_search_latch)) { + + if (!has_search_latch) { btr_leaf_page_release(block, latch_mode, mtr); } - goto failure; + btr_search_failure(info, cursor); + + return(FALSE); } - if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) { + if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5) { info->n_hash_potential++; } @@ -975,8 +1097,9 @@ btr_search_guess_on_hash( btr_leaf_page_release(block, latch_mode, mtr); - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - &cursor2, 0, mtr); + btr_cur_search_to_nth_level( + index, 0, tuple, mode, latch_mode, &cursor2, 0, mtr); + if (mode == PAGE_CUR_GE && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) { @@ -986,8 +1109,9 @@ btr_search_guess_on_hash( info->last_hash_succ = FALSE; - btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode, - &pcur, mtr); + btr_pcur_open_on_user_rec( + index, tuple, mode, latch_mode, &pcur, mtr); + ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor)); } else { ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor)); @@ -999,62 +1123,40 @@ btr_search_guess_on_hash( #endif info->last_hash_succ = TRUE; - if (UNIV_LIKELY(!has_search_latch) - && buf_page_peek_if_too_old(&block->page)) { +#ifdef UNIV_SEARCH_PERF_STAT + btr_search_n_succ++; +#endif + if (!has_search_latch && buf_page_peek_if_too_old(&block->page)) { buf_page_make_young(&block->page); } /* Increment the page get statistics though we did not really fix the page: for user info only */ - buf_pool = buf_pool_from_bpage(&block->page); - buf_pool->stat.n_page_gets++; + { + buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page); + + ++buf_pool->stat.n_page_gets; + } return(TRUE); - - /*-------------------------------------------*/ -failure_unlock: - if (UNIV_LIKELY(!has_search_latch)) { 
- rw_lock_s_unlock(&btr_search_latch); - } -failure: - cursor->flag = BTR_CUR_HASH_FAIL; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_fail++; - - if (info->n_hash_succ > 0) { - info->n_hash_succ--; - } -#endif - info->last_hash_succ = FALSE; - - return(FALSE); } -/********************************************************************//** -Drops a page hash index. */ -UNIV_INTERN +/** Drop any adaptive hash index entries that point to an index page. +@param[in,out] block block containing index page, s- or x-latched, or an + index page for which we know that + block->buf_fix_count == 0 or it is an index page which + has already been removed from the buf_pool->page_hash + i.e.: it is in state BUF_BLOCK_REMOVE_HASH */ void -btr_search_drop_page_hash_index( -/*============================*/ - buf_block_t* block) /*!< in: block containing index page, - s- or x-latched, or an index page - for which we know that - block->buf_fix_count == 0 or it is an - index page which has already been - removed from the buf_pool->page_hash - i.e.: it is in state - BUF_BLOCK_REMOVE_HASH */ +btr_search_drop_page_hash_index(buf_block_t* block) { - hash_table_t* table; ulint n_fields; ulint n_bytes; const page_t* page; const rec_t* rec; ulint fold; ulint prev_fold; - index_id_t index_id; ulint n_cached; ulint n_recs; ulint* folds; @@ -1062,31 +1164,60 @@ btr_search_drop_page_hash_index( mem_heap_t* heap; const dict_index_t* index; ulint* offsets; + rw_lock_t* latch; btr_search_t* info; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Do a dirty check on block->index, return if the block is - not in the adaptive hash index. This is to avoid acquiring - shared btr_search_latch for performance consideration. 
 */ - if (!block->index) { + if (!btr_search_enabled) { return; } retry: - rw_lock_s_lock(&btr_search_latch); + /* Do a dirty check on block->index, return if the block is + not in the adaptive hash index. */ index = block->index; - if (UNIV_LIKELY(!index)) { - - rw_lock_s_unlock(&btr_search_latch); - + if (index == NULL) { return; } + ut_ad(block->page.buf_fix_count == 0 + || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH + || rw_lock_own(&block->lock, RW_LOCK_S) + || rw_lock_own(&block->lock, RW_LOCK_X)); + + /* We must not dereference index here, because it could be freed + if (index->table->n_ref_count == 0 && !mutex_own(&dict_sys->mutex)). + Determine the ahi_slot based on the block contents. */ + + const index_id_t index_id + = btr_page_get_index_id(block->frame); + const ulint ahi_slot + = ut_fold_ulint_pair(static_cast<ulint>(index_id), + static_cast<ulint>(block->page.id.space())) + % btr_ahi_parts; + latch = btr_search_latches[ahi_slot]; + + ut_ad(!btr_search_own_any(RW_LOCK_S)); + ut_ad(!btr_search_own_any(RW_LOCK_X)); + + rw_lock_s_lock(latch); + + if (block->index == NULL) { + rw_lock_s_unlock(latch); + return; + } + + /* The index associated with a block must remain the + same, because we are holding block->lock or the block is + not accessible by other threads (BUF_BLOCK_REMOVE_HASH), + or the index is not accessible to other threads + (buf_fix_count == 0 when DROP TABLE or similar is executing + buf_LRU_drop_page_hash_for_tablespace(). 
*/ + ut_a(index == block->index); + ut_ad(!index->disable_ahi); + + ut_ad(block->page.id.space() == index->space); + ut_a(index_id == index->id); ut_a(!dict_index_is_ibuf(index)); #ifdef UNIV_DEBUG switch (dict_index_get_online_status(index)) { @@ -1109,25 +1240,15 @@ retry: } #endif /* UNIV_DEBUG */ - table = btr_search_sys->hash_index; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX) - || block->page.buf_fix_count == 0 - || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH); -#endif /* UNIV_SYNC_DEBUG */ - n_fields = block->curr_n_fields; n_bytes = block->curr_n_bytes; - /* NOTE: The fields of block must not be accessed after - releasing btr_search_latch, as the index page might only - be s-latched! */ + /* NOTE: The AHI fields of block must not be accessed after + releasing search latch, as the index page might only be s-latched! */ - rw_lock_s_unlock(&btr_search_latch); + rw_lock_s_unlock(latch); - ut_a(n_fields + n_bytes > 0); + ut_a(n_fields > 0 || n_bytes > 0); page = block->frame; n_recs = page_get_n_recs(page); @@ -1135,26 +1256,23 @@ retry: /* Calculate and cache fold values into an array for fast deletion from the hash index */ - folds = (ulint*) mem_alloc(n_recs * sizeof(ulint)); + folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint)); n_cached = 0; rec = page_get_infimum_rec(page); rec = page_rec_get_next_low(rec, page_is_comp(page)); - index_id = btr_page_get_index_id(page); - - ut_a(index_id == index->id); - prev_fold = 0; heap = NULL; offsets = NULL; while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); + offsets = rec_get_offsets( + rec, index, offsets, + btr_search_get_n_fields(n_fields, n_bytes), + &heap); fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); if (fold == prev_fold && prev_fold != 0) { @@ -1176,7 +1294,7 @@ next_rec: 
mem_heap_free(heap); } - rw_lock_x_lock(&btr_search_latch); + rw_lock_x_lock(latch); if (UNIV_UNLIKELY(!block->index)) { /* Someone else has meanwhile dropped the hash index */ @@ -1186,21 +1304,23 @@ next_rec: ut_a(block->index == index); - if (UNIV_UNLIKELY(block->curr_n_fields != n_fields) - || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) { + if (block->curr_n_fields != n_fields + || block->curr_n_bytes != n_bytes) { /* Someone else has meanwhile built a new hash index on the page, with different parameters */ - rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(latch); - mem_free(folds); + ut_free(folds); goto retry; } for (i = 0; i < n_cached; i++) { - ha_remove_all_nodes_to_page(table, folds[i], page); + ha_remove_all_nodes_to_page( + btr_search_sys->hash_tables[ahi_slot], + folds[i], page); } info = btr_search_get_info(block->index); @@ -1216,40 +1336,39 @@ cleanup: #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (UNIV_UNLIKELY(block->n_pointers)) { /* Corruption */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Corruption of adaptive hash index." - " After dropping\n" - "InnoDB: the hash index to a page of %s," - " still %lu hash nodes remain.\n", - index->name, (ulong) block->n_pointers); - rw_lock_x_unlock(&btr_search_latch); + ib::error() << "Corruption of adaptive hash index." + << " After dropping, the hash index to a page of " + << index->name + << ", still " << block->n_pointers + << " hash nodes remain."; + rw_lock_x_unlock(latch); ut_ad(btr_search_validate()); } else { - rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(latch); } #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - rw_lock_x_unlock(&btr_search_latch); + rw_lock_x_unlock(latch); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - mem_free(folds); + ut_free(folds); } -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. 
*/ -UNIV_INTERN +/** Drop any adaptive hash index entries that may point to an index +page that may be in the buffer pool, when a page is evicted from the +buffer pool or freed in a file segment. +@param[in] page_id page id +@param[in] page_size page size */ void btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no) /*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size) { buf_block_t* block; mtr_t mtr; + dberr_t err = DB_SUCCESS; + + ut_d(export_vars.innodb_ahi_drop_lookups++); mtr_start(&mtr); @@ -1259,35 +1378,49 @@ btr_search_drop_page_hash_when_freed( are possibly holding, we cannot s-latch the page, but must (recursively) x-latch it, even though we are only reading. */ - block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL, + block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, NULL, BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, - &mtr); + &mtr, &err); - if (block && block->index) { + if (block) { + + /* If AHI is still valid, page can't be in free state. + AHI is dropped when page is freed. */ + ut_ad(!block->page.file_page_was_freed); buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); - btr_search_drop_page_hash_index(block); + dict_index_t* index = block->index; + if (index != NULL) { + /* In all our callers, the table handle should + be open, or we should be in the process of + dropping the table (preventing eviction). */ + ut_ad(index->table->n_ref_count > 0 + || mutex_own(&dict_sys->mutex)); + btr_search_drop_page_hash_index(block); + } } mtr_commit(&mtr); } -/********************************************************************//** -Builds a hash index on a page with the given parameters. If the page already +/** Build a hash index on a page with the given parameters. 
If the page already has a hash index with different parameters, the old hash index is removed. If index is non-NULL, this function checks if n_fields and n_bytes are -sensible values, and does not build a hash index if not. */ +sensible, and does not build a hash index if not. +@param[in,out] index index for which to build. +@param[in,out] block index page, s-/x- latched. +@param[in] n_fields hash this many full fields +@param[in] n_bytes hash this many bytes of the next field +@param[in] left_side hash for searches from left side */ static void btr_search_build_page_hash_index( -/*=============================*/ - dict_index_t* index, /*!< in: index for which to build */ - buf_block_t* block, /*!< in: index page, s- or x-latched */ - ulint n_fields,/*!< in: hash this many full fields */ - ulint n_bytes,/*!< in: hash this many bytes from the next - field */ - ibool left_side)/*!< in: hash for searches from left side? */ + dict_index_t* index, + buf_block_t* block, + ulint n_fields, + ulint n_bytes, + ibool left_side) { hash_table_t* table; page_t* page; @@ -1303,36 +1436,46 @@ btr_search_build_page_hash_index( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - rec_offs_init(offsets_); - ut_ad(index); - ut_a(!dict_index_is_ibuf(index)); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(&btr_search_latch); - - if (!btr_search_enabled) { - rw_lock_s_unlock(&btr_search_latch); + if (index->disable_ahi || !btr_search_enabled) { return; } - table = btr_search_sys->hash_index; + rec_offs_init(offsets_); + ut_ad(index); + ut_ad(block->page.id.space() == index->space); + ut_a(!dict_index_is_ibuf(index)); + + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S) + || rw_lock_own(&(block->lock), RW_LOCK_X)); 
+ + btr_search_s_lock(index); + + table = btr_get_search_table(index); page = buf_block_get_frame(block); if (block->index && ((block->curr_n_fields != n_fields) || (block->curr_n_bytes != n_bytes) || (block->curr_left_side != left_side))) { - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); btr_search_drop_page_hash_index(block); } else { - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); + } + + /* Check that the values for hash index build are sensible */ + + if (n_fields == 0 && n_bytes == 0) { + + return; + } + + if (dict_index_get_n_unique_in_tree(index) + < btr_search_get_n_fields(n_fields, n_bytes)) { + return; } n_recs = page_get_n_recs(page); @@ -1342,24 +1485,11 @@ btr_search_build_page_hash_index( return; } - /* Check that the values for hash index build are sensible */ - - if (n_fields + n_bytes == 0) { - - return; - } - - if (dict_index_get_n_unique_in_tree(index) < n_fields - || (dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0)) { - return; - } - /* Calculate and cache fold values and corresponding records into an array for fast insertion to the hash index */ - folds = (ulint*) mem_alloc(n_recs * sizeof(ulint)); - recs = (rec_t**) mem_alloc(n_recs * sizeof(rec_t*)); + folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint)); + recs = (rec_t**) ut_malloc_nokey(n_recs * sizeof(rec_t*)); n_cached = 0; @@ -1367,16 +1497,12 @@ btr_search_build_page_hash_index( rec = page_rec_get_next(page_get_infimum_rec(page)); - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - - if (!page_rec_is_supremum(rec)) { - ut_a(n_fields <= rec_offs_n_fields(offsets)); - - if (n_bytes > 0) { - ut_a(n_fields < rec_offs_n_fields(offsets)); - } - } + offsets = rec_get_offsets( + rec, index, offsets, + btr_search_get_n_fields(n_fields, n_bytes), + &heap); + ut_ad(page_rec_is_supremum(rec) + || n_fields + (n_bytes > 0) == rec_offs_n_fields(offsets)); fold = rec_fold(rec, offsets, n_fields, 
n_bytes, index->id); @@ -1402,8 +1528,9 @@ btr_search_build_page_hash_index( break; } - offsets = rec_get_offsets(next_rec, index, offsets, - n_fields + (n_bytes > 0), &heap); + offsets = rec_get_offsets( + next_rec, index, offsets, + btr_search_get_n_fields(n_fields, n_bytes), &heap); next_fold = rec_fold(next_rec, offsets, n_fields, n_bytes, index->id); @@ -1426,11 +1553,11 @@ btr_search_build_page_hash_index( fold = next_fold; } - btr_search_check_free_space_in_heap(); + btr_search_check_free_space_in_heap(index); - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); - if (UNIV_UNLIKELY(!btr_search_enabled)) { + if (!btr_search_enabled) { goto exit_func; } @@ -1464,42 +1591,42 @@ btr_search_build_page_hash_index( MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED); MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached); exit_func: - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(index); - mem_free(folds); - mem_free(recs); + ut_free(folds); + ut_free(recs); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } } -/********************************************************************//** -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). */ -UNIV_INTERN +/** Moves or deletes hash entries for moved records. If new_page is already +hashed, then the hash index for page, if any, is dropped. If new_page is not +hashed, and page is hashed, then a new hash index is built to new_page with the +same parameters as page (this often happens when a page is split). +@param[in,out] new_block records are copied to this page. +@param[in,out] block index page from which record are copied, and the + copied records will be deleted from this page. 
+@param[in,out] index record descriptor */ void btr_search_move_or_delete_hash_entries( -/*===================================*/ - buf_block_t* new_block, /*!< in: records are copied - to this page */ - buf_block_t* block, /*!< in: index page from which - records were copied, and the - copied records will be deleted - from this page */ - dict_index_t* index) /*!< in: record descriptor */ + buf_block_t* new_block, + buf_block_t* block, + dict_index_t* index) { - ulint n_fields; - ulint n_bytes; - ibool left_side; + /* AHI is disabled for intrinsic table as it depends on index-id + which is dynamically assigned for intrinsic table indexes and not + through a centralized index generator. */ + if (index->disable_ahi || !btr_search_enabled) { + return; + } -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); - ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!dict_table_is_intrinsic(index->table)); - rw_lock_s_lock(&btr_search_latch); + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); + ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_X)); + + btr_search_s_lock(index); ut_a(!new_block->index || new_block->index == index); ut_a(!block->index || block->index == index); @@ -1508,7 +1635,7 @@ btr_search_move_or_delete_hash_entries( if (new_block->index) { - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); btr_search_drop_page_hash_index(block); @@ -1516,39 +1643,34 @@ btr_search_move_or_delete_hash_entries( } if (block->index) { - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - left_side = block->curr_left_side; + ulint n_fields = block->curr_n_fields; + ulint n_bytes = block->curr_n_bytes; + ibool left_side = block->curr_left_side; new_block->n_fields = block->curr_n_fields; new_block->n_bytes = block->curr_n_bytes; new_block->left_side = left_side; - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); - ut_a(n_fields + n_bytes > 0); + ut_a(n_fields > 0 || 
n_bytes > 0); - btr_search_build_page_hash_index(index, new_block, n_fields, - n_bytes, left_side); + btr_search_build_page_hash_index( + index, new_block, n_fields, n_bytes, left_side); ut_ad(n_fields == block->curr_n_fields); ut_ad(n_bytes == block->curr_n_bytes); ut_ad(left_side == block->curr_left_side); return; } - rw_lock_s_unlock(&btr_search_latch); + btr_search_s_unlock(index); } -/********************************************************************//** -Updates the page hash index when a single record is deleted from a page. */ -UNIV_INTERN +/** Updates the page hash index when a single record is deleted from a page. +@param[in] cursor cursor which was positioned on the record to delete + using btr_cur_search_, the record is not yet deleted.*/ void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ +btr_search_update_hash_on_delete(btr_cur_t* cursor) { hash_table_t* table; buf_block_t* block; @@ -1559,11 +1681,13 @@ btr_search_update_hash_on_delete( mem_heap_t* heap = NULL; rec_offs_init(offsets_); + if (cursor->index->disable_ahi || !btr_search_enabled) { + return; + } + block = btr_cur_get_block(cursor); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); index = block->index; @@ -1572,11 +1696,12 @@ btr_search_update_hash_on_delete( return; } + ut_ad(block->page.id.space() == index->space); ut_a(index == cursor->index); - ut_a(block->curr_n_fields + block->curr_n_bytes > 0); + ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0); ut_a(!dict_index_is_ibuf(index)); - table = btr_search_sys->hash_index; + table = btr_get_search_table(index); rec = btr_cur_get_rec(cursor); @@ -1587,7 +1712,7 @@ btr_search_update_hash_on_delete( mem_heap_free(heap); } - rw_lock_x_lock(&btr_search_latch); + 
btr_search_x_lock(index); if (block->index) { ut_a(block->index == index); @@ -1600,32 +1725,30 @@ btr_search_update_hash_on_delete( } } - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(index); } -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN +/** Updates the page hash index when a single record is inserted on a page. +@param[in] cursor cursor which was positioned to the place to insert + using btr_cur_search_, and the new record has been + inserted next to the cursor. */ void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ +btr_search_update_hash_node_on_insert(btr_cur_t* cursor) { hash_table_t* table; buf_block_t* block; dict_index_t* index; rec_t* rec; + if (cursor->index->disable_ahi || !btr_search_enabled) { + return; + } + rec = btr_cur_get_rec(cursor); block = btr_cur_get_block(cursor); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); index = block->index; @@ -1637,7 +1760,7 @@ btr_search_update_hash_node_on_insert( ut_a(cursor->index == index); ut_a(!dict_index_is_ibuf(index)); - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); if (!block->index) { @@ -1651,7 +1774,7 @@ btr_search_update_hash_node_on_insert( && (cursor->n_bytes == block->curr_n_bytes) && !block->curr_left_side) { - table = btr_search_sys->hash_index; + table = btr_get_search_table(index); if (ha_search_and_update_if_found( table, cursor->fold, rec, block, @@ -1660,24 +1783,21 @@ btr_search_update_hash_node_on_insert( } func_exit: - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(index); } else { - rw_lock_x_unlock(&btr_search_latch); + 
btr_search_x_unlock(index); btr_search_update_hash_on_insert(cursor); } } -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN -void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned to the +/** Updates the page hash index when a single record is inserted on a page. +@param[in,out] cursor cursor which was positioned to the place to insert using btr_cur_search_..., and the new record has been inserted next to the cursor */ +void +btr_search_update_hash_on_insert(btr_cur_t* cursor) { hash_table_t* table; buf_block_t* block; @@ -1697,11 +1817,13 @@ btr_search_update_hash_on_insert( ulint* offsets = offsets_; rec_offs_init(offsets_); + if (cursor->index->disable_ahi || !btr_search_enabled) { + return; + } + block = btr_cur_get_block(cursor); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X)); index = block->index; @@ -1710,12 +1832,14 @@ btr_search_update_hash_on_insert( return; } - btr_search_check_free_space_in_heap(); + ut_ad(block->page.id.space() == index->space); + btr_search_check_free_space_in_heap(index); - table = btr_search_sys->hash_index; + table = btr_get_search_table(index); rec = btr_cur_get_rec(cursor); + ut_a(!index->disable_ahi); ut_a(index == cursor->index); ut_a(!dict_index_is_ibuf(index)); @@ -1731,20 +1855,22 @@ btr_search_update_hash_on_insert( ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id); if (!page_rec_is_supremum(next_rec)) { - offsets = rec_get_offsets(next_rec, index, offsets, - n_fields + (n_bytes > 0), &heap); + offsets = rec_get_offsets( + next_rec, index, offsets, + btr_search_get_n_fields(n_fields, n_bytes), &heap); next_fold = rec_fold(next_rec, offsets, n_fields, n_bytes, index->id); } if (!page_rec_is_infimum(rec)) { - offsets 
= rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); + offsets = rec_get_offsets( + rec, index, offsets, + btr_search_get_n_fields(n_fields, n_bytes), &heap); fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id); } else { if (left_side) { - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); locked = TRUE; @@ -1762,7 +1888,7 @@ btr_search_update_hash_on_insert( if (!locked) { - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); locked = TRUE; @@ -1784,7 +1910,7 @@ check_next_rec: if (!left_side) { if (!locked) { - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); locked = TRUE; @@ -1803,7 +1929,7 @@ check_next_rec: if (!locked) { - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock(index); locked = TRUE; @@ -1813,13 +1939,7 @@ check_next_rec: } if (!left_side) { - ha_insert_for_fold(table, ins_fold, block, ins_rec); - /* - fputs("Hash insert for ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " fold %lu\n", ins_fold); - */ } else { ha_insert_for_fold(table, next_fold, block, next_rec); } @@ -1830,21 +1950,20 @@ function_exit: mem_heap_free(heap); } if (locked) { - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock(index); } } #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/********************************************************************//** -Validates the search system. -@return TRUE if ok */ -UNIV_INTERN + +/** Validates the search system for given hash table. 
+@param[in] hash_table_id hash table to validate +@return TRUE if ok */ +static ibool -btr_search_validate(void) -/*=====================*/ +btr_search_hash_table_validate(ulint hash_table_id) { ha_node_t* node; - ulint n_page_dumps = 0; ibool ok = TRUE; ulint i; ulint cell_count; @@ -1852,34 +1971,54 @@ btr_search_validate(void) ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + if (!btr_search_enabled) { + return(TRUE); + } + /* How many cells to check before temporarily releasing - btr_search_latch. */ + search latches. */ ulint chunk_size = 10000; rec_offs_init(offsets_); - rw_lock_x_lock(&btr_search_latch); + btr_search_x_lock_all(); buf_pool_mutex_enter_all(); - cell_count = hash_get_n_cells(btr_search_sys->hash_index); + cell_count = hash_get_n_cells( + btr_search_sys->hash_tables[hash_table_id]); for (i = 0; i < cell_count; i++) { - /* We release btr_search_latch every once in a while to + /* We release search latches every once in a while to give other queries a chance to run. 
*/ if ((i != 0) && ((i % chunk_size) == 0)) { + buf_pool_mutex_exit_all(); - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock_all(); + os_thread_yield(); - rw_lock_x_lock(&btr_search_latch); + + btr_search_x_lock_all(); buf_pool_mutex_enter_all(); + + ulint curr_cell_count = hash_get_n_cells( + btr_search_sys->hash_tables[hash_table_id]); + + if (cell_count != curr_cell_count) { + + cell_count = curr_cell_count; + + if (i >= cell_count) { + break; + } + } } - node = (ha_node_t*) - hash_get_nth_cell(btr_search_sys->hash_index, i)->node; + node = (ha_node_t*) hash_get_nth_cell( + btr_search_sys->hash_tables[hash_table_id], i)->node; for (; node != NULL; node = node->next) { const buf_block_t* block - = buf_block_align((byte*) node->data); + = buf_block_from_ahi((byte*) node->data); const buf_block_t* hash_block; buf_pool_t* buf_pool; index_id_t page_index_id; @@ -1896,8 +2035,7 @@ btr_search_validate(void) assertion and the comment below) */ hash_block = buf_block_hash_get( buf_pool, - buf_block_get_space(block), - buf_block_get_page_no(block)); + block->page.id); } else { hash_block = NULL; } @@ -1913,94 +2051,115 @@ btr_search_validate(void) After that, it invokes btr_search_drop_page_hash_index() to remove the block from - btr_search_sys->hash_index. */ + btr_search_sys->hash_tables[i]. 
*/ ut_a(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH); } ut_a(!dict_index_is_ibuf(block->index)); + ut_ad(block->page.id.space() == block->index->space); page_index_id = btr_page_get_index_id(block->frame); - offsets = rec_get_offsets(node->data, - block->index, offsets, - block->curr_n_fields - + (block->curr_n_bytes > 0), - &heap); + offsets = rec_get_offsets( + node->data, block->index, offsets, + btr_search_get_n_fields(block->curr_n_fields, + block->curr_n_bytes), + &heap); - if (!block->index || node->fold - != rec_fold(node->data, - offsets, - block->curr_n_fields, - block->curr_n_bytes, - page_index_id)) { + const ulint fold = rec_fold( + node->data, offsets, + block->curr_n_fields, + block->curr_n_bytes, + page_index_id); + + if (node->fold != fold) { const page_t* page = block->frame; ok = FALSE; - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error in an adaptive hash" - " index pointer to page %lu\n" - "InnoDB: ptr mem address %p" - " index id %llu," - " node fold %lu, rec fold %lu\n", - (ulong) page_get_page_no(page), - node->data, - (ullint) page_index_id, - (ulong) node->fold, - (ulong) rec_fold(node->data, - offsets, - block->curr_n_fields, - block->curr_n_bytes, - page_index_id)); + ib::error() << "Error in an adaptive hash" + << " index pointer to page " + << page_id_t(page_get_space_id(page), + page_get_page_no(page)) + << ", ptr mem address " + << reinterpret_cast( + node->data) + << ", index id " << page_index_id + << ", node fold " << node->fold + << ", rec fold " << fold; fputs("InnoDB: Record ", stderr); rec_print_new(stderr, node->data, offsets); fprintf(stderr, "\nInnoDB: on that page." 
" Page mem address %p, is hashed %p," - " n fields %lu, n bytes %lu\n" + " n fields %lu\n" "InnoDB: side %lu\n", (void*) page, (void*) block->index, (ulong) block->curr_n_fields, - (ulong) block->curr_n_bytes, (ulong) block->curr_left_side); - - if (n_page_dumps < 20) { - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - n_page_dumps++; - } + ut_ad(0); } } } for (i = 0; i < cell_count; i += chunk_size) { - ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1); - - /* We release btr_search_latch every once in a while to + /* We release search latches every once in a while to give other queries a chance to run. */ if (i != 0) { + buf_pool_mutex_exit_all(); - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock_all(); + os_thread_yield(); - rw_lock_x_lock(&btr_search_latch); + + btr_search_x_lock_all(); buf_pool_mutex_enter_all(); + + ulint curr_cell_count = hash_get_n_cells( + btr_search_sys->hash_tables[hash_table_id]); + + if (cell_count != curr_cell_count) { + + cell_count = curr_cell_count; + + if (i >= cell_count) { + break; + } + } } - if (!ha_validate(btr_search_sys->hash_index, i, end_index)) { + ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1); + + if (!ha_validate(btr_search_sys->hash_tables[hash_table_id], + i, end_index)) { ok = FALSE; } } buf_pool_mutex_exit_all(); - rw_lock_x_unlock(&btr_search_latch); + btr_search_x_unlock_all(); + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } return(ok); } + +/** Validate the search system. +@return true if ok. 
*/ +bool +btr_search_validate() +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + if (!btr_search_hash_table_validate(i)) { + return(false); + } + } + + return(true); +} + #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index f2ab73217e0..1d6083a5f77 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -23,12 +23,10 @@ Binary buddy allocator for compressed pages Created December 2006 by Marko Makela *******************************************************/ -#define THIS_MODULE #include "buf0buddy.h" #ifdef UNIV_NONINL # include "buf0buddy.ic" #endif -#undef THIS_MODULE #include "buf0buf.h" #include "buf0lru.h" #include "buf0flu.h" @@ -71,11 +69,11 @@ are written.*/ /** Value that we stamp on all buffers that are currently on the zip_free list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */ -#define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID) +#define BUF_BUDDY_STAMP_FREE SRV_LOG_SPACE_FIRST_ID /** Stamp value for non-free buffers. Will be overwritten by a non-zero value by the consumer of the block */ -#define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF) +#define BUF_BUDDY_STAMP_NONFREE 0XFFFFFFFFUL #if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE # error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE" @@ -111,7 +109,7 @@ buf_buddy_mem_invalid( /**********************************************************************//** Check if a buddy is stamped free. -@return whether the buddy is free */ +@return whether the buddy is free */ UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) bool buf_buddy_stamp_is_free( @@ -140,7 +138,7 @@ buf_buddy_stamp_free( /**********************************************************************//** Stamps a buddy nonfree. 
-@param[in/out] buf block to stamp +@param[in,out] buf block to stamp @param[in] i block size */ #define buf_buddy_stamp_nonfree(buf, i) do { \ buf_buddy_mem_invalid(buf, i); \ @@ -152,7 +150,7 @@ Stamps a buddy nonfree. /**********************************************************************//** Get the offset of the buddy of a compressed page frame. -@return the buddy relative of page */ +@return the buddy relative of page */ UNIV_INLINE void* buf_buddy_get( @@ -174,23 +172,33 @@ buf_buddy_get( } } +#ifdef UNIV_DEBUG /** Validate a given zip_free list. */ struct CheckZipFree { - ulint i; - CheckZipFree(ulint i) : i (i) {} + CheckZipFree(ulint i) : m_i(i) {} void operator()(const buf_buddy_free_t* elem) const { ut_a(buf_buddy_stamp_is_free(elem)); - ut_a(elem->stamp.size <= i); + ut_a(elem->stamp.size <= m_i); } + + ulint m_i; }; -#define BUF_BUDDY_LIST_VALIDATE(bp, i) \ - UT_LIST_VALIDATE(list, buf_buddy_free_t, \ - bp->zip_free[i], CheckZipFree(i)) +/** Validate a buddy list. +@param[in] buf_pool buffer pool instance +@param[in] i buddy size to validate */ +static +void +buf_buddy_list_validate( + const buf_pool_t* buf_pool, + ulint i) +{ + CheckZipFree check(i); + ut_list_validate(buf_pool->zip_free[i], check); +} -#ifdef UNIV_DEBUG /**********************************************************************//** Debug function to validate that a buffer is indeed free i.e.: in the zip_free[]. 
@@ -282,8 +290,8 @@ buf_buddy_add_to_free( ut_ad(buf_pool->zip_free[i].start != buf); buf_buddy_stamp_free(buf, i); - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf); - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); + UT_LIST_ADD_FIRST(buf_pool->zip_free[i], buf); + ut_d(buf_buddy_list_validate(buf_pool, i)); } /**********************************************************************//** @@ -293,20 +301,21 @@ void buf_buddy_remove_from_free( /*=======================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_buddy_free_t* buf, /*!< in,own: block to be freed */ + buf_buddy_free_t* buf, /*!< in,own: block to be + freed */ ulint i) /*!< in: index of buf_pool->zip_free[] */ { ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_buddy_check_free(buf_pool, buf, i)); - UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf); + UT_LIST_REMOVE(buf_pool->zip_free[i], buf); buf_buddy_stamp_nonfree(buf, i); } /**********************************************************************//** Try to allocate a block from buf_pool->zip_free[]. 
-@return allocated block, or NULL if buf_pool->zip_free[] was empty */ +@return allocated block, or NULL if buf_pool->zip_free[] was empty */ static buf_buddy_free_t* buf_buddy_alloc_zip( @@ -320,10 +329,22 @@ buf_buddy_alloc_zip( ut_a(i < BUF_BUDDY_SIZES); ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); + ut_d(buf_buddy_list_validate(buf_pool, i)); buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); + if (buf_pool->curr_size < buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->withdraw) + < buf_pool->withdraw_target) { + + while (buf != NULL + && buf_frame_will_withdrawn( + buf_pool, reinterpret_cast(buf))) { + /* This should be withdrawn, not to be allocated */ + buf = UT_LIST_GET_NEXT(list, buf); + } + } + if (buf) { buf_buddy_remove_from_free(buf_pool, buf, i); } else if (i + 1 < BUF_BUDDY_SIZES) { @@ -388,9 +409,9 @@ buf_buddy_block_free( UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE); block = (buf_block_t*) bpage; - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); ut_ad(buf_pool->buddy_n_frames > 0); ut_d(buf_pool->buddy_n_frames--); @@ -425,7 +446,7 @@ buf_buddy_block_register( /**********************************************************************//** Allocate a block from a bigger object. -@return allocated block */ +@return allocated block */ static void* buf_buddy_alloc_from( @@ -463,8 +484,7 @@ buf_buddy_alloc_from( Allocate a block. The thread calling this function must hold buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. The buf_pool_mutex may be released and reacquired. -@return allocated block, never NULL */ -UNIV_INTERN +@return allocated block, never NULL */ void* buf_buddy_alloc_low( /*================*/ @@ -520,7 +540,7 @@ func_exit: /**********************************************************************//** Try to relocate a block. 
-@return true if relocated */ +@return true if relocated */ static bool buf_buddy_relocate( @@ -528,11 +548,13 @@ buf_buddy_relocate( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* src, /*!< in: block to relocate */ void* dst, /*!< in: free block to relocate to */ - ulint i) /*!< in: index of + ulint i, /*!< in: index of buf_pool->zip_free[] */ + bool force) /*!< in: true if we must relocate + always */ { buf_page_t* bpage; - const ulint size = BUF_BUDDY_LOW << i; + const ulint size = BUF_BUDDY_LOW << i; ulint space; ulint offset; @@ -555,12 +577,19 @@ buf_buddy_relocate( ut_ad(space != BUF_BUDDY_STAMP_FREE); - ulint fold = buf_page_address_fold(space, offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); + const page_id_t page_id(space, offset); + + /* If space,offset is bogus, then we know that the + buf_page_hash_get_low() call below will return NULL. */ + if (!force && buf_pool != buf_pool_get(page_id)) { + return(false); + } + + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, page_id); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -570,7 +599,27 @@ buf_buddy_relocate( rw_lock_x_unlock(hash_lock); - return(false); + if (!force || space != 0 || offset != 0) { + return(false); + } + + /* It might be just uninitialized page. + We should search from LRU list also. */ + + bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + while (bpage != NULL) { + if (bpage->zip.data == src) { + hash_lock = buf_page_hash_lock_get( + buf_pool, bpage->id); + rw_lock_x_lock(hash_lock); + break; + } + bpage = UT_LIST_GET_NEXT(LRU, bpage); + } + + if (bpage == NULL) { + return(false); + } } if (page_zip_get_size(&bpage->zip) != size) { @@ -588,20 +637,17 @@ buf_buddy_relocate( contain uninitialized data. 
*/ UNIV_MEM_ASSERT_W(src, size); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ - ullint usec = ut_time_us(NULL); + uintmax_t usec = ut_time_us(NULL); ut_a(bpage->zip.data == src); - /* Note: This is potentially expensive, we need a better - solution here. We go with correctness for now. */ - ::memcpy(dst, src, size); - + memcpy(dst, src, size); bpage->zip.data = reinterpret_cast(dst); rw_lock_x_unlock(hash_lock); @@ -612,24 +658,19 @@ buf_buddy_relocate( reinterpret_cast(src), i); buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; - - ++buddy_stat->relocated; - + buddy_stat->relocated++; buddy_stat->relocated_usec += ut_time_us(NULL) - usec; - return(true); } rw_lock_x_unlock(hash_lock); mutex_exit(block_mutex); - return(false); } /**********************************************************************//** Deallocate a block. */ -UNIV_INTERN void buf_buddy_free_low( /*===============*/ @@ -663,7 +704,8 @@ recombine: /* Do not recombine blocks if there are few free blocks. We may waste up to 15360*max_len bytes to free blocks (1024 + 2048 + 4096 + 8192 = 15360) */ - if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) { + if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16 + && buf_pool->curr_size >= buf_pool->old_size) { goto func_exit; } @@ -684,7 +726,7 @@ buddy_is_free: goto recombine; case BUF_BUDDY_STATE_USED: - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); + ut_d(buf_buddy_list_validate(buf_pool, i)); /* The buddy is not free. Is there a free block of this size? */ @@ -698,7 +740,8 @@ buddy_is_free: /* Try to relocate the buddy of buf to the free block. */ - if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) { + if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i, + false)) { goto buddy_is_free; } @@ -719,3 +762,119 @@ func_exit: reinterpret_cast(buf), i); } + +/** Reallocate a block. 
+@param[in] buf_pool buffer pool instance +@param[in] buf block to be reallocated, must be pointed +to by the buffer pool +@param[in] size block size, up to UNIV_PAGE_SIZE +@retval false if failed because of no free blocks. */ +bool +buf_buddy_realloc( + buf_pool_t* buf_pool, + void* buf, + ulint size) +{ + buf_block_t* block = NULL; + ulint i = buf_buddy_get_slot(size); + + ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(!mutex_own(&buf_pool->zip_mutex)); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + + if (i < BUF_BUDDY_SIZES) { + /* Try to allocate from the buddy system. */ + block = reinterpret_cast( + buf_buddy_alloc_zip(buf_pool, i)); + } + + if (block == NULL) { + /* Try allocating from the buf_pool->free list. */ + block = buf_LRU_get_free_only(buf_pool); + + if (block == NULL) { + return(false); /* free_list was not enough */ + } + + buf_buddy_block_register(block); + + block = reinterpret_cast( + buf_buddy_alloc_from( + buf_pool, block->frame, i, BUF_BUDDY_SIZES)); + } + + buf_pool->buddy_stat[i].used++; + + /* Try to relocate the buddy of buf to the free block. */ + if (buf_buddy_relocate(buf_pool, buf, block, i, true)) { + /* succeeded */ + buf_buddy_free_low(buf_pool, buf, i); + } else { + /* failed */ + buf_buddy_free_low(buf_pool, block, i); + } + + return(true); /* free_list was enough */ +} + +/** Combine all pairs of free buddies. 
+@param[in] buf_pool buffer pool instance */ +void +buf_buddy_condense_free( + buf_pool_t* buf_pool) +{ + ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(buf_pool->curr_size < buf_pool->old_size); + + for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) { + buf_buddy_free_t* buf = + UT_LIST_GET_FIRST(buf_pool->zip_free[i]); + + /* seek to withdraw target */ + while (buf != NULL + && !buf_frame_will_withdrawn( + buf_pool, reinterpret_cast(buf))) { + buf = UT_LIST_GET_NEXT(list, buf); + } + + while (buf != NULL) { + buf_buddy_free_t* next = + UT_LIST_GET_NEXT(list, buf); + + buf_buddy_free_t* buddy = + reinterpret_cast( + buf_buddy_get( + reinterpret_cast(buf), + BUF_BUDDY_LOW << i)); + + /* seek to the next withdraw target */ + while (true) { + while (next != NULL + && !buf_frame_will_withdrawn( + buf_pool, + reinterpret_cast(next))) { + next = UT_LIST_GET_NEXT(list, next); + } + + if (buddy != next) { + break; + } + + next = UT_LIST_GET_NEXT(list, next); + } + + if (buf_buddy_is_free(buddy, i) + == BUF_BUDDY_STATE_FREE) { + /* Both buf and buddy are free. + Try to combine them. 
*/ + buf_buddy_remove_from_free(buf_pool, buf, i); + buf_pool->buddy_stat[i].used++; + + buf_buddy_free_low(buf_pool, buf, i); + } + + buf = next; + } + } +} diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 9ad0dd3b854..16fbbd14ebd 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -31,35 +31,54 @@ The database buffer buf_pool Created 11/5/1995 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + +#include "page0size.h" #include "buf0buf.h" #ifdef UNIV_NONINL #include "buf0buf.ic" #endif - +#ifdef UNIV_INNOCHECKSUM +#include "string.h" +#include "mach0data.h" +#endif /* UNIV_INNOCHECKSUM */ +#ifndef UNIV_INNOCHECKSUM #include "mem0mem.h" #include "btr0btr.h" #include "fil0fil.h" #include "fil0crypt.h" +#include "fsp0sysspace.h" #ifndef UNIV_HOTBACKUP #include "buf0buddy.h" #include "lock0lock.h" +#include "sync0rw.h" #include "btr0sea.h" #include "ibuf0ibuf.h" #include "trx0undo.h" +#include "trx0purge.h" #include "log0log.h" +#include "dict0stats_bg.h" #endif /* !UNIV_HOTBACKUP */ #include "srv0srv.h" +#include "srv0start.h" #include "dict0dict.h" #include "log0recv.h" -#include "page0zip.h" #include "srv0mon.h" +#include "fsp0sysspace.h" +#endif /* !UNIV_INNOCHECKSUM */ +#include "page0zip.h" #include "buf0checksum.h" -#ifdef HAVE_LIBNUMA -#include -#include -#endif // HAVE_LIBNUMA +#include "sync0sync.h" +#include "buf0dump.h" +#include "ut0new.h" +#include +#include +#include +#ifndef UNIV_INNOCHECKSUM #include "fil0pagecompress.h" +#include "fsp0pagecompress.h" +#endif #include "ha_prototypes.h" #include "ut0byte.h" #include @@ -68,6 +87,49 @@ Created 11/5/1995 Heikki Tuuri #include "lzo/lzo1x.h" #endif +#if defined(HAVE_LIBNUMA) && defined(WITH_NUMA) +#include +#include +struct set_numa_interleave_t +{ + set_numa_interleave_t() + { + if (srv_numa_interleave) { + + struct bitmask *numa_mems_allowed = numa_get_mems_allowed(); + ib::info() << 
"Setting NUMA memory policy to" + " MPOL_INTERLEAVE"; + if (set_mempolicy(MPOL_INTERLEAVE, + numa_mems_allowed->maskp, + numa_mems_allowed->size) != 0) { + + ib::warn() << "Failed to set NUMA memory" + " policy to MPOL_INTERLEAVE: " + << strerror(errno); + } + } + } + + ~set_numa_interleave_t() + { + if (srv_numa_interleave) { + + ib::info() << "Setting NUMA memory policy to" + " MPOL_DEFAULT"; + if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { + ib::warn() << "Failed to set NUMA memory" + " policy to MPOL_DEFAULT: " + << strerror(errno); + } + } + } +}; + +#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE set_numa_interleave_t scoped_numa +#else +#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE +#endif /* HAVE_LIBNUMA && WITH_NUMA */ + /* IMPLEMENTATION OF THE BUFFER POOL ================================= @@ -256,41 +318,58 @@ that the whole area may be needed in the near future, and issue the read requests for the whole area. */ -#ifndef UNIV_HOTBACKUP +#if (!(defined(UNIV_HOTBACKUP) || defined(UNIV_INNOCHECKSUM))) /** Value in microseconds */ static const int WAIT_FOR_READ = 100; -/** Number of attemtps made to read in a page in the buffer pool */ -static const ulint BUF_PAGE_READ_MAX_RETRIES = 100; +static const int WAIT_FOR_WRITE = 100; +/** Number of attempts made to read in a page in the buffer pool */ +static const ulint BUF_PAGE_READ_MAX_RETRIES = 100; +/** Number of pages to read ahead */ +static const ulint BUF_READ_AHEAD_PAGES = 64; +/** The maximum portion of the buffer pool that can be used for the +read-ahead buffer. (Divide buf_pool size by this amount) */ +static const ulint BUF_READ_AHEAD_PORTION = 32; /** The buffer pools of the database */ -UNIV_INTERN buf_pool_t* buf_pool_ptr; +buf_pool_t* buf_pool_ptr; + +/** true when resizing buffer pool is in the critical path. 
*/ +volatile bool buf_pool_resizing; + +/** true when withdrawing buffer pool pages might cause page relocation */ +volatile bool buf_pool_withdrawing; + +/** the clock is incremented every time a pointer to a page may become obsolete; +if the withdrwa clock has not changed, the pointer is still valid in buffer +pool. if changed, the pointer might not be in buffer pool any more. */ +volatile ulint buf_withdraw_clock; + +/** Map of buffer pool chunks by its first frame address +This is newly made by initialization of buffer pool and buf_resize_thread. +Currently, no need mutex protection for update. */ +typedef std::map< + const byte*, + buf_chunk_t*, + std::less, + ut_allocator > > + buf_pool_chunk_map_t; + +static buf_pool_chunk_map_t* buf_chunk_map_reg; + +/** Chunk map to be used to lookup. +The map pointed by this should not be updated */ +static buf_pool_chunk_map_t* buf_chunk_map_ref = NULL; -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -static ulint buf_dbg_counter = 0; /*!< This is used to insert validation - operations in execution in the - debug version */ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #ifdef UNIV_DEBUG -/** If this is set TRUE, the program prints info whenever -read-ahead or flush occurs */ -UNIV_INTERN ibool buf_debug_prints = FALSE; +/** Disable resizing buffer pool to make assertion code not expensive. 
*/ +my_bool buf_disable_resize_buffer_pool_debug = TRUE; #endif /* UNIV_DEBUG */ -#ifdef UNIV_PFS_RWLOCK -/* Keys to register buffer block related rwlocks and mutexes with -performance schema */ -UNIV_INTERN mysql_pfs_key_t buf_block_lock_key; -# ifdef UNIV_SYNC_DEBUG -UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key; -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key; -UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key; -#endif /* UNIV_PFS_MUTEX */ +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/** This is used to insert validation operations in execution +in the debug version */ +static ulint buf_dbg_counter = 0; +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK @@ -317,6 +396,17 @@ on the io_type */ ? (counter##_READ) \ : (counter##_WRITTEN)) +/** Registers a chunk to buf_pool_chunk_map +@param[in] chunk chunk of buffers */ +static +void +buf_pool_register_chunk( + buf_chunk_t* chunk) +{ + buf_chunk_map_reg->insert(buf_pool_chunk_map_t::value_type( + chunk->blocks->frame, chunk)); +} + /********************************************************************//** Check if page is maybe compressed, encrypted or both when we encounter corrupted page. Note that we can't be 100% sure if page is corrupted @@ -333,13 +423,10 @@ buf_page_check_corrupt( Gets the smallest oldest_modification lsn for any page in the pool. Returns zero if all modified pages have been flushed to disk. 
@return oldest modification in pool, zero if none */ -UNIV_INTERN lsn_t buf_pool_get_oldest_modification(void) /*==================================*/ { - ulint i; - buf_page_t* bpage; lsn_t lsn = 0; lsn_t oldest_lsn = 0; @@ -347,14 +434,24 @@ buf_pool_get_oldest_modification(void) thread to add a dirty page to any flush list. */ log_flush_order_mutex_enter(); - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); buf_flush_list_mutex_enter(buf_pool); - bpage = UT_LIST_GET_LAST(buf_pool->flush_list); + buf_page_t* bpage; + + /* We don't let log-checkpoint halt because pages from system + temporary are not yet flushed to the disk. Anyway, object + residing in system temporary doesn't generate REDO logging. */ + for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); + bpage != NULL + && fsp_is_system_temporary(bpage->id.space()); + bpage = UT_LIST_GET_PREV(list, bpage)) { + /* Do nothing. */ + } if (bpage != NULL) { ut_ad(bpage->in_flush_list); @@ -378,7 +475,6 @@ buf_pool_get_oldest_modification(void) /********************************************************************//** Get total buffer pool statistics. */ -UNIV_INTERN void buf_get_total_list_len( /*===================*/ @@ -405,7 +501,6 @@ buf_get_total_list_len( /********************************************************************//** Get total list size in bytes from all buffer pools. */ -UNIV_INTERN void buf_get_total_list_size_in_bytes( /*=============================*/ @@ -431,7 +526,6 @@ buf_get_total_list_size_in_bytes( /********************************************************************//** Get total buffer pool statistics. */ -UNIV_INTERN void buf_get_total_stat( /*===============*/ @@ -465,7 +559,6 @@ buf_get_total_stat( /********************************************************************//** Allocates a buffer block. 
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ -UNIV_INTERN buf_block_t* buf_block_alloc( /*============*/ @@ -490,21 +583,18 @@ buf_block_alloc( return(block); } -#endif /* !UNIV_HOTBACKUP */ +#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */ -/********************************************************************//** -Checks if a page is all zeroes. -@return TRUE if the page is all zeroes */ +/** Checks if a page contains only zeroes. +@param[in] read_buf database page +@param[in] page_size page size +@return true if page is filled with zeroes */ bool buf_page_is_zeroes( -/*===============*/ - const byte* read_buf, /*!< in: a database page */ - const ulint zip_size) /*!< in: size of compressed page; - 0 for uncompressed pages */ + const byte* read_buf, + const page_size_t& page_size) { - const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE; - - for (ulint i = 0; i < page_size; i++) { + for (ulint i = 0; i < page_size.logical(); i++) { if (read_buf[i] != 0) { return(false); } @@ -513,33 +603,82 @@ buf_page_is_zeroes( } /** Checks if the page is in crc32 checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in crc32 checksum format */ +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@param[in] use_legacy_big_endian use legacy big endian algorithm +@return true if the page is in crc32 checksum format. 
*/ UNIV_INLINE bool buf_page_is_checksum_valid_crc32( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2, +#ifdef UNIV_INNOCHECKSUM + uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo, +#endif /* UNIV_INNOCHECKSUM */ + bool use_legacy_big_endian) { - ib_uint32_t crc32 = buf_calc_page_crc32(read_buf); + const uint32_t crc32 = buf_calc_page_crc32(read_buf, + use_legacy_big_endian); - return(checksum_field1 == crc32 && checksum_field2 == crc32); +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled + && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { + fprintf(log_file, "page::%lu;" + " crc32 calculated = %u;" + " recorded checksum field1 = %lu recorded" + " checksum field2 =%lu\n", page_no, + crc32, checksum_field1, checksum_field2); + } +#endif /* UNIV_INNOCHECKSUM */ + + if (checksum_field1 != checksum_field2) { + return(false); + } + + if (checksum_field1 == crc32) { + return(true); + } + + const uint32_t crc32_legacy = buf_calc_page_crc32(read_buf, true); + + if (checksum_field1 == crc32_legacy) { + return(true); + } + + return(false); } /** Checks if the page is in innodb checksum format. @param[in] read_buf database page @param[in] checksum_field1 new checksum field @param[in] checksum_field2 old checksum field -@return true if the page is in innodb checksum format */ +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@return true if the page is in innodb checksum format. 
*/ UNIV_INLINE bool buf_page_is_checksum_valid_innodb( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2 +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo +#endif /* UNIV_INNOCHECKSUM */ + ) { /* There are 2 valid formulas for checksum_field2 (old checksum field) which algo=innodb could have @@ -551,8 +690,41 @@ buf_page_is_checksum_valid_innodb( 2. Newer InnoDB versions store the old formula checksum (buf_calc_page_old_checksum()). */ + ulint old_checksum = buf_calc_page_old_checksum(read_buf); + ulint new_checksum = buf_calc_page_new_checksum(read_buf); + +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled + && curr_algo == SRV_CHECKSUM_ALGORITHM_INNODB) { + fprintf(log_file, "page::%lu;" + " old style: calculated =" + " %lu; recorded = %lu\n", + page_no, old_checksum, + checksum_field2); + fprintf(log_file, "page::%lu;" + " new style: calculated =" + " %lu; crc32 = %u; recorded = %lu\n", + page_no, new_checksum, + buf_calc_page_crc32(read_buf), checksum_field1); + } + + if (is_log_enabled + && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { + fprintf(log_file, "page::%lu;" + " old style: calculated =" + " %lu; recorded checksum = %lu\n", + page_no, old_checksum, + checksum_field2); + fprintf(log_file, "page::%lu;" + " new style: calculated =" + " %lu; recorded checksum = %lu\n", + page_no, new_checksum, + checksum_field1); + } +#endif /* UNIV_INNOCHECKSUM */ + if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN) - && checksum_field2 != buf_calc_page_old_checksum(read_buf)) { + && checksum_field2 != old_checksum) { return(false); } @@ -561,8 +733,7 @@ buf_page_is_checksum_valid_innodb( /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */ - if (checksum_field1 != 0 - && checksum_field1 != 
buf_calc_page_new_checksum(read_buf)) { + if (checksum_field1 != 0 && checksum_field1 != new_checksum) { return(false); } @@ -573,38 +744,77 @@ buf_page_is_checksum_valid_innodb( @param[in] read_buf database page @param[in] checksum_field1 new checksum field @param[in] checksum_field2 old checksum field -@return true if the page is in none checksum format */ +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@return true if the page is in none checksum format. */ UNIV_INLINE bool buf_page_is_checksum_valid_none( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2 +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo +#endif /* UNIV_INNOCHECKSUM */ + ) { + +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled + && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) { + fprintf(log_file, + "page::%lu; none checksum: calculated" + " = %lu; recorded checksum_field1 = %lu" + " recorded checksum_field2 = %lu\n", + page_no, BUF_NO_CHECKSUM_MAGIC, + checksum_field1, checksum_field2); + } +#endif /* UNIV_INNOCHECKSUM */ + + return(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC); } -/********************************************************************//** -Checks if a page is corrupt. -@return TRUE if corrupted */ -UNIV_INTERN +/** Checks if a page is corrupt. 
+@param[in] check_lsn true if we need to check and complain about +the LSN +@param[in] read_buf database page +@param[in] page_size page size +@param[in] skip_checksum if true, skip checksum +@param[in] page_no page number of given read_buf +@param[in] strict_check true if strict-check option is enabled +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@return TRUE if corrupted */ ibool buf_page_is_corrupted( -/*==================*/ - bool check_lsn, /*!< in: true if we need to check - and complain about the LSN */ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size) /*!< in: size of compressed page; - 0 for uncompressed pages */ + bool check_lsn, + const byte* read_buf, + const page_size_t& page_size, + bool skip_checksum +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool strict_check, + bool is_log_enabled, + FILE* log_file +#endif /* UNIV_INNOCHECKSUM */ +) { - ulint page_encrypted = fil_page_is_encrypted(read_buf); + ulint page_encrypted = (mach_read_from_4(read_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0); ulint checksum_field1; ulint checksum_field2; - if (!page_encrypted && !zip_size + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); + + if (!page_encrypted && !page_size.is_compressed() && memcmp(read_buf + FIL_PAGE_LSN + 4, - read_buf + UNIV_PAGE_SIZE + read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { /* Stored log sequence numbers at the start and the end @@ -613,47 +823,55 @@ buf_page_is_corrupted( return(TRUE); } -#ifndef UNIV_HOTBACKUP +#if !defined(UNIV_HOTBACKUP) && !defined(UNIV_INNOCHECKSUM) if (check_lsn && recv_lsn_checks_on) { - lsn_t current_lsn; + lsn_t current_lsn; + const lsn_t page_lsn + = mach_read_from_8(read_buf + FIL_PAGE_LSN); /* Since we are going to reset the page LSN during the import phase it makes no sense to spam the log with error messages. 
*/ - if (log_peek_lsn(¤t_lsn) - && current_lsn - < mach_read_from_8(read_buf + FIL_PAGE_LSN)) { - ut_print_timestamp(stderr); + if (log_peek_lsn(¤t_lsn) && current_lsn < page_lsn) { + + const ulint space_id = mach_read_from_4( + read_buf + FIL_PAGE_SPACE_ID); + const ulint page_no = mach_read_from_4( + read_buf + FIL_PAGE_OFFSET); + + ib::error() << "Page " << page_id_t(space_id, page_no) + << " log sequence number " << page_lsn + << " is in the future! Current system" + << " log sequence number " + << current_lsn << "."; + + ib::error() << "Your database may be corrupt or" + " you may have copied the InnoDB" + " tablespace but not the InnoDB" + " log files. " + << FORCE_RECOVERY_MSG; - fprintf(stderr, - " InnoDB: Error: page %lu log sequence number" - " " LSN_PF "\n" - "InnoDB: is in the future! Current system " - "log sequence number " LSN_PF ".\n" - "InnoDB: Your database may be corrupt or " - "you may have copied the InnoDB\n" - "InnoDB: tablespace but not the InnoDB " - "log files. 
See\n" - "InnoDB: " REFMAN - "forcing-innodb-recovery.html\n" - "InnoDB: for more information.\n", - (ulint) mach_read_from_4( - read_buf + FIL_PAGE_OFFSET), - (lsn_t) mach_read_from_8( - read_buf + FIL_PAGE_LSN), - current_lsn); } } -#endif +#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */ /* Check whether the checksum fields have correct values */ - if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { + if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE + || skip_checksum) { return(FALSE); } - if (zip_size) { - return(!page_zip_verify_checksum(read_buf, zip_size)); + if (page_size.is_compressed()) { +#ifdef UNIV_INNOCHECKSUM + return(!page_zip_verify_checksum(read_buf, + page_size.physical(), + page_no, strict_check, + is_log_enabled, log_file)); +#else + return(!page_zip_verify_checksum(read_buf, + page_size.physical())); +#endif /* UNIV_INNOCHECKSUM */ } if (page_encrypted) { return (FALSE); @@ -663,131 +881,299 @@ buf_page_is_corrupted( read_buf + FIL_PAGE_SPACE_OR_CHKSUM); checksum_field2 = mach_read_from_4( - read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM); #if FIL_PAGE_LSN % 8 #error "FIL_PAGE_LSN must be 64 bit aligned" #endif /* declare empty pages non-corrupted */ - if (checksum_field1 == 0 && checksum_field2 == 0 - && *reinterpret_cast(read_buf + - FIL_PAGE_LSN) == 0) { + if (checksum_field1 == 0 + && checksum_field2 == 0 + && *reinterpret_cast( + read_buf + FIL_PAGE_LSN) == 0) { + /* make sure that the page is really empty */ - for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { - if (read_buf[i] != 0) { - return(TRUE); + + ulint i; + + for (i = 0; i < page_size.logical(); ++i) { + + /* The FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID has been + repurposed for page compression. It can be + set for uncompressed empty pages. 
*/ + + if ((i < FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + || i >= FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID) + && read_buf[i] != 0) { + + break; } } - - return(FALSE); +#ifdef UNIV_INNOCHECKSUM + if (i >= page_size.logical()) { + if (is_log_enabled) { + fprintf(log_file, "Page::%lu" + " is empty and uncorrupted\n", + page_no); + } + return(FALSE); + } +#else + return(i < page_size.logical()); +#endif /* UNIV_INNOCHECKSUM */ } - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); +#ifndef UNIV_INNOCHECKSUM + const page_id_t page_id(mach_read_from_4( + read_buf + FIL_PAGE_SPACE_ID), + mach_read_from_4( + read_buf + FIL_PAGE_OFFSET)); +#endif /* UNIV_INNOCHECKSUM */ - ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET); - ulint space_id = mach_read_from_4(read_buf + FIL_PAGE_SPACE_ID); + DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(TRUE); ); const srv_checksum_algorithm_t curr_algo = static_cast(srv_checksum_algorithm); + bool legacy_checksum_checked = false; + switch (curr_algo) { case SRV_CHECKSUM_ALGORITHM_CRC32: case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: if (buf_page_is_checksum_valid_crc32(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2, +#ifdef UNIV_INNOCHECKSUM + page_no, is_log_enabled, log_file, curr_algo, +#endif /* UNIV_INNOCHECKSUM */ + false)) { return(FALSE); } if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo)) { +#else /* UNIV_INNOCHECKSUM */ + )) { if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + + fprintf(log_file, "page::%lu;" + " old style: calculated = " ULINTPF ";" + " recorded = " ULINTPF "\n", page_no, + buf_calc_page_old_checksum(read_buf), + 
checksum_field2); + fprintf(log_file, "page::%lu;" + " new style: calculated = " ULINTPF ";" + " crc32 = %u; recorded = " ULINTPF "\n", + page_no, + buf_calc_page_new_checksum(read_buf), + buf_calc_page_crc32(read_buf), + checksum_field1); + } +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } + /* We need to check whether the stored checksum matches legacy + big endian checksum or Innodb checksum. We optimize the order + based on earlier results. if earlier we have found pages + matching legacy big endian checksum, we try to match it first. + Otherwise we check innodb checksum first. */ + if (legacy_big_endian_checksum) { + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, +#ifdef UNIV_INNOCHECKSUM + page_no, is_log_enabled, log_file, curr_algo, +#endif /* UNIV_INNOCHECKSUM */ + true)) { + + return(FALSE); + } + legacy_checksum_checked = true; + } + if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo)) { +#else /* UNIV_INNOCHECKSUM */ + )) { if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); + page_id); } - +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } + /* If legacy checksum is not checked, do it now. 
*/ + if (!legacy_checksum_checked && buf_page_is_checksum_valid_crc32( + read_buf, checksum_field1, checksum_field2, +#ifdef UNIV_INNOCHECKSUM + page_no, is_log_enabled, log_file, curr_algo, +#endif /* UNIV_INNOCHECKSUM */ + true)) { + + legacy_big_endian_checksum = true; + return(FALSE); + } + +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + fprintf(log_file, "Fail; page %lu" + " invalid (fails crc32 checksum)\n", + page_no); + } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); case SRV_CHECKSUM_ALGORITHM_INNODB: case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo +#endif /* UNIV_INNOCHECKSUM */ + )) { return(FALSE); } if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo)) { +#else /* UNIV_INNOCHECKSUM */ + )) { if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + fprintf(log_file, "page::%lu;" + " old style: calculated = %lu;" + " recorded = %lu\n", page_no, + buf_calc_page_old_checksum(read_buf), + checksum_field2); + fprintf(log_file, "page::%lu;" + " new style: calculated = %lu;" + " crc32 = %u; recorded = %lu\n", + page_no, + buf_calc_page_new_checksum(read_buf), + buf_calc_page_crc32(read_buf), + checksum_field1); + } +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } +#ifdef UNIV_INNOCHECKSUM if (buf_page_is_checksum_valid_crc32(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2, + page_no, is_log_enabled, log_file, curr_algo, false) + || buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, 
+ page_no, is_log_enabled, log_file, curr_algo, true)) { +#else /* UNIV_INNOCHECKSUM */ + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, false) + || buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, true)) { + if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + fprintf(log_file, "Fail; page %lu" + " invalid (fails innodb checksum)\n", + page_no); + } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { - return(FALSE); + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo +#endif /* UNIV_INNOCHECKSUM */ + )) { + return(false); } +#ifdef UNIV_INNOCHECKSUM if (buf_page_is_checksum_valid_crc32(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2, + page_no, is_log_enabled, log_file, curr_algo, false) + || buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, + page_no, is_log_enabled, log_file, curr_algo, true)) { +#else /* UNIV_INNOCHECKSUM */ + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, false) + || buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2, true)) { + page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); + page_id); +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { + checksum_field1, checksum_field2 +#ifdef UNIV_INNOCHECKSUM + , page_no, is_log_enabled, log_file, curr_algo)) { +#else /* UNIV_INNOCHECKSUM */ + )) { page_warn_strict_checksum( curr_algo, 
SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); + page_id); +#endif /* UNIV_INNOCHECKSUM */ return(FALSE); } +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + fprintf(log_file, "Fail; page %lu" + " invalid (fails none checksum)\n", + page_no); + } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); case SRV_CHECKSUM_ALGORITHM_NONE: @@ -801,118 +1187,125 @@ buf_page_is_corrupted( return(FALSE); } -/********************************************************************//** -Prints a page to stderr. */ -UNIV_INTERN +#ifndef UNIV_INNOCHECKSUM + +/** Prints a page to stderr. +@param[in] read_buf a database page +@param[in] page_size page size +@param[in] flags 0 or BUF_PAGE_PRINT_NO_CRASH or +BUF_PAGE_PRINT_NO_FULL */ void buf_page_print( -/*===========*/ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size, /*!< in: compressed page size, or - 0 for uncompressed pages */ - ulint flags) /*!< in: 0 or - BUF_PAGE_PRINT_NO_CRASH or - BUF_PAGE_PRINT_NO_FULL */ - + const byte* read_buf, + const page_size_t& page_size, + ulint flags) { #ifndef UNIV_HOTBACKUP dict_index_t* index; #endif /* !UNIV_HOTBACKUP */ - ulint size = zip_size; - - if (!size) { - size = UNIV_PAGE_SIZE; - } if (!(flags & BUF_PAGE_PRINT_NO_FULL)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Page dump in ascii and hex (%lu bytes):\n", - size); - ut_print_buf(stderr, read_buf, size); + + ib::info() << "Page dump in ascii and hex (" + << page_size.physical() << " bytes):"; + + ut_print_buf(stderr, read_buf, page_size.physical()); fputs("\nInnoDB: End of page dump\n", stderr); } - if (zip_size) { + if (page_size.is_compressed()) { /* Print compressed page. 
*/ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Compressed page type (" ULINTPF "); " - "stored checksum in field1 " ULINTPF "; " - "calculated checksums for field1: " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - "%s " ULINTPF "; " - "page LSN " LSN_PF "; " - "page number (if stored to page already) " ULINTPF "; " - "space id (if stored to page already) " ULINTPF "\n", - fil_page_get_type(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_CRC32), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_CRC32), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_INNODB), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_INNODB), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_NONE), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_NONE), - mach_read_from_8(read_buf + FIL_PAGE_LSN), - mach_read_from_4(read_buf + FIL_PAGE_OFFSET), - mach_read_from_4(read_buf - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); + ib::info() << "Compressed page type (" + << fil_page_get_type(read_buf) + << "); stored checksum in field1 " + << mach_read_from_4( + read_buf + FIL_PAGE_SPACE_OR_CHKSUM) + << "; calculated checksums for field1: " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_CRC32) + << " " + << page_zip_calc_checksum( + read_buf, page_size.physical(), + SRV_CHECKSUM_ALGORITHM_CRC32) + << "/" + << page_zip_calc_checksum( + read_buf, page_size.physical(), + SRV_CHECKSUM_ALGORITHM_CRC32, true) + << ", " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_INNODB) + << " " + << page_zip_calc_checksum( + read_buf, page_size.physical(), + SRV_CHECKSUM_ALGORITHM_INNODB) + << ", " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_NONE) + << " " + << page_zip_calc_checksum( + read_buf, page_size.physical(), + SRV_CHECKSUM_ALGORITHM_NONE) + << "; page LSN " + << mach_read_from_8(read_buf + FIL_PAGE_LSN) + << "; page 
number (if stored to page" + << " already) " + << mach_read_from_4(read_buf + FIL_PAGE_OFFSET) + << "; space id (if stored to page already) " + << mach_read_from_4( + read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: uncompressed page, " - "stored checksum in field1 " ULINTPF ", " - "calculated checksums for field1: " - "%s " UINT32PF ", " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - - "stored checksum in field2 " ULINTPF ", " - "calculated checksums for field2: " - "%s " UINT32PF ", " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - - "page LSN " ULINTPF " " ULINTPF ", " - "low 4 bytes of LSN at page end " ULINTPF ", " - "page number (if stored to page already) " ULINTPF ", " - "space id (if created with >= MySQL-4.1.1 " - "and stored already) %lu\n", - mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32), - buf_calc_page_crc32(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB), - buf_calc_page_new_checksum(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE), - BUF_NO_CHECKSUM_MAGIC, - - mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32), - buf_calc_page_crc32(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB), - buf_calc_page_old_checksum(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE), - BUF_NO_CHECKSUM_MAGIC, - - mach_read_from_4(read_buf + FIL_PAGE_LSN), - mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), - mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), - mach_read_from_4(read_buf + FIL_PAGE_OFFSET), - mach_read_from_4(read_buf - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); + const uint32_t crc32 = buf_calc_page_crc32(read_buf); + const uint32_t crc32_legacy = buf_calc_page_crc32(read_buf, + true); ulint page_type = fil_page_get_type(read_buf); - 
fprintf(stderr, "InnoDB: page type %ld meaning %s\n", page_type, - fil_get_page_type_name(page_type)); + ib::info() << "Uncompressed page, stored checksum in field1 " + << mach_read_from_4( + read_buf + FIL_PAGE_SPACE_OR_CHKSUM) + << ", calculated checksums for field1: " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_CRC32) << " " + << crc32 << "/" << crc32_legacy + << ", " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_INNODB) << " " + << buf_calc_page_new_checksum(read_buf) + << ", " + << " page type " << page_type << " == " + << fil_get_page_type_name(page_type) << "." + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_NONE) << " " + << BUF_NO_CHECKSUM_MAGIC + << ", stored checksum in field2 " + << mach_read_from_4(read_buf + page_size.logical() + - FIL_PAGE_END_LSN_OLD_CHKSUM) + << ", calculated checksums for field2: " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_CRC32) << " " + << crc32 << "/" << crc32_legacy + << ", " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_INNODB) << " " + << buf_calc_page_old_checksum(read_buf) + << ", " + << buf_checksum_algorithm_name( + SRV_CHECKSUM_ALGORITHM_NONE) << " " + << BUF_NO_CHECKSUM_MAGIC + << ", page LSN " + << mach_read_from_4(read_buf + FIL_PAGE_LSN) + << " " + << mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) + << ", low 4 bytes of LSN at page end " + << mach_read_from_4(read_buf + page_size.logical() + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4) + << ", page number (if stored to page already) " + << mach_read_from_4(read_buf + FIL_PAGE_OFFSET) + << ", space id (if created with >= MySQL-4.1.1" + " and stored already) " + << mach_read_from_4( + read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); } #ifndef UNIV_HOTBACKUP @@ -931,17 +1324,19 @@ buf_page_print( switch (fil_page_get_type(read_buf)) { index_id_t index_id; case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: index_id = btr_page_get_index_id(read_buf); - fprintf(stderr, + ib::error() << "InnoDB: Page may be an index page 
where" - " index id is %llu\n", - (ullint) index_id); + " index id is " << index_id; + #ifndef UNIV_HOTBACKUP index = dict_index_find_on_id_low(index_id); if (index) { - fputs("InnoDB: (", stderr); - dict_index_name_print(stderr, NULL, index); - fputs(")\n", stderr); + ib::info() + << "Index " << index_id + << " is " << index->name + << " in table " << index->table->name; } #endif /* !UNIV_HOTBACKUP */ break; @@ -993,6 +1388,8 @@ buf_page_print( #ifndef UNIV_HOTBACKUP # ifdef PFS_GROUP_BUFFER_SYNC +extern mysql_pfs_key_t buffer_block_mutex_key; + /********************************************************************//** This function registers mutexes and rwlocks in buffer blocks with performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is @@ -1005,27 +1402,24 @@ pfs_register_buffer_block( /*======================*/ buf_chunk_t* chunk) /*!< in/out: chunk of buffers */ { - ulint i; - ulint num_to_register; buf_block_t* block; + ulint num_to_register; block = chunk->blocks; - num_to_register = ut_min(chunk->size, - PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER); - - for (i = 0; i < num_to_register; i++) { - ib_mutex_t* mutex; - rw_lock_t* rwlock; + num_to_register = ut_min( + chunk->size, PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER); + for (ulint i = 0; i < num_to_register; i++) { # ifdef UNIV_PFS_MUTEX + BPageMutex* mutex; + mutex = &block->mutex; - ut_a(!mutex->pfs_psi); - mutex->pfs_psi = (PSI_server) - ? PSI_server->init_mutex(buffer_block_mutex_key, mutex) - : NULL; + mutex->pfs_add(buffer_block_mutex_key); # endif /* UNIV_PFS_MUTEX */ + rw_lock_t* rwlock; + # ifdef UNIV_PFS_RWLOCK rwlock = &block->lock; ut_a(!rwlock->pfs_psi); @@ -1033,14 +1427,14 @@ pfs_register_buffer_block( ? PSI_server->init_rwlock(buf_block_lock_key, rwlock) : NULL; -# ifdef UNIV_SYNC_DEBUG +# ifdef UNIV_DEBUG rwlock = &block->debug_latch; ut_a(!rwlock->pfs_psi); rwlock->pfs_psi = (PSI_server) ? 
PSI_server->init_rwlock(buf_block_debug_latch_key, rwlock) : NULL; -# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ # endif /* UNIV_PFS_RWLOCK */ block++; @@ -1066,6 +1460,7 @@ buf_block_init( block->page.state = BUF_BLOCK_NOT_USED; block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; + block->page.flush_observer = NULL; block->page.key_version = 0; block->page.page_encrypted = false; block->page.page_compressed = false; @@ -1077,57 +1472,59 @@ buf_block_init( block->modify_clock = 0; block->page.slot = NULL; -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + ut_d(block->page.file_page_was_freed = FALSE); - block->check_index_page_at_flush = FALSE; block->index = NULL; + block->made_dirty_with_no_latch = false; + block->skip_flush_check = false; + + ut_d(block->page.in_page_hash = FALSE); + ut_d(block->page.in_zip_hash = FALSE); + ut_d(block->page.in_flush_list = FALSE); + ut_d(block->page.in_free_list = FALSE); + ut_d(block->page.in_LRU_list = FALSE); + ut_d(block->in_unzip_LRU_list = FALSE); + ut_d(block->in_withdraw_list = FALSE); -#ifdef UNIV_DEBUG - block->page.in_page_hash = FALSE; - block->page.in_zip_hash = FALSE; - block->page.in_flush_list = FALSE; - block->page.in_free_list = FALSE; - block->page.in_LRU_list = FALSE; - block->in_unzip_LRU_list = FALSE; -#endif /* UNIV_DEBUG */ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers = 0; #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ page_zip_des_init(&block->page.zip); + mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex); + #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration - of buffer block mutex/rwlock with performance schema. 
If - PFS_GROUP_BUFFER_SYNC is defined, skip the registration - since buffer block mutex/rwlock will be registered later in - pfs_register_buffer_block() */ + of buffer block rwlock with performance schema. + + If PFS_GROUP_BUFFER_SYNC is defined, skip the registration + since buffer block rwlock will be registered later in + pfs_register_buffer_block(). */ - mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK); rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING); -# ifdef UNIV_SYNC_DEBUG - rw_lock_create(PFS_NOT_INSTRUMENTED, - &block->debug_latch, SYNC_NO_ORDER_CHECK); -# endif /* UNIV_SYNC_DEBUG */ + ut_d(rw_lock_create( + PFS_NOT_INSTRUMENTED, + &block->debug_latch, SYNC_NO_ORDER_CHECK)); #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ - mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK); + rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING); -# ifdef UNIV_SYNC_DEBUG - rw_lock_create(buf_block_debug_latch_key, - &block->debug_latch, SYNC_NO_ORDER_CHECK); -# endif /* UNIV_SYNC_DEBUG */ + ut_d(rw_lock_create( + buf_block_debug_latch_key, + &block->debug_latch, SYNC_NO_ORDER_CHECK)); + #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ + block->lock.is_block_lock = 1; + ut_ad(rw_lock_validate(&(block->lock))); } /********************************************************************//** Allocates a chunk of buffer frames. 
-@return chunk, or NULL on failure */ +@return chunk, or NULL on failure */ static buf_chunk_t* buf_chunk_init( @@ -1147,30 +1544,32 @@ buf_chunk_init( mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); - chunk->mem_size = mem_size; - chunk->mem = os_mem_alloc_large(&chunk->mem_size); + DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL);); + + chunk->mem = buf_pool->allocator.allocate_large(mem_size, + &chunk->mem_pfx); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } -#ifdef HAVE_LIBNUMA +#if defined(HAVE_LIBNUMA) && defined(WITH_NUMA) if (srv_numa_interleave) { struct bitmask *numa_mems_allowed = numa_get_mems_allowed(); - int st = mbind(chunk->mem, chunk->mem_size, + int st = mbind(chunk->mem, chunk->mem_size(), MPOL_INTERLEAVE, numa_mems_allowed->maskp, numa_mems_allowed->size, MPOL_MF_MOVE); if (st != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy of buffer" - " pool page frames to MPOL_INTERLEAVE" - " (error: %s).", strerror(errno)); + ib::warn() << "Failed to set NUMA memory policy of" + " buffer pool page frames to MPOL_INTERLEAVE" + " (error: " << strerror(errno) << ")."; } } -#endif // HAVE_LIBNUMA +#endif /* HAVE_LIBNUMA && WITH_NUMA */ + /* Allocate the block descriptors from the start of the memory block. */ @@ -1182,7 +1581,7 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE); - chunk->size = chunk->mem_size / UNIV_PAGE_SIZE + chunk->size = chunk->mem_pfx.m_size / UNIV_PAGE_SIZE - (frame != chunk->mem); /* Subtract the space needed for block descriptors. 
*/ @@ -1209,7 +1608,7 @@ buf_chunk_init( UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); /* Add the block to the free list */ - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page)); + UT_LIST_ADD_LAST(buf_pool->free, &block->page); ut_d(block->page.in_free_list = TRUE); ut_ad(buf_pool_from_block(block) == buf_pool); @@ -1218,9 +1617,11 @@ buf_chunk_init( frame += UNIV_PAGE_SIZE; } + buf_pool_register_chunk(chunk); + #ifdef PFS_GROUP_BUFFER_SYNC pfs_register_buffer_block(chunk); -#endif +#endif /* PFS_GROUP_BUFFER_SYNC */ return(chunk); } @@ -1228,7 +1629,7 @@ buf_chunk_init( /*********************************************************************//** Finds a block in the given buffer chunk that points to a given compressed page. -@return buffer block pointing to the compressed page, or NULL */ +@return buffer block pointing to the compressed page, or NULL */ static buf_block_t* buf_chunk_contains_zip( @@ -1254,8 +1655,7 @@ buf_chunk_contains_zip( /*********************************************************************//** Finds a block in the buffer pool that points to a given compressed page. -@return buffer block pointing to the compressed page, or NULL */ -UNIV_INTERN +@return buffer block pointing to the compressed page, or NULL */ buf_block_t* buf_pool_contains_zip( /*==================*/ @@ -1282,7 +1682,7 @@ buf_pool_contains_zip( /*********************************************************************//** Checks that all file pages in the buffer chunk are in a replaceable state. -@return address of a non-free block, or NULL if all freed */ +@return address of a non-free block, or NULL if all freed */ static const buf_block_t* buf_chunk_not_freed( @@ -1313,9 +1713,9 @@ buf_chunk_not_freed( file pages. 
*/ break; case BUF_BLOCK_FILE_PAGE: - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); ready = buf_flush_ready_for_replace(&block->page); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); if (!ready) { @@ -1350,6 +1750,7 @@ buf_pool_set_sizes(void) srv_buf_pool_curr_size = curr_size; srv_buf_pool_old_size = srv_buf_pool_size; + srv_buf_pool_base_size = srv_buf_pool_size; buf_pool_mutex_exit_all(); } @@ -1357,7 +1758,6 @@ buf_pool_set_sizes(void) /********************************************************************//** Initialize a buffer pool instance. @return DB_SUCCESS if all goes well. */ -UNIV_INTERN ulint buf_pool_init_instance( /*===================*/ @@ -1366,50 +1766,99 @@ buf_pool_init_instance( ulint instance_no) /*!< in: id of the instance */ { ulint i; + ulint chunk_size; buf_chunk_t* chunk; + ut_ad(buf_pool_size % srv_buf_pool_chunk_unit == 0); + /* 1. Initialize general fields ------------------------------- */ - mutex_create(buf_pool_mutex_key, - &buf_pool->mutex, SYNC_BUF_POOL); - mutex_create(buf_pool_zip_mutex_key, - &buf_pool->zip_mutex, SYNC_BUF_BLOCK); + mutex_create(LATCH_ID_BUF_POOL, &buf_pool->mutex); + + mutex_create(LATCH_ID_BUF_POOL_ZIP, &buf_pool->zip_mutex); + + new(&buf_pool->allocator) + ut_allocator(mem_key_buf_buf_pool); buf_pool_mutex_enter(buf_pool); if (buf_pool_size > 0) { - buf_pool->n_chunks = 1; + buf_pool->n_chunks + = buf_pool_size / srv_buf_pool_chunk_unit; + chunk_size = srv_buf_pool_chunk_unit; - buf_pool->chunks = chunk = - (buf_chunk_t*) mem_zalloc(sizeof *chunk); + buf_pool->chunks = + reinterpret_cast(ut_zalloc_nokey( + buf_pool->n_chunks * sizeof(*chunk))); + buf_pool->chunks_old = NULL; - UT_LIST_INIT(buf_pool->free); + UT_LIST_INIT(buf_pool->LRU, &buf_page_t::LRU); + UT_LIST_INIT(buf_pool->free, &buf_page_t::list); + UT_LIST_INIT(buf_pool->withdraw, &buf_page_t::list); + buf_pool->withdraw_target = 0; + UT_LIST_INIT(buf_pool->flush_list, &buf_page_t::list); + UT_LIST_INIT(buf_pool->unzip_LRU, 
&buf_block_t::unzip_LRU); - if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) { - mem_free(chunk); - mem_free(buf_pool); +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + UT_LIST_INIT(buf_pool->zip_clean, &buf_page_t::list); +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - buf_pool_mutex_exit(buf_pool); - - return(DB_ERROR); + for (i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) { + UT_LIST_INIT( + buf_pool->zip_free[i], &buf_buddy_free_t::list); } + buf_pool->curr_size = 0; + chunk = buf_pool->chunks; + + do { + if (!buf_chunk_init(buf_pool, chunk, chunk_size)) { + while (--chunk >= buf_pool->chunks) { + buf_block_t* block = chunk->blocks; + + for (i = chunk->size; i--; block++) { + mutex_free(&block->mutex); + rw_lock_free(&block->lock); + + ut_d(rw_lock_free( + &block->debug_latch)); + } + + buf_pool->allocator.deallocate_large( + chunk->mem, &chunk->mem_pfx); + } + ut_free(buf_pool->chunks); + buf_pool_mutex_exit(buf_pool); + + return(DB_ERROR); + } + + buf_pool->curr_size += chunk->size; + } while (++chunk < buf_pool->chunks + buf_pool->n_chunks); + buf_pool->instance_no = instance_no; - buf_pool->old_pool_size = buf_pool_size; - buf_pool->curr_size = chunk->size; + buf_pool->read_ahead_area = + ut_min(BUF_READ_AHEAD_PAGES, + ut_2_power_up(buf_pool->curr_size / + BUF_READ_AHEAD_PORTION)); buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE; + buf_pool->old_size = buf_pool->curr_size; + buf_pool->n_chunks_new = buf_pool->n_chunks; + /* Number of locks protecting page_hash must be a power of two */ srv_n_page_hash_locks = static_cast( - ut_2_power_up(srv_n_page_hash_locks)); + ut_2_power_up(srv_n_page_hash_locks)); ut_a(srv_n_page_hash_locks != 0); ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS); - buf_pool->page_hash = ha_create(2 * buf_pool->curr_size, - srv_n_page_hash_locks, - MEM_HEAP_FOR_PAGE_HASH, - SYNC_BUF_PAGE_HASH); + buf_pool->page_hash = ib_create( + 2 * buf_pool->curr_size, + LATCH_ID_HASH_TABLE_RW_LOCK, + srv_n_page_hash_locks, 
MEM_HEAP_FOR_PAGE_HASH); + + buf_pool->page_hash_old = NULL; buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); @@ -1418,17 +1867,19 @@ buf_pool_init_instance( /* 2. Initialize flushing fields -------------------------------- */ - mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex, - SYNC_BUF_FLUSH_LIST); + mutex_create(LATCH_ID_FLUSH_LIST, &buf_pool->flush_list_mutex); for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { - buf_pool->no_flush[i] = os_event_create(); + buf_pool->no_flush[i] = os_event_create(0); } - buf_pool->watch = (buf_page_t*) mem_zalloc( + buf_pool->watch = (buf_page_t*) ut_zalloc_nokey( sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE); + for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) { + buf_pool->watch[i].buf_pool_index = buf_pool->instance_no; + } - /* All fields are initialized by mem_zalloc(). */ + /* All fields are initialized by ut_zalloc_nokey(). */ buf_pool->try_LRU_scan = TRUE; @@ -1446,10 +1897,12 @@ buf_pool_init_instance( new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex); /* Initialize the temporal memory array and slots */ - buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t)); + buf_pool->tmp_arr = (buf_tmp_array_t *)ut_malloc_nokey(sizeof(buf_tmp_array_t)); + memset(buf_pool->tmp_arr, 0, sizeof(buf_tmp_array_t)); ulint n_slots = srv_n_read_io_threads * srv_n_write_io_threads * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD); buf_pool->tmp_arr->n_slots = n_slots; - buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots); + buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)ut_malloc_nokey(sizeof(buf_tmp_buffer_t) * n_slots); + memset(buf_pool->tmp_arr->slots, 0, (sizeof(buf_tmp_buffer_t) * n_slots)); buf_pool_mutex_exit(buf_pool); @@ -1471,11 +1924,18 @@ buf_pool_free_instance( buf_chunk_t* chunk; buf_chunk_t* chunks; buf_page_t* bpage; + buf_page_t* prev_bpage = 0; - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - while (bpage != NULL) { - buf_page_t* 
prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - enum buf_page_state state = buf_page_get_state(bpage); + mutex_free(&buf_pool->mutex); + mutex_free(&buf_pool->zip_mutex); + mutex_free(&buf_pool->flush_list_mutex); + + for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; + bpage = prev_bpage) { + + prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + buf_page_state state = buf_page_get_state(bpage); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); @@ -1487,21 +1947,33 @@ buf_pool_free_instance( || srv_fast_shutdown == 2); buf_page_free_descriptor(bpage); } - - bpage = prev_bpage; } - mem_free(buf_pool->watch); + ut_free(buf_pool->watch); buf_pool->watch = NULL; chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; while (--chunk >= chunks) { - os_mem_free_large(chunk->mem, chunk->mem_size); + buf_block_t* block = chunk->blocks; + + for (ulint i = chunk->size; i--; block++) { + mutex_free(&block->mutex); + rw_lock_free(&block->lock); + + ut_d(rw_lock_free(&block->debug_latch)); + } + + buf_pool->allocator.deallocate_large( + chunk->mem, &chunk->mem_pfx); } - mem_free(buf_pool->chunks); + for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) { + os_event_destroy(buf_pool->no_flush[i]); + } + + ut_free(buf_pool->chunks); ha_clear(buf_pool->page_hash); hash_table_free(buf_pool->page_hash); hash_table_free(buf_pool->zip_hash); @@ -1526,17 +1998,18 @@ buf_pool_free_instance( slot->comp_buf_free = NULL; } } + + ut_free(buf_pool->tmp_arr->slots); + ut_free(buf_pool->tmp_arr); + buf_pool->tmp_arr = NULL; } - mem_free(buf_pool->tmp_arr->slots); - mem_free(buf_pool->tmp_arr); - buf_pool->tmp_arr = NULL; + buf_pool->allocator.~ut_allocator(); } /********************************************************************//** Creates the buffer pool. 
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ -UNIV_INTERN +@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ dberr_t buf_pool_init( /*==========*/ @@ -1550,26 +2023,17 @@ buf_pool_init( ut_ad(n_instances <= MAX_BUFFER_POOLS); ut_ad(n_instances == srv_buf_pool_instances); -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) { - struct bitmask *numa_mems_allowed = numa_get_mems_allowed(); + NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - ib_logf(IB_LOG_LEVEL_INFO, - "Setting NUMA memory policy to MPOL_INTERLEAVE"); - if (set_mempolicy(MPOL_INTERLEAVE, - numa_mems_allowed->maskp, - numa_mems_allowed->size) != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy to" - " MPOL_INTERLEAVE (error: %s).", - strerror(errno)); - } - } -#endif // HAVE_LIBNUMA + buf_pool_resizing = false; + buf_pool_withdrawing = false; + buf_withdraw_clock = 0; - buf_pool_ptr = (buf_pool_t*) mem_zalloc( + buf_pool_ptr = (buf_pool_t*) ut_zalloc_nokey( n_instances * sizeof *buf_pool_ptr); + buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t()); + for (i = 0; i < n_instances; i++) { buf_pool_t* ptr = &buf_pool_ptr[i]; @@ -1582,59 +2046,1092 @@ buf_pool_init( } } + buf_chunk_map_ref = buf_chunk_map_reg; + buf_pool_set_sizes(); buf_LRU_old_ratio_update(100 * 3/ 8, FALSE); btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) { - ib_logf(IB_LOG_LEVEL_INFO, - "Setting NUMA memory policy to MPOL_DEFAULT"); - if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy to" - " MPOL_DEFAULT (error: %s).", strerror(errno)); - } - } -#endif // HAVE_LIBNUMA - - buf_flush_event = os_event_create(); - return(DB_SUCCESS); } /********************************************************************//** Frees the buffer pool at shutdown. This must not be invoked before freeing all mutexes. 
*/ -UNIV_INTERN void buf_pool_free( /*==========*/ ulint n_instances) /*!< in: numbere of instances to free */ { - ulint i; - - for (i = 0; i < n_instances; i++) { + for (ulint i = 0; i < n_instances; i++) { buf_pool_free_instance(buf_pool_from_array(i)); } - mem_free(buf_pool_ptr); + UT_DELETE(buf_chunk_map_reg); + buf_chunk_map_reg = buf_chunk_map_ref = NULL; + + ut_free(buf_pool_ptr); buf_pool_ptr = NULL; } +/** Reallocate a control block. +@param[in] buf_pool buffer pool instance +@param[in] block pointer to control block +@retval false if failed because of no free blocks. */ +static +bool +buf_page_realloc( + buf_pool_t* buf_pool, + buf_block_t* block) +{ + buf_block_t* new_block; + + ut_ad(buf_pool_withdrawing); + ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + + new_block = buf_LRU_get_free_only(buf_pool); + + if (new_block == NULL) { + return(false); /* free_list was not enough */ + } + + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, block->page.id); + + rw_lock_x_lock(hash_lock); + mutex_enter(&block->mutex); + + if (buf_page_can_relocate(&block->page)) { + mutex_enter(&new_block->mutex); + + memcpy(new_block->frame, block->frame, UNIV_PAGE_SIZE); + memcpy(&new_block->page, &block->page, sizeof block->page); + + /* relocate LRU list */ + ut_ad(block->page.in_LRU_list); + ut_ad(!block->page.in_zip_hash); + ut_d(block->page.in_LRU_list = FALSE); + + buf_LRU_adjust_hp(buf_pool, &block->page); + + buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, &block->page); + UT_LIST_REMOVE(buf_pool->LRU, &block->page); + + if (prev_b != NULL) { + UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, &new_block->page); + } else { + UT_LIST_ADD_FIRST(buf_pool->LRU, &new_block->page); + } + + if (buf_pool->LRU_old == &block->page) { + buf_pool->LRU_old = &new_block->page; + } + + ut_ad(new_block->page.in_LRU_list); + + /* relocate unzip_LRU list */ + if (block->page.zip.data != NULL) { + ut_ad(block->in_unzip_LRU_list); + 
ut_d(new_block->in_unzip_LRU_list = TRUE); + UNIV_MEM_DESC(&new_block->page.zip.data, + page_zip_get_size(&new_block->page.zip)); + + buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); + UT_LIST_REMOVE(buf_pool->unzip_LRU, block); + + ut_d(block->in_unzip_LRU_list = FALSE); + block->page.zip.data = NULL; + page_zip_set_size(&block->page.zip, 0); + + if (prev_block != NULL) { + UT_LIST_INSERT_AFTER(buf_pool->unzip_LRU, prev_block, new_block); + } else { + UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, new_block); + } + } else { + ut_ad(!block->in_unzip_LRU_list); + ut_d(new_block->in_unzip_LRU_list = FALSE); + } + + /* relocate buf_pool->page_hash */ + ut_ad(block->page.in_page_hash); + ut_ad(&block->page == buf_page_hash_get_low(buf_pool, + block->page.id)); + ut_d(block->page.in_page_hash = FALSE); + ulint fold = block->page.id.fold(); + ut_ad(fold == new_block->page.id.fold()); + HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, (&block->page)); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, (&new_block->page)); + + ut_ad(new_block->page.in_page_hash); + + buf_block_modify_clock_inc(block); + memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4); + memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); + UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); + buf_block_set_state(block, BUF_BLOCK_REMOVE_HASH); + block->page.id.reset(ULINT32_UNDEFINED, ULINT32_UNDEFINED); + + /* Relocate buf_pool->flush_list. 
*/ + if (block->page.oldest_modification) { + buf_flush_relocate_on_flush_list( + &block->page, &new_block->page); + } + + /* set other flags of buf_block_t */ + + ut_ad(!block->index); + new_block->index = NULL; + new_block->n_hash_helps = 0; + new_block->n_fields = 1; + new_block->left_side = TRUE; + + new_block->lock_hash_val = block->lock_hash_val; + ut_ad(new_block->lock_hash_val == lock_rec_hash( + new_block->page.id.space(), + new_block->page.id.page_no())); + + rw_lock_x_unlock(hash_lock); + mutex_exit(&new_block->mutex); + + /* free block */ + buf_block_set_state(block, BUF_BLOCK_MEMORY); + buf_LRU_block_free_non_file_page(block); + + mutex_exit(&block->mutex); + } else { + rw_lock_x_unlock(hash_lock); + mutex_exit(&block->mutex); + + /* free new_block */ + mutex_enter(&new_block->mutex); + buf_LRU_block_free_non_file_page(new_block); + mutex_exit(&new_block->mutex); + } + + return(true); /* free_list was enough */ +} + +/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status +to the specified string. The format and the following parameters are the +same as the ones used for printf(3). +@param[in] fmt format +@param[in] ... extra parameters according to fmt */ +static +void +buf_resize_status( + const char* fmt, + ...) +{ + va_list ap; + + va_start(ap, fmt); + + ut_vsnprintf( + export_vars.innodb_buffer_pool_resize_status, + sizeof(export_vars.innodb_buffer_pool_resize_status), + fmt, ap); + + va_end(ap); + + ib::info() << export_vars.innodb_buffer_pool_resize_status; +} + +/** Determines if a block is intended to be withdrawn. 
+@param[in] buf_pool buffer pool instance +@param[in] block pointer to control block +@retval true if will be withdrawn */ +bool +buf_block_will_withdrawn( + buf_pool_t* buf_pool, + const buf_block_t* block) +{ + ut_ad(buf_pool->curr_size < buf_pool->old_size); + ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool)); + + const buf_chunk_t* chunk + = buf_pool->chunks + buf_pool->n_chunks_new; + const buf_chunk_t* echunk + = buf_pool->chunks + buf_pool->n_chunks; + + while (chunk < echunk) { + if (block >= chunk->blocks + && block < chunk->blocks + chunk->size) { + return(true); + } + ++chunk; + } + + return(false); +} + +/** Determines if a frame is intended to be withdrawn. +@param[in] buf_pool buffer pool instance +@param[in] ptr pointer to a frame +@retval true if will be withdrawn */ +bool +buf_frame_will_withdrawn( + buf_pool_t* buf_pool, + const byte* ptr) +{ + ut_ad(buf_pool->curr_size < buf_pool->old_size); + ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool)); + + const buf_chunk_t* chunk + = buf_pool->chunks + buf_pool->n_chunks_new; + const buf_chunk_t* echunk + = buf_pool->chunks + buf_pool->n_chunks; + + while (chunk < echunk) { + if (ptr >= chunk->blocks->frame + && ptr < (chunk->blocks + chunk->size - 1)->frame + + UNIV_PAGE_SIZE) { + return(true); + } + ++chunk; + } + + return(false); +} + +/** Withdraw the buffer pool blocks from end of the buffer pool instance +until withdrawn by buf_pool->withdraw_target. 
+@param[in] buf_pool buffer pool instance +@retval true if retry is needed */ +static +bool +buf_pool_withdraw_blocks( + buf_pool_t* buf_pool) +{ + buf_block_t* block; + ulint loop_count = 0; + ulint i = buf_pool_index(buf_pool); + + ib::info() << "buffer pool " << i + << " : start to withdraw the last " + << buf_pool->withdraw_target << " blocks."; + + /* Minimize buf_pool->zip_free[i] lists */ + buf_pool_mutex_enter(buf_pool); + buf_buddy_condense_free(buf_pool); + buf_pool_mutex_exit(buf_pool); + + while (UT_LIST_GET_LEN(buf_pool->withdraw) + < buf_pool->withdraw_target) { + + /* try to withdraw from free_list */ + ulint count1 = 0; + + buf_pool_mutex_enter(buf_pool); + block = reinterpret_cast( + UT_LIST_GET_FIRST(buf_pool->free)); + while (block != NULL + && UT_LIST_GET_LEN(buf_pool->withdraw) + < buf_pool->withdraw_target) { + ut_ad(block->page.in_free_list); + ut_ad(!block->page.in_flush_list); + ut_ad(!block->page.in_LRU_list); + ut_a(!buf_page_in_file(&block->page)); + + buf_block_t* next_block; + next_block = reinterpret_cast( + UT_LIST_GET_NEXT( + list, &block->page)); + + if (buf_block_will_withdrawn(buf_pool, block)) { + /* This should be withdrawn */ + UT_LIST_REMOVE( + buf_pool->free, + &block->page); + UT_LIST_ADD_LAST( + buf_pool->withdraw, + &block->page); + ut_d(block->in_withdraw_list = TRUE); + count1++; + } + + block = next_block; + } + buf_pool_mutex_exit(buf_pool); + + /* reserve free_list length */ + if (UT_LIST_GET_LEN(buf_pool->withdraw) + < buf_pool->withdraw_target) { + ulint scan_depth; + flush_counters_t n; + + /* cap scan_depth with current LRU size. 
*/ + buf_pool_mutex_enter(buf_pool); + scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); + buf_pool_mutex_exit(buf_pool); + + scan_depth = ut_min( + ut_max(buf_pool->withdraw_target + - UT_LIST_GET_LEN(buf_pool->withdraw), + static_cast(srv_LRU_scan_depth)), + scan_depth); + + buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU, + scan_depth, 0, &n); + buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU); + + if (n.flushed) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, + MONITOR_LRU_BATCH_FLUSH_COUNT, + MONITOR_LRU_BATCH_FLUSH_PAGES, + n.flushed); + } + } + + /* relocate blocks/buddies in withdrawn area */ + ulint count2 = 0; + + buf_pool_mutex_enter(buf_pool); + buf_page_t* bpage; + bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + while (bpage != NULL) { + BPageMutex* block_mutex; + buf_page_t* next_bpage; + + block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); + + next_bpage = UT_LIST_GET_NEXT(LRU, bpage); + + if (bpage->zip.data != NULL + && buf_frame_will_withdrawn( + buf_pool, + static_cast(bpage->zip.data))) { + + if (buf_page_can_relocate(bpage)) { + mutex_exit(block_mutex); + buf_pool_mutex_exit_forbid(buf_pool); + if(!buf_buddy_realloc( + buf_pool, bpage->zip.data, + page_zip_get_size( + &bpage->zip))) { + + /* failed to allocate block */ + buf_pool_mutex_exit_allow( + buf_pool); + break; + } + buf_pool_mutex_exit_allow(buf_pool); + mutex_enter(block_mutex); + count2++; + } + /* NOTE: if the page is in use, + not reallocated yet */ + } + + if (buf_page_get_state(bpage) + == BUF_BLOCK_FILE_PAGE + && buf_block_will_withdrawn( + buf_pool, + reinterpret_cast(bpage))) { + + if (buf_page_can_relocate(bpage)) { + mutex_exit(block_mutex); + buf_pool_mutex_exit_forbid(buf_pool); + if(!buf_page_realloc( + buf_pool, + reinterpret_cast( + bpage))) { + /* failed to allocate block */ + buf_pool_mutex_exit_allow( + buf_pool); + break; + } + buf_pool_mutex_exit_allow(buf_pool); + count2++; + } else { + mutex_exit(block_mutex); + } + /* NOTE: if the 
page is in use, + not reallocated yet */ + } else { + mutex_exit(block_mutex); + } + + bpage = next_bpage; + } + buf_pool_mutex_exit(buf_pool); + + buf_resize_status( + "buffer pool %lu : withdrawing blocks. (%lu/%lu)", + i, UT_LIST_GET_LEN(buf_pool->withdraw), + buf_pool->withdraw_target); + + ib::info() << "buffer pool " << i << " : withdrew " + << count1 << " blocks from free list." + << " Tried to relocate " << count2 << " pages (" + << UT_LIST_GET_LEN(buf_pool->withdraw) << "/" + << buf_pool->withdraw_target << ")."; + + if (++loop_count >= 10) { + /* give up for now. + retried after user threads paused. */ + + ib::info() << "buffer pool " << i + << " : will retry to withdraw later."; + + /* need retry later */ + return(true); + } + } + + /* confirm withdrawn enough */ + const buf_chunk_t* chunk + = buf_pool->chunks + buf_pool->n_chunks_new; + const buf_chunk_t* echunk + = buf_pool->chunks + buf_pool->n_chunks; + + while (chunk < echunk) { + block = chunk->blocks; + for (ulint j = chunk->size; j--; block++) { + /* If !=BUF_BLOCK_NOT_USED block in the + withdrawn area, it means corruption + something */ + ut_a(buf_block_get_state(block) + == BUF_BLOCK_NOT_USED); + ut_ad(block->in_withdraw_list); + } + ++chunk; + } + + ib::info() << "buffer pool " << i << " : withdrawn target " + << UT_LIST_GET_LEN(buf_pool->withdraw) << " blocks."; + + /* retry is not needed */ + ++buf_withdraw_clock; + os_wmb; + + return(false); +} + +/** resize page_hash and zip_hash for a buffer pool instance. 
+@param[in] buf_pool buffer pool instance */ +static +void +buf_pool_resize_hash( + buf_pool_t* buf_pool) +{ + hash_table_t* new_hash_table; + + ut_ad(buf_pool->page_hash_old == NULL); + + /* recreate page_hash */ + new_hash_table = ib_recreate( + buf_pool->page_hash, 2 * buf_pool->curr_size); + + for (ulint i = 0; i < hash_get_n_cells(buf_pool->page_hash); i++) { + buf_page_t* bpage; + + bpage = static_cast( + HASH_GET_FIRST( + buf_pool->page_hash, i)); + + while (bpage) { + buf_page_t* prev_bpage = bpage; + ulint fold; + + bpage = static_cast( + HASH_GET_NEXT( + hash, prev_bpage)); + + fold = prev_bpage->id.fold(); + + HASH_DELETE(buf_page_t, hash, + buf_pool->page_hash, fold, + prev_bpage); + + HASH_INSERT(buf_page_t, hash, + new_hash_table, fold, + prev_bpage); + } + } + + buf_pool->page_hash_old = buf_pool->page_hash; + buf_pool->page_hash = new_hash_table; + + /* recreate zip_hash */ + new_hash_table = hash_create(2 * buf_pool->curr_size); + + for (ulint i = 0; i < hash_get_n_cells(buf_pool->zip_hash); i++) { + buf_page_t* bpage; + + bpage = static_cast( + HASH_GET_FIRST(buf_pool->zip_hash, i)); + + while (bpage) { + buf_page_t* prev_bpage = bpage; + ulint fold; + + bpage = static_cast( + HASH_GET_NEXT( + hash, prev_bpage)); + + fold = BUF_POOL_ZIP_FOLD( + reinterpret_cast( + prev_bpage)); + + HASH_DELETE(buf_page_t, hash, + buf_pool->zip_hash, fold, + prev_bpage); + + HASH_INSERT(buf_page_t, hash, + new_hash_table, fold, + prev_bpage); + } + } + + hash_table_free(buf_pool->zip_hash); + buf_pool->zip_hash = new_hash_table; +} + +#ifndef DBUG_OFF +/** This is a debug routine to inject an memory allocation failure error. */ +static +void +buf_pool_resize_chunk_make_null(buf_chunk_t** new_chunks) +{ + static int count = 0; + + if (count == 1) { + ut_free(*new_chunks); + *new_chunks = NULL; + } + + count++; +} +#endif // DBUG_OFF + +/** Resize the buffer pool based on srv_buf_pool_size from +srv_buf_pool_old_size. 
*/ +void +buf_pool_resize() +{ + buf_pool_t* buf_pool; + ulint new_instance_size; + bool warning = false; + + NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; + + ut_ad(!buf_pool_resizing); + ut_ad(!buf_pool_withdrawing); + ut_ad(srv_buf_pool_chunk_unit > 0); + + new_instance_size = srv_buf_pool_size / srv_buf_pool_instances; + new_instance_size /= UNIV_PAGE_SIZE; + + buf_resize_status("Resizing buffer pool from " ULINTPF " to " + ULINTPF " (unit=" ULINTPF ").", + srv_buf_pool_old_size, srv_buf_pool_size, + srv_buf_pool_chunk_unit); + + /* set new limit for all buffer pool for resizing */ + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool = buf_pool_from_array(i); + buf_pool_mutex_enter(buf_pool); + + ut_ad(buf_pool->curr_size == buf_pool->old_size); + ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks); + ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0); + ut_ad(buf_pool->flush_rbt == NULL); + + buf_pool->curr_size = new_instance_size; + + buf_pool->n_chunks_new = new_instance_size * UNIV_PAGE_SIZE + / srv_buf_pool_chunk_unit; + + buf_pool_mutex_exit(buf_pool); + } + + /* disable AHI if needed */ + bool btr_search_disabled = false; + + buf_resize_status("Disabling adaptive hash index."); + + btr_search_s_lock_all(); + if (btr_search_enabled) { + btr_search_s_unlock_all(); + btr_search_disabled = true; + } else { + btr_search_s_unlock_all(); + } + + btr_search_disable(true); + + if (btr_search_disabled) { + ib::info() << "disabled adaptive hash index."; + } + + /* set withdraw target */ + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool = buf_pool_from_array(i); + if (buf_pool->curr_size < buf_pool->old_size) { + ulint withdraw_target = 0; + + const buf_chunk_t* chunk + = buf_pool->chunks + buf_pool->n_chunks_new; + const buf_chunk_t* echunk + = buf_pool->chunks + buf_pool->n_chunks; + + while (chunk < echunk) { + withdraw_target += chunk->size; + ++chunk; + } + + ut_ad(buf_pool->withdraw_target == 0); + buf_pool->withdraw_target = withdraw_target; 
+ buf_pool_withdrawing = true; + } + } + + buf_resize_status("Withdrawing blocks to be shrunken."); + + ib_time_t withdraw_started = ut_time(); + ulint message_interval = 60; + ulint retry_interval = 1; + +withdraw_retry: + bool should_retry_withdraw = false; + + /* wait for the number of blocks fit to the new size (if needed)*/ + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool = buf_pool_from_array(i); + if (buf_pool->curr_size < buf_pool->old_size) { + + should_retry_withdraw |= + buf_pool_withdraw_blocks(buf_pool); + } + } + + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + /* abort to resize for shutdown. */ + buf_pool_withdrawing = false; + return; + } + + /* abort buffer pool load */ + buf_load_abort(); + + if (should_retry_withdraw + && ut_difftime(ut_time(), withdraw_started) >= message_interval) { + + if (message_interval > 900) { + message_interval = 1800; + } else { + message_interval *= 2; + } + + lock_mutex_enter(); + trx_sys_mutex_enter(); + bool found = false; + for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) { + if (trx->state != TRX_STATE_NOT_STARTED + && trx->mysql_thd != NULL + && ut_difftime(withdraw_started, + trx->start_time) > 0) { + if (!found) { + ib::warn() << + "The following trx might hold" + " the blocks in buffer pool to" + " be withdrawn. 
Buffer pool" + " resizing can complete only" + " after all the transactions" + " below release the blocks."; + found = true; + } + + lock_trx_print_wait_and_mvcc_state( + stderr, trx); + } + } + trx_sys_mutex_exit(); + lock_mutex_exit(); + + withdraw_started = ut_time(); + } + + if (should_retry_withdraw) { + ib::info() << "Will retry to withdraw " << retry_interval + << " seconds later."; + os_thread_sleep(retry_interval * 1000000); + + if (retry_interval > 5) { + retry_interval = 10; + } else { + retry_interval *= 2; + } + + goto withdraw_retry; + } + + buf_pool_withdrawing = false; + + buf_resize_status("Latching whole of buffer pool."); + +#ifndef DBUG_OFF + { + bool should_wait = true; + + while (should_wait) { + should_wait = false; + DBUG_EXECUTE_IF( + "ib_buf_pool_resize_wait_before_resize", + should_wait = true; os_thread_sleep(10000);); + } + } +#endif /* !DBUG_OFF */ + + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + return; + } + + /* Indicate critical path */ + buf_pool_resizing = true; + + /* Acquire all buf_pool_mutex/hash_lock */ + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + + buf_pool_mutex_enter(buf_pool); + } + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + + hash_lock_x_all(buf_pool->page_hash); + } + + buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t()); + + /* add/delete chunks */ + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + buf_chunk_t* chunk; + buf_chunk_t* echunk; + + buf_resize_status("buffer pool %lu :" + " resizing with chunks %lu to %lu.", + i, buf_pool->n_chunks, buf_pool->n_chunks_new); + + if (buf_pool->n_chunks_new < buf_pool->n_chunks) { + /* delete chunks */ + chunk = buf_pool->chunks + + buf_pool->n_chunks_new; + echunk = buf_pool->chunks + buf_pool->n_chunks; + + ulint sum_freed = 0; + + while (chunk < echunk) { + buf_block_t* block = 
chunk->blocks; + + for (ulint j = chunk->size; + j--; block++) { + mutex_free(&block->mutex); + rw_lock_free(&block->lock); + + ut_d(rw_lock_free( + &block->debug_latch)); + } + + buf_pool->allocator.deallocate_large( + chunk->mem, &chunk->mem_pfx); + + sum_freed += chunk->size; + + ++chunk; + } + + /* discard withdraw list */ + UT_LIST_INIT(buf_pool->withdraw, + &buf_page_t::list); + buf_pool->withdraw_target = 0; + + ib::info() << "buffer pool " << i << " : " + << buf_pool->n_chunks - buf_pool->n_chunks_new + << " chunks (" << sum_freed + << " blocks) were freed."; + + buf_pool->n_chunks = buf_pool->n_chunks_new; + } + + { + /* reallocate buf_pool->chunks */ + const ulint new_chunks_size + = buf_pool->n_chunks_new * sizeof(*chunk); + + buf_chunk_t* new_chunks + = reinterpret_cast( + ut_zalloc_nokey_nofatal(new_chunks_size)); + + DBUG_EXECUTE_IF("buf_pool_resize_chunk_null", + buf_pool_resize_chunk_make_null(&new_chunks);); + + if (new_chunks == NULL) { + ib::error() << "buffer pool " << i + << " : failed to allocate" + " the chunk array."; + buf_pool->n_chunks_new + = buf_pool->n_chunks; + warning = true; + buf_pool->chunks_old = NULL; + goto calc_buf_pool_size; + } + + ulint n_chunks_copy = ut_min(buf_pool->n_chunks_new, + buf_pool->n_chunks); + + memcpy(new_chunks, buf_pool->chunks, + n_chunks_copy * sizeof(*chunk)); + + for (ulint j = 0; j < n_chunks_copy; j++) { + buf_pool_register_chunk(&new_chunks[j]); + } + + buf_pool->chunks_old = buf_pool->chunks; + buf_pool->chunks = new_chunks; + } + + + if (buf_pool->n_chunks_new > buf_pool->n_chunks) { + /* add chunks */ + chunk = buf_pool->chunks + buf_pool->n_chunks; + echunk = buf_pool->chunks + + buf_pool->n_chunks_new; + + ulint sum_added = 0; + ulint n_chunks = buf_pool->n_chunks; + + while (chunk < echunk) { + ulong unit = srv_buf_pool_chunk_unit; + + if (!buf_chunk_init(buf_pool, chunk, unit)) { + + ib::error() << "buffer pool " << i + << " : failed to allocate" + " new memory."; + + warning = true; + + 
buf_pool->n_chunks_new + = n_chunks; + + break; + } + + sum_added += chunk->size; + + ++n_chunks; + ++chunk; + } + + ib::info() << "buffer pool " << i << " : " + << buf_pool->n_chunks_new - buf_pool->n_chunks + << " chunks (" << sum_added + << " blocks) were added."; + + buf_pool->n_chunks = n_chunks; + } +calc_buf_pool_size: + + /* recalc buf_pool->curr_size */ + ulint new_size = 0; + + chunk = buf_pool->chunks; + do { + new_size += chunk->size; + } while (++chunk < buf_pool->chunks + + buf_pool->n_chunks); + + buf_pool->curr_size = new_size; + buf_pool->n_chunks_new = buf_pool->n_chunks; + + if (buf_pool->chunks_old) { + ut_free(buf_pool->chunks_old); + buf_pool->chunks_old = NULL; + } + } + + buf_pool_chunk_map_t* chunk_map_old = buf_chunk_map_ref; + buf_chunk_map_ref = buf_chunk_map_reg; + + /* set instance sizes */ + { + ulint curr_size = 0; + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool = buf_pool_from_array(i); + + ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0); + + buf_pool->read_ahead_area = + ut_min(BUF_READ_AHEAD_PAGES, + ut_2_power_up(buf_pool->curr_size / + BUF_READ_AHEAD_PORTION)); + buf_pool->curr_pool_size + = buf_pool->curr_size * UNIV_PAGE_SIZE; + curr_size += buf_pool->curr_pool_size; + buf_pool->old_size = buf_pool->curr_size; + } + srv_buf_pool_curr_size = curr_size; + innodb_set_buf_pool_size(buf_pool_size_align(curr_size)); + } + + const bool new_size_too_diff + = srv_buf_pool_base_size > srv_buf_pool_size * 2 + || srv_buf_pool_base_size * 2 < srv_buf_pool_size; + + /* Normalize page_hash and zip_hash, + if the new size is too different */ + if (!warning && new_size_too_diff) { + + buf_resize_status("Resizing hash tables."); + + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + + buf_pool_resize_hash(buf_pool); + + ib::info() << "buffer pool " << i + << " : hash tables were resized."; + } + } + + /* Release all buf_pool_mutex/page_hash */ + for (ulint i = 0; i < 
srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + + hash_unlock_x_all(buf_pool->page_hash); + buf_pool_mutex_exit(buf_pool); + + if (buf_pool->page_hash_old != NULL) { + hash_table_free(buf_pool->page_hash_old); + buf_pool->page_hash_old = NULL; + } + } + + UT_DELETE(chunk_map_old); + + buf_pool_resizing = false; + + /* Normalize other components, if the new size is too different */ + if (!warning && new_size_too_diff) { + srv_buf_pool_base_size = srv_buf_pool_size; + + buf_resize_status("Resizing also other hash tables."); + + /* normalize lock_sys */ + srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE); + lock_sys_resize(srv_lock_table_size); + + /* normalize btr_search_sys */ + btr_search_sys_resize( + buf_pool_get_curr_size() / sizeof(void*) / 64); + + /* normalize dict_sys */ + dict_resize(); + + ib::info() << "Resized hash tables at lock_sys," + " adaptive hash index, dictionary."; + } + + /* normalize ibuf->max_size */ + ibuf_max_size_update(srv_change_buffer_max_size); + + if (srv_buf_pool_old_size != srv_buf_pool_size) { + + ib::info() << "Completed to resize buffer pool from " + << srv_buf_pool_old_size + << " to " << srv_buf_pool_size << "."; + srv_buf_pool_old_size = srv_buf_pool_size; + } + + /* enable AHI if needed */ + if (btr_search_disabled) { + btr_search_enable(); + ib::info() << "Re-enabled adaptive hash index."; + } + + char now[32]; + + ut_sprintf_timestamp(now); + if (!warning) { + buf_resize_status("Completed resizing buffer pool at %s.", + now); + } else { + buf_resize_status("Resizing buffer pool failed," + " finished resizing at %s.", now); + } + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + ut_a(buf_validate()); +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + + return; +} + +/** This is the thread for resizing buffer pool. It waits for an event and +when waked up either performs a resizing and sleeps again. +@param[in] arg a dummy parameter required by os_thread_create. 
+@return this function does not return, calls os_thread_exit() +*/ +extern "C" +os_thread_ret_t +DECLARE_THREAD(buf_resize_thread)( + void* arg MY_ATTRIBUTE((unused))) +{ + my_thread_init(); + + srv_buf_resize_thread_active = true; + + while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + os_event_wait(srv_buf_resize_event); + os_event_reset(srv_buf_resize_event); + + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + break; + } + + buf_pool_mutex_enter_all(); + if (srv_buf_pool_old_size == srv_buf_pool_size) { + buf_pool_mutex_exit_all(); + std::ostringstream sout; + sout << "Size did not change (old size = new size = " + << srv_buf_pool_size << ". Nothing to do."; + buf_resize_status(sout.str().c_str()); + + /* nothing to do */ + continue; + } + buf_pool_mutex_exit_all(); + + buf_pool_resize(); + } + + srv_buf_resize_thread_active = false; + + my_thread_end(); + os_thread_exit(); + + OS_THREAD_DUMMY_RETURN; +} + /********************************************************************//** Clears the adaptive hash index on all pages in the buffer pool. */ -UNIV_INTERN void buf_pool_clear_hash_index(void) /*===========================*/ { ulint p; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(btr_search_own_all(RW_LOCK_X)); + ut_ad(!buf_pool_resizing); ut_ad(!btr_search_enabled); for (p = 0; p < srv_buf_pool_instances; p++) { @@ -1650,7 +3147,7 @@ buf_pool_clear_hash_index(void) dict_index_t* index = block->index; /* We can set block->index = NULL - when we have an x-latch on btr_search_latch; + when we have an x-latch on search latch; see the comment in buf0buf.h */ if (!index) { @@ -1671,7 +3168,7 @@ buf_pool_clear_hash_index(void) Relocate a buffer control block. Relocates the block on the LRU list and in buf_pool->page_hash. Does not relocate bpage->list. The caller must take care of relocating bpage->list. 
*/ -UNIV_INTERN +static void buf_relocate( /*=========*/ @@ -1681,11 +3178,8 @@ buf_relocate( buf_page_t* dpage) /*!< in/out: destination control block */ { buf_page_t* b; - ulint fold; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - fold = buf_page_address_fold(bpage->space, bpage->offset); - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); @@ -1694,10 +3188,7 @@ buf_relocate( ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_page_hash); - ut_ad(bpage == buf_page_hash_get_low(buf_pool, - bpage->space, - bpage->offset, - fold)); + ut_ad(bpage == buf_page_hash_get_low(buf_pool, bpage->id)); ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); #ifdef UNIV_DEBUG @@ -1726,12 +3217,12 @@ buf_relocate( /* relocate buf_pool->LRU */ b = UT_LIST_GET_PREV(LRU, bpage); - UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); + UT_LIST_REMOVE(buf_pool->LRU, bpage); - if (b) { - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage); + if (b != NULL) { + UT_LIST_INSERT_AFTER(buf_pool->LRU, b, dpage); } else { - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage); + UT_LIST_ADD_FIRST(buf_pool->LRU, dpage); } if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) { @@ -1751,10 +3242,11 @@ buf_relocate( #endif /* UNIV_LRU_DEBUG */ } - ut_d(UT_LIST_VALIDATE( - LRU, buf_page_t, buf_pool->LRU, CheckInLRUList())); + ut_d(CheckInLRUList::validate(buf_pool)); /* relocate buf_pool->page_hash */ + ulint fold = bpage->id.fold(); + ut_ad(fold == dpage->id.fold()); HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage); } @@ -1837,15 +3329,14 @@ LRUItr::start() return(m_hp); } -/********************************************************************//** -Determine if a block is a sentinel for a buffer pool watch. 
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ -UNIV_INTERN +/** Determine if a block is a sentinel for a buffer pool watch. +@param[in] buf_pool buffer pool instance +@param[in] bpage block +@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ ibool buf_pool_watch_is_sentinel( -/*=======================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - const buf_page_t* bpage) /*!< in: block */ + const buf_pool_t* buf_pool, + const buf_page_t* bpage) { /* We must also own the appropriate hash lock. */ ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage)); @@ -1864,35 +3355,29 @@ buf_pool_watch_is_sentinel( ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_page_hash); ut_ad(bpage->zip.data == NULL); - ut_ad(bpage->buf_fix_count > 0); return(TRUE); } -/****************************************************************//** -Add watch for the given page to be read in. Caller must have +/** Add watch for the given page to be read in. Caller must have appropriate hash_lock for the bpage. This function may release the hash_lock and reacquire it. 
+@param[in] page_id page id +@param[in,out] hash_lock hash_lock currently latched @return NULL if watch set, block if the page is in the buffer pool */ -UNIV_INTERN buf_page_t* buf_pool_watch_set( -/*===============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - ulint fold) /*!< in: buf_page_address_fold(space, offset) */ + const page_id_t& page_id, + rw_lock_t** hash_lock) { buf_page_t* bpage; ulint i; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - rw_lock_t* hash_lock; + buf_pool_t* buf_pool = buf_pool_get(page_id); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); + ut_ad(*hash_lock == buf_page_hash_lock_get(buf_pool, page_id)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(*hash_lock, RW_LOCK_X)); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, page_id); if (bpage != NULL) { page_found: @@ -1902,11 +3387,7 @@ page_found: } /* Add to an existing watch. */ -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&bpage->buf_fix_count, 1); -#else - ++bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ + buf_block_fix(bpage); return(NULL); } @@ -1920,21 +3401,24 @@ page_found: /* To obey latching order first release the hash_lock. */ - rw_lock_x_unlock(hash_lock); + rw_lock_x_unlock(*hash_lock); buf_pool_mutex_enter(buf_pool); hash_lock_x_all(buf_pool->page_hash); + /* If not own buf_pool_mutex, page_hash can be changed. */ + *hash_lock = buf_page_hash_lock_get(buf_pool, page_id); + /* We have to recheck that the page was not loaded or a watch set by some other purge thread. This is because of the small time window between when we release the hash_lock to acquire buf_pool mutex above. 
*/ - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, page_id); if (UNIV_LIKELY_NULL(bpage)) { buf_pool_mutex_exit(buf_pool); - hash_unlock_x_all_but(buf_pool->page_hash, hash_lock); + hash_unlock_x_all_but(buf_pool->page_hash, *hash_lock); goto page_found; } @@ -1961,20 +3445,19 @@ page_found: buf_block_t::mutex or buf_pool->zip_mutex or both. */ bpage->state = BUF_BLOCK_ZIP_PAGE; - bpage->space = static_cast(space); - bpage->offset = static_cast(offset); + bpage->id.copy_from(page_id); bpage->buf_fix_count = 1; ut_d(bpage->in_page_hash = TRUE); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, - fold, bpage); + page_id.fold(), bpage); buf_pool_mutex_exit(buf_pool); /* Once the sentinel is in the page_hash we can safely release all locks except just the relevant hash_lock */ hash_unlock_x_all_but(buf_pool->page_hash, - hash_lock); + *hash_lock); return(NULL); case BUF_BLOCK_ZIP_PAGE: @@ -1996,48 +3479,42 @@ page_found: return(NULL); } -/****************************************************************//** -Remove the sentinel block for the watch before replacing it with a real block. -buf_page_watch_clear() or buf_page_watch_occurred() will notice that -the block has been replaced with the real block. +/** Remove the sentinel block for the watch before replacing it with a +real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice +that the block has been replaced with the real block. 
+@param[in,out] buf_pool buffer pool instance +@param[in,out] watch sentinel for watch @return reference count, to be added to the replacement block */ static void buf_pool_watch_remove( -/*==================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint fold, /*!< in: buf_page_address_fold( - space, offset) */ - buf_page_t* watch) /*!< in/out: sentinel for watch */ + buf_pool_t* buf_pool, + buf_page_t* watch) { -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /* We must also own the appropriate hash_bucket mutex. */ - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, watch->id); + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); +#endif /* UNIV_DEBUG */ ut_ad(buf_pool_mutex_own(buf_pool)); - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch); + HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, watch->id.fold(), + watch); ut_d(watch->in_page_hash = FALSE); watch->buf_fix_count = 0; watch->state = BUF_BLOCK_POOL_WATCH; } -/****************************************************************//** -Stop watching if the page has been read in. -buf_pool_watch_set(space,offset) must have returned NULL before. */ -UNIV_INTERN +/** Stop watching if the page has been read in. +buf_pool_watch_set(same_page_id) must have returned NULL before. 
+@param[in] page_id page id */ void buf_pool_watch_unset( -/*=================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ulint fold = buf_page_address_fold(space, offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); + buf_pool_t* buf_pool = buf_pool_get(page_id); /* We only need to have buf_pool mutex in case where we end up calling buf_pool_watch_remove but to obey latching order @@ -2046,58 +3523,44 @@ buf_pool_watch_unset( called from the purge thread. */ buf_pool_mutex_enter(buf_pool); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); - /* The page must exist because buf_pool_watch_set() increments - buf_fix_count. */ + /* The page must exist because buf_pool_watch_set() + increments buf_fix_count. */ + bpage = buf_page_hash_get_low(buf_pool, page_id); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - - if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { - buf_block_unfix(reinterpret_cast(bpage)); - } else { - - ut_ad(bpage->buf_fix_count > 0); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); -#else - --bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - - if (bpage->buf_fix_count == 0) { - buf_pool_watch_remove(buf_pool, fold, bpage); - } + if (buf_block_unfix(bpage) == 0 + && buf_pool_watch_is_sentinel(buf_pool, bpage)) { + buf_pool_watch_remove(buf_pool, bpage); } buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(hash_lock); } -/****************************************************************//** -Check if the page has been read in. -This may only be called after buf_pool_watch_set(space,offset) -has returned NULL and before invoking buf_pool_watch_unset(space,offset). -@return FALSE if the given page was not read in, TRUE if it was */ -UNIV_INTERN +/** Check if the page has been read in. 
+This may only be called after buf_pool_watch_set(same_page_id) +has returned NULL and before invoking buf_pool_watch_unset(same_page_id). +@param[in] page_id page id +@return FALSE if the given page was not read in, TRUE if it was */ ibool buf_pool_watch_occurred( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { ibool ret; buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ulint fold = buf_page_address_fold(space, offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, - fold); + buf_pool_t* buf_pool = buf_pool_get(page_id); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_s_lock(hash_lock); + /* If not own buf_pool_mutex, page_hash can be changed. */ + hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id); + /* The page must exist because buf_pool_watch_set() increments buf_fix_count. */ - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, page_id); ret = !buf_pool_watch_is_sentinel(buf_pool, bpage); rw_lock_s_unlock(hash_lock); @@ -2109,7 +3572,6 @@ buf_pool_watch_occurred( Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from slipping out of the buffer pool. */ -UNIV_INTERN void buf_page_make_young( /*================*/ @@ -2148,54 +3610,26 @@ buf_page_make_young_if_needed( } } -/********************************************************************//** -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. 
*/ -UNIV_INTERN -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_block_t* block; - buf_pool_t* buf_pool = buf_pool_get(space, offset); +#ifdef UNIV_DEBUG - buf_pool_mutex_enter(buf_pool); - - block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset); - - if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) { - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - block->check_index_page_at_flush = FALSE; - } - - buf_pool_mutex_exit(buf_pool); -} - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG -/********************************************************************//** -Sets file_page_was_freed TRUE if the page is found in the buffer pool. +/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless reallocated. 
-@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN +@param[in] page_id page id +@return control block if found in page hash table, otherwise NULL */ buf_page_t* buf_page_set_file_page_was_freed( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); rw_lock_t* hash_lock; - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, - &hash_lock); + bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock); if (bpage) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); mutex_enter(block_mutex); rw_lock_s_unlock(hash_lock); @@ -2208,27 +3642,23 @@ buf_page_set_file_page_was_freed( return(bpage); } -/********************************************************************//** -Sets file_page_was_freed FALSE if the page is found in the buffer pool. +/** Sets file_page_was_freed FALSE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless reallocated. 
-@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN +@param[in] page_id page id +@return control block if found in page hash table, otherwise NULL */ buf_page_t* buf_page_reset_file_page_was_freed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); rw_lock_t* hash_lock; - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, - &hash_lock); + bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock); if (bpage) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); mutex_enter(block_mutex); rw_lock_s_unlock(hash_lock); @@ -2238,21 +3668,19 @@ buf_page_reset_file_page_was_freed( return(bpage); } -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ +#endif /* UNIV_DEBUG */ -/********************************************************************//** -Attempts to discard the uncompressed frame of a compressed page. The -caller should not be holding any mutexes when this function is called. -@return TRUE if successful, FALSE otherwise. */ +/** Attempts to discard the uncompressed frame of a compressed page. +The caller should not be holding any mutexes when this function is called. +@param[in] page_id page id +@return TRUE if successful, FALSE otherwise. 
*/ static void buf_block_try_discard_uncompressed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); /* Since we need to acquire buf_pool mutex to discard the uncompressed frame and because page_hash mutex resides @@ -2262,7 +3690,7 @@ buf_block_try_discard_uncompressed( we need to check again if the block is still in page_hash. */ buf_pool_mutex_enter(buf_pool); - bpage = buf_page_hash_get(buf_pool, space, offset); + bpage = buf_page_hash_get(buf_pool, page_id); if (bpage) { buf_LRU_free_page(bpage, false); @@ -2271,29 +3699,28 @@ buf_block_try_discard_uncompressed( buf_pool_mutex_exit(buf_pool); } -/********************************************************************//** -Get read access to a compressed page (usually of type +/** Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). The page must be released with buf_page_release_zip(). NOTE: the page is not protected by any latch. Mutual exclusion has to be implemented at a higher level. In other words, all possible accesses to a given page through this function must be protected by the same set of mutexes or latches. 
-@return pointer to the block */ -UNIV_INTERN +@param[in] page_id page id +@param[in] page_size page size +@return pointer to the block */ buf_page_t* buf_page_get_zip( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size) { buf_page_t* bpage; - ib_mutex_t* block_mutex; + BPageMutex* block_mutex; rw_lock_t* hash_lock; ibool discard_attempted = FALSE; ibool must_read; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); + buf_page_t* rpage = NULL; buf_pool->stat.n_page_gets++; @@ -2302,8 +3729,8 @@ lookup: /* The following call will also grab the page_hash mutex if the page is found. */ - bpage = buf_page_hash_get_s_locked(buf_pool, space, - offset, &hash_lock); + bpage = buf_page_hash_get_s_locked(buf_pool, page_id, + &hash_lock); if (bpage) { ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); break; @@ -2312,7 +3739,7 @@ lookup: /* Page not in buf_pool: needs to be read from file */ ut_ad(!hash_lock); - buf_read_page(space, zip_size, offset, NULL); + buf_read_page(page_id, page_size, &rpage); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); @@ -2340,28 +3767,26 @@ err_exit: case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: + buf_block_fix(bpage); block_mutex = &buf_pool->zip_mutex; mutex_enter(block_mutex); -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&bpage->buf_fix_count, 1); -#else - ++bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ goto got_block; case BUF_BLOCK_FILE_PAGE: /* Discard the uncompressed page frame if possible. 
*/ if (!discard_attempted) { rw_lock_s_unlock(hash_lock); - buf_block_try_discard_uncompressed(space, offset); + buf_block_try_discard_uncompressed(page_id); discard_attempted = TRUE; goto lookup; } + buf_block_buf_fix_inc((buf_block_t*) bpage, + __FILE__, __LINE__); + block_mutex = &((buf_block_t*) bpage)->mutex; mutex_enter(block_mutex); - buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__); goto got_block; } @@ -2372,9 +3797,8 @@ got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; rw_lock_s_unlock(hash_lock); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - ut_a(!bpage->file_page_was_freed); -#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */ + + ut_ad(!bpage->file_page_was_freed); buf_page_set_accessed(bpage); @@ -2409,9 +3833,9 @@ got_block: } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_page_get_space(bpage), - buf_page_get_page_no(bpage)) == 0); -#endif + ut_a(ibuf_count_get(page_id) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + return(bpage); } @@ -2423,8 +3847,9 @@ buf_block_init_low( /*===============*/ buf_block_t* block) /*!< in: block to init */ { - block->check_index_page_at_flush = FALSE; block->index = NULL; + block->made_dirty_with_no_latch = false; + block->skip_flush_check = false; block->n_hash_helps = 0; block->n_fields = 1; @@ -2435,8 +3860,7 @@ buf_block_init_low( /********************************************************************//** Decompress a block. 
-@return TRUE if successful */ -UNIV_INTERN +@return TRUE if successful */ ibool buf_zip_decompress( /*===============*/ @@ -2446,38 +3870,43 @@ buf_zip_decompress( const byte* frame = block->page.zip.data; ulint size = page_zip_get_size(&block->page.zip); - ut_ad(buf_block_get_zip_size(block)); - ut_a(buf_block_get_space(block) != 0); + ut_ad(block->page.size.is_compressed()); + ut_a(block->page.id.space() != 0); if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: compressed page checksum mismatch" - " (space %u page %u): stored: %lu, crc32: %lu " - "innodb: %lu, none: %lu\n", - block->page.space, block->page.offset, - mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_CRC32), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_INNODB), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_NONE)); + ib::error() << "Compressed page checksum mismatch " + << block->page.id << "): stored: " + << mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM) + << ", crc32: " + << page_zip_calc_checksum( + frame, size, SRV_CHECKSUM_ALGORITHM_CRC32) + << "/" + << page_zip_calc_checksum( + frame, size, SRV_CHECKSUM_ALGORITHM_CRC32, + true) + << " innodb: " + << page_zip_calc_checksum( + frame, size, SRV_CHECKSUM_ALGORITHM_INNODB) + << ", none: " + << page_zip_calc_checksum( + frame, size, SRV_CHECKSUM_ALGORITHM_NONE); + return(FALSE); } switch (fil_page_get_type(frame)) { case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: if (page_zip_decompress(&block->page.zip, block->frame, TRUE)) { return(TRUE); } - fprintf(stderr, - "InnoDB: unable to decompress space %u page %u\n", - block->page.space, - block->page.offset); + ib::error() << "Unable to decompress space " + << block->page.id.space() + << " page " << block->page.id.page_no(); + return(FALSE); case FIL_PAGE_TYPE_ALLOCATED: @@ -2488,155 +3917,62 @@ buf_zip_decompress( case 
FIL_PAGE_TYPE_ZBLOB: case FIL_PAGE_TYPE_ZBLOB2: /* Copy to uncompressed storage. */ - memcpy(block->frame, frame, - buf_block_get_zip_size(block)); + memcpy(block->frame, frame, block->page.size.physical()); return(TRUE); } - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unknown compressed page" - " type %lu\n", - fil_page_get_type(frame)); + ib::error() << "Unknown compressed page type " + << fil_page_get_type(frame); + return(FALSE); } #ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to if found -in this buffer pool instance. -@return pointer to block */ -UNIV_INTERN +/** Get a buffer block from an adaptive hash index pointer. +This function does not return if the block is not identified. +@param[in] ptr pointer to within a page frame +@return pointer to block, never NULL */ buf_block_t* -buf_block_align_instance( -/*=====================*/ - buf_pool_t* buf_pool, /*!< in: buffer in which the block - resides */ - const byte* ptr) /*!< in: pointer to a frame */ +buf_block_from_ahi(const byte* ptr) { - buf_chunk_t* chunk; - ulint i; + buf_pool_chunk_map_t::iterator it; - /* TODO: protect buf_pool->chunks with a mutex (it will - currently remain constant after buf_pool_init()) */ - for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) { - ulint offs; + buf_pool_chunk_map_t* chunk_map = buf_chunk_map_ref; + ut_ad(buf_chunk_map_ref == buf_chunk_map_reg); + ut_ad(!buf_pool_resizing); - if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) { + const byte* bound = reinterpret_cast(ptr) + > srv_buf_pool_chunk_unit + ? 
ptr - srv_buf_pool_chunk_unit : 0; + it = chunk_map->upper_bound(bound); - continue; - } - /* else */ + ut_a(it != chunk_map->end()); - offs = ptr - chunk->blocks->frame; + buf_chunk_t* chunk = it->second; + ulint offs = ptr - chunk->blocks->frame; - offs >>= UNIV_PAGE_SIZE_SHIFT; + offs >>= UNIV_PAGE_SIZE_SHIFT; - if (UNIV_LIKELY(offs < chunk->size)) { - buf_block_t* block = &chunk->blocks[offs]; + ut_a(offs < chunk->size); - /* The function buf_chunk_init() invokes - buf_block_init() so that block[n].frame == - block->frame + n * UNIV_PAGE_SIZE. Check it. */ - ut_ad(block->frame == page_align(ptr)); -#ifdef UNIV_DEBUG - /* A thread that updates these fields must - hold buf_pool->mutex and block->mutex. Acquire - only the latter. */ - mutex_enter(&block->mutex); + buf_block_t* block = &chunk->blocks[offs]; - switch (buf_block_get_state(block)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - /* These types should only be used in - the compressed buffer pool, whose - memory is allocated from - buf_pool->chunks, in UNIV_PAGE_SIZE - blocks flagged as BUF_BLOCK_MEMORY. */ - ut_error; - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - /* Some data structures contain - "guess" pointers to file pages. The - file pages may have been freed and - reused. Do not complain. */ - break; - case BUF_BLOCK_REMOVE_HASH: - /* buf_LRU_block_remove_hashed_page() - will overwrite the FIL_PAGE_OFFSET and - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with - 0xff and set the state to - BUF_BLOCK_REMOVE_HASH. 
*/ - ut_ad(page_get_space_id(page_align(ptr)) - == 0xffffffff); - ut_ad(page_get_page_no(page_align(ptr)) - == 0xffffffff); - break; - case BUF_BLOCK_FILE_PAGE: { - ulint space = page_get_space_id(page_align(ptr)); - ulint offset = page_get_page_no(page_align(ptr)); - - if (block->page.space != space || - block->page.offset != offset) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Block space_id %lu != page space_id %lu or " - "Block offset %lu != page offset %lu", - (ulint)block->page.space, space, - (ulint)block->page.offset, offset); - } - - ut_ad(block->page.space - == page_get_space_id(page_align(ptr))); - ut_ad(block->page.offset - == page_get_page_no(page_align(ptr))); - break; - } - } - - mutex_exit(&block->mutex); -#endif /* UNIV_DEBUG */ - - return(block); - } - } - - return(NULL); -} - -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to. -@return pointer to block, never NULL */ -UNIV_INTERN -buf_block_t* -buf_block_align( -/*============*/ - const byte* ptr) /*!< in: pointer to a frame */ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_block_t* block; - - block = buf_block_align_instance( - buf_pool_from_array(i), ptr); - if (block) { - return(block); - } - } - - /* The block should always be found. */ - ut_error; - return(NULL); + /* The function buf_chunk_init() invokes buf_block_init() so that + block[n].frame == block->frame + n * UNIV_PAGE_SIZE. Check it. */ + ut_ad(block->frame == page_align(ptr)); + /* Read the state of the block without holding a mutex. + A state transition from BUF_BLOCK_FILE_PAGE to + BUF_BLOCK_REMOVE_HASH is possible during this execution. */ + ut_d(const buf_page_state state = buf_block_get_state(block)); + ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_REMOVE_HASH); + return(block); } /********************************************************************//** Find out if a pointer belongs to a buf_block_t. 
It can be a pointer to the buf_block_t itself or a member of it. This functions checks one of the buffer pool instances. -@return TRUE if ptr belongs to a buf_block_t struct */ +@return TRUE if ptr belongs to a buf_block_t struct */ static ibool buf_pointer_is_block_field_instance( @@ -2645,10 +3981,11 @@ buf_pointer_is_block_field_instance( const void* ptr) /*!< in: pointer not dereferenced */ { const buf_chunk_t* chunk = buf_pool->chunks; - const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks; + const buf_chunk_t* const echunk = chunk + ut_min( + buf_pool->n_chunks, buf_pool->n_chunks_new); - /* TODO: protect buf_pool->chunks with a mutex (it will - currently remain constant after buf_pool_init()) */ + /* TODO: protect buf_pool->chunks with a mutex (the older pointer will + currently remain while during buf_pool_resize()) */ while (chunk < echunk) { if (ptr >= (void*) chunk->blocks && ptr < (void*) (chunk->blocks + chunk->size)) { @@ -2665,8 +4002,7 @@ buf_pointer_is_block_field_instance( /********************************************************************//** Find out if a pointer belongs to a buf_block_t. It can be a pointer to the buf_block_t itself or a member of it -@return TRUE if ptr belongs to a buf_block_t struct */ -UNIV_INTERN +@return TRUE if ptr belongs to a buf_block_t struct */ ibool buf_pointer_is_block_field( /*=======================*/ @@ -2689,7 +4025,7 @@ buf_pointer_is_block_field( /********************************************************************//** Find out if a buffer block was created by buf_chunk_init(). -@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */ +@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */ static ibool buf_block_is_uncompressed( @@ -2728,14 +4064,14 @@ buf_debug_execute_is_force_flush() } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/** -Wait for the block to be read in. -@param block The block to check */ +/** Wait for the block to be read in. 
+@param[in] block The block to check */ static void -buf_wait_for_read(buf_block_t* block) +buf_wait_for_read( + buf_block_t* block) { - /* Note: For the PAGE_ATOMIC_REF_COUNT case: + /* Note: We are using the block->lock to check for IO state (and a dirty read). We set the IO_READ state under the protection of the hash_lock @@ -2747,7 +4083,7 @@ buf_wait_for_read(buf_block_t* block) /* Wait until the read operation completes */ - ib_mutex_t* mutex = buf_page_get_mutex(&block->page); + BPageMutex* mutex = buf_page_get_mutex(&block->page); for (;;) { buf_io_fix io_fix; @@ -2769,41 +4105,43 @@ buf_wait_for_read(buf_block_t* block) } } -/********************************************************************//** -This is the general function used to get access to a database page. -@return pointer to the block or NULL */ -UNIV_INTERN +/** This is the general function used to get access to a database page. +@param[in] page_id page id +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[in] dirty_with_no_latch + mark page as dirty even if page + is being pinned without any latch +@return pointer to the block or NULL */ buf_block_t* buf_page_get_gen( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_block_t* guess, /*!< in: guessed block or NULL */ - ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or - BUF_GET_IF_IN_POOL_OR_WATCH */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr, /*!< in: mini-transaction */ - dberr_t* 
err) /*!< out: error code */ + const page_id_t& page_id, + const page_size_t& page_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + ulint line, + mtr_t* mtr, + dberr_t* err, + bool dirty_with_no_latch) { buf_block_t* block; - ulint fold; unsigned access_time; - ulint fix_type; rw_lock_t* hash_lock; - ulint retries = 0; buf_block_t* fix_block; - ib_mutex_t* fix_mutex = NULL; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + ulint retries = 0; + buf_pool_t* buf_pool = buf_pool_get(page_id); - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); + ut_ad(mtr->is_active()); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) + || (rw_latch == RW_SX_LATCH) || (rw_latch == RW_NO_LATCH)); if (err) { @@ -2824,22 +4162,29 @@ buf_page_get_gen( default: ut_error; } + + bool found; + const page_size_t& space_page_size + = fil_space_get_page_size(page_id.space(), &found); + + ut_ad(found); + + ut_ad(page_size.equals_to(space_page_size)); #endif /* UNIV_DEBUG */ - ut_ad(zip_size == fil_space_get_zip_size(space)); - ut_ad(ut_is_2pow(zip_size)); -#ifndef UNIV_LOG_DEBUG + ut_ad(!ibuf_inside(mtr) - || ibuf_page_low(space, zip_size, offset, - FALSE, file, line, NULL)); -#endif + || ibuf_page_low(page_id, page_size, FALSE, file, line, NULL)); + buf_pool->stat.n_page_gets++; - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); + hash_lock = buf_page_hash_lock_get(buf_pool, page_id); loop: block = guess; rw_lock_s_lock(hash_lock); + /* If not own buf_pool_mutex, page_hash can be changed. */ + hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id); + if (block != NULL) { /* If the guess is a compressed page descriptor that @@ -2847,8 +4192,7 @@ loop: it may have been freed by buf_relocate(). 
*/ if (!buf_block_is_uncompressed(buf_pool, block) - || offset != block->page.offset - || space != block->page.space + || !page_id.equals_to(block->page.id) || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { /* Our guess was bogus or things have changed @@ -2860,8 +4204,7 @@ loop: } if (block == NULL) { - block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); + block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id); } if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) { @@ -2876,15 +4219,39 @@ loop: if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { rw_lock_x_lock(hash_lock); - block = (buf_block_t*) buf_pool_watch_set( - space, offset, fold); - if (UNIV_LIKELY_NULL(block)) { + /* If not own buf_pool_mutex, + page_hash can be changed. */ + hash_lock = buf_page_hash_lock_x_confirm( + hash_lock, buf_pool, page_id); + + block = (buf_block_t*) buf_pool_watch_set( + page_id, &hash_lock); + + if (block) { /* We can release hash_lock after we increment the fix count to make sure that no state change takes place. */ fix_block = block; - buf_block_fix(fix_block); + + if (fsp_is_system_temporary(page_id.space())) { + /* For temporary tablespace, + the mutex is being used for + synchronization between user + thread and flush thread, + instead of block->lock. See + buf_flush_page() for the flush + thread counterpart. 
*/ + + BPageMutex* fix_mutex + = buf_page_get_mutex( + &fix_block->page); + mutex_enter(fix_mutex); + buf_block_fix(fix_block); + mutex_exit(fix_mutex); + } else { + buf_block_fix(fix_block); + } /* Now safe to release page_hash mutex */ rw_lock_x_unlock(hash_lock); @@ -2897,15 +4264,15 @@ loop: if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) { -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); + return(NULL); } - if (buf_read_page(space, zip_size, offset, &bpage)) { - buf_read_ahead_random(space, zip_size, offset, + if (buf_read_page(page_id, page_size, &bpage)) { + buf_read_ahead_random(page_id, page_size, ibuf_inside(mtr)); retries = 0; @@ -2920,7 +4287,7 @@ loop: /* Do not try again for encrypted pages */ if (!corrupted) { - ib_mutex_t* pmutex = buf_page_get_mutex(bpage); + BPageMutex* pmutex = buf_page_get_mutex(bpage); buf_pool = buf_pool_from_bpage(bpage); buf_pool_mutex_enter(buf_pool); @@ -2954,25 +4321,20 @@ loop: } if (corrupted) { - fprintf(stderr, "InnoDB: Error: Unable" - " to read tablespace %lu page no" - " %lu into the buffer pool after" - " %lu attempts\n" - "InnoDB: The most probable cause" - " of this error may be that the" - " table has been corrupted.\n" - "InnoDB: You can try to fix this" - " problem by using" - " innodb_force_recovery.\n" - "InnoDB: Please see reference manual" - " for more details.\n" - "InnoDB: Aborting...\n", - space, offset, - BUF_PAGE_READ_MAX_RETRIES); - - ut_error; + ib::fatal() << "Unable to read page " << page_id + << " into the buffer pool after " + << BUF_PAGE_READ_MAX_RETRIES << " attempts." + " The most probable cause of this error may" + " be that the table has been corrupted. 
Or," + " the table was compressed with with an" + " algorithm that is not supported by this" + " instance. If it is not a decompress failure," + " you can try to fix this problem by using" + " innodb_force_recovery." + " Please see " REFMAN " for more" + " details. Aborting..."; } else { - ib_mutex_t* pmutex = buf_page_get_mutex(bpage); + BPageMutex* pmutex = buf_page_get_mutex(bpage); buf_pool = buf_pool_from_bpage(bpage); buf_pool_mutex_enter(buf_pool); @@ -2996,39 +4358,42 @@ loop: } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); + ut_a(fsp_skip_sanity_check(page_id.space()) + || ++buf_dbg_counter % 5771 + || buf_validate()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ goto loop; } else { fix_block = block; } - buf_block_fix(fix_block); + if (fsp_is_system_temporary(page_id.space())) { + /* For temporary tablespace, the mutex is being used + for synchronization between user thread and flush + thread, instead of block->lock. See buf_flush_page() + for the flush thread counterpart. 
*/ + BPageMutex* fix_mutex = buf_page_get_mutex( + &fix_block->page); + mutex_enter(fix_mutex); + buf_block_fix(fix_block); + mutex_exit(fix_mutex); + } else { + buf_block_fix(fix_block); + } /* Now safe to release page_hash mutex */ rw_lock_s_unlock(hash_lock); got_block: - fix_mutex = buf_page_get_mutex(&fix_block->page); - - ut_ad(page_zip_get_size(&block->page.zip) == zip_size); - if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) { - bool must_read; - - { - buf_page_t* fix_page = &fix_block->page; - - mutex_enter(fix_mutex); - - buf_io_fix io_fix = buf_page_get_io_fix(fix_page); - - must_read = (io_fix == BUF_IO_READ); - - mutex_exit(fix_mutex); - } + buf_page_t* fix_page = &fix_block->page; + BPageMutex* fix_mutex = buf_page_get_mutex(fix_page); + mutex_enter(fix_mutex); + const bool must_read + = (buf_page_get_io_fix(fix_page) == BUF_IO_READ); + mutex_exit(fix_mutex); if (must_read) { /* The page is being read to buffer pool, @@ -3040,10 +4405,22 @@ got_block: } } - switch(buf_block_get_state(fix_block)) { + switch (buf_block_get_state(fix_block)) { buf_page_t* bpage; case BUF_BLOCK_FILE_PAGE: + bpage = &block->page; + if (fsp_is_system_temporary(page_id.space()) + && buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + /* This suggest that page is being flushed. + Avoid returning reference to this page. + Instead wait for flush action to complete. + For normal page this sync is done using SX + lock but for intrinsic there is no latching. */ + buf_block_unfix(fix_block); + os_thread_sleep(WAIT_FOR_WRITE); + goto loop; + } break; case BUF_BLOCK_ZIP_PAGE: @@ -3084,24 +4461,19 @@ got_block: buf_pool_mutex_enter(buf_pool); + /* If not own buf_pool_mutex, page_hash can be changed. */ + hash_lock = buf_page_hash_lock_get(buf_pool, page_id); + rw_lock_x_lock(hash_lock); /* Buffer-fixing prevents the page_hash from changing. 
*/ - ut_ad(bpage == buf_page_hash_get_low( - buf_pool, space, offset, fold)); + ut_ad(bpage == buf_page_hash_get_low(buf_pool, page_id)); - buf_block_mutex_enter(block); + buf_block_unfix(fix_block); + buf_page_mutex_enter(block); mutex_enter(&buf_pool->zip_mutex); - ut_ad(fix_block->page.buf_fix_count > 0); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1); -#else - --fix_block->page.buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - fix_block = block; if (bpage->buf_fix_count > 0 @@ -3117,7 +4489,7 @@ got_block: buf_LRU_block_free_non_file_page(block); buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(hash_lock); - buf_block_mutex_exit(block); + buf_page_mutex_exit(block); /* Try again */ goto loop; @@ -3133,18 +4505,18 @@ got_block: buf_block_init_low(block); - /* Set after relocate(). */ + /* Set after buf_relocate(). */ block->page.buf_fix_count = 1; - block->lock_hash_val = lock_rec_hash(space, offset); + block->lock_hash_val = lock_rec_hash(page_id.space(), + page_id.page_no()); UNIV_MEM_DESC(&block->page.zip.data, - page_zip_get_size(&block->page.zip)); + page_zip_get_size(&block->page.zip)); if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) { #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(list, buf_pool->zip_clean, - &block->page); + UT_LIST_REMOVE(buf_pool->zip_clean, &block->page); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ ut_ad(!block->page.in_flush_list); } else { @@ -3166,15 +4538,13 @@ got_block: UNIV_MEM_INVALID(bpage, sizeof *bpage); rw_lock_x_unlock(hash_lock); - - ++buf_pool->n_pend_unzip; - + buf_pool->n_pend_unzip++; mutex_exit(&buf_pool->zip_mutex); buf_pool_mutex_exit(buf_pool); access_time = buf_page_is_accessed(&block->page); - buf_block_mutex_exit(block); + buf_page_mutex_exit(block); buf_page_free_descriptor(bpage); @@ -3192,22 +4562,21 @@ got_block: if (!recv_no_ibuf_operations) { if (access_time) { #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space, offset) 
== 0); + ut_a(ibuf_count_get(page_id) == 0); #endif /* UNIV_IBUF_COUNT_DEBUG */ } else { ibuf_merge_or_delete_for_page( - block, space, offset, zip_size, TRUE); + block, page_id, &page_size, TRUE); } } buf_pool_mutex_enter(buf_pool); - /* Unfix and unlatch the block. */ - buf_block_mutex_enter(fix_block); + buf_page_mutex_enter(fix_block); buf_block_set_io_fix(fix_block, BUF_IO_NONE); - buf_block_mutex_exit(fix_block); + buf_page_mutex_exit(fix_block); --buf_pool->n_pend_unzip; @@ -3229,10 +4598,8 @@ got_block: ut_ad(block == fix_block); ut_ad(fix_block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); @@ -3254,18 +4621,29 @@ got_block: are holding the buf_pool->mutex. */ if (buf_LRU_free_page(&fix_block->page, true)) { + buf_pool_mutex_exit(buf_pool); + + /* If not own buf_pool_mutex, + page_hash can be changed. */ + hash_lock = buf_page_hash_lock_get(buf_pool, page_id); + rw_lock_x_lock(hash_lock); + /* If not own buf_pool_mutex, + page_hash can be changed. */ + hash_lock = buf_page_hash_lock_x_confirm( + hash_lock, buf_pool, page_id); + if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { /* Set the watch, as it would have been set if the page were not in the buffer pool in the first place. */ block = (buf_block_t*) buf_pool_watch_set( - space, offset, fold); + page_id, &hash_lock); } else { block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); + buf_pool, page_id); } rw_lock_x_unlock(hash_lock); @@ -3277,26 +4655,29 @@ got_block: and before we acquire the hash_lock above. Try again. 
*/ guess = block; + goto loop; } - fprintf(stderr, - "innodb_change_buffering_debug evict %u %u\n", - (unsigned) space, (unsigned) offset); + ib::info() << "innodb_change_buffering_debug evict " + << page_id; + return(NULL); } - mutex_enter(&fix_block->mutex); + buf_page_mutex_enter(fix_block); if (buf_flush_page_try(buf_pool, fix_block)) { - fprintf(stderr, - "innodb_change_buffering_debug flush %u %u\n", - (unsigned) space, (unsigned) offset); + + ib::info() << "innodb_change_buffering_debug flush " + << page_id; + guess = fix_block; + goto loop; } - buf_block_mutex_exit(fix_block); + buf_page_mutex_exit(fix_block); buf_block_fix(fix_block); @@ -3308,30 +4689,40 @@ got_block: ut_ad(fix_block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /* We have already buffer fixed the page, and we are committed to - returning this page to the caller. Register for debugging. */ - { - ibool ret; - ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line); + returning this page to the caller. Register for debugging. + Avoid debug latching if page/block belongs to system temporary + tablespace (Not much needed for table with single threaded access.). */ + if (!fsp_is_system_temporary(page_id.space())) { + ibool ret; + ret = rw_lock_s_lock_nowait( + &fix_block->debug_latch, file, line); ut_a(ret); } -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ + + /* While tablespace is reinited the indexes are already freed but the + blocks related to it still resides in buffer pool. Trying to remove + such blocks from buffer pool would invoke removal of AHI entries + associated with these blocks. Logic to remove AHI entry will try to + load the block but block is already in free state. Handle the said case + with mode = BUF_PEEK_IF_IN_POOL that is invoked from + "btr_search_drop_page_hash_when_freed". 
*/ + ut_ad(mode == BUF_GET_POSSIBLY_FREED + || mode == BUF_PEEK_IF_IN_POOL + || !fix_block->page.file_page_was_freed); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - ut_a(mode == BUF_GET_POSSIBLY_FREED - || !fix_block->page.file_page_was_freed); -#endif /* Check if this is the first access to the page */ access_time = buf_page_is_accessed(&fix_block->page); /* This is a heuristic and we don't care about ordering issues. */ if (access_time == 0) { - buf_block_mutex_enter(fix_block); + buf_page_mutex_enter(fix_block); buf_page_set_accessed(&fix_block->page); - buf_block_mutex_exit(fix_block); + buf_page_mutex_exit(fix_block); } if (mode != BUF_PEEK_IF_IN_POOL) { @@ -3339,25 +4730,33 @@ got_block: } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(fix_block->page.buf_fix_count > 0); + ut_a(fsp_skip_sanity_check(page_id.space()) + || ++buf_dbg_counter % 5771 + || buf_validate()); ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef PAGE_ATOMIC_REF_COUNT /* We have to wait here because the IO_READ state was set - under the protection of the hash_lock and the block->mutex - but not the block->lock. */ + under the protection of the hash_lock and not the block->mutex + and block->lock. */ buf_wait_for_read(fix_block); -#endif /* PAGE_ATOMIC_REF_COUNT */ + + /* Mark block as dirty if requested by caller. If not requested (false) + then we avoid updating the dirty state of the block and retain the + original one. This is reason why ? + Same block can be shared/pinned by 2 different mtrs. If first mtr + set the dirty state to true and second mtr mark it as false the last + updated dirty state is retained. Which means we can loose flushing of + a modified block. 
*/ + if (dirty_with_no_latch) { + fix_block->made_dirty_with_no_latch = dirty_with_no_latch; + } + + mtr_memo_type_t fix_type; switch (rw_latch) { case RW_NO_LATCH: -#ifndef PAGE_ATOMIC_REF_COUNT - buf_wait_for_read(fix_block); -#endif /* !PAGE_ATOMIC_REF_COUNT */ - fix_type = MTR_MEMO_BUF_FIX; break; @@ -3367,6 +4766,12 @@ got_block: fix_type = MTR_MEMO_PAGE_S_FIX; break; + case RW_SX_LATCH: + rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line); + + fix_type = MTR_MEMO_PAGE_SX_FIX; + break; + default: ut_ad(rw_latch == RW_X_LATCH); rw_lock_x_lock_inline(&fix_block->lock, 0, file, line); @@ -3381,26 +4786,23 @@ got_block: /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear( - space, zip_size, offset, ibuf_inside(mtr)); + buf_read_ahead_linear(page_id, page_size, ibuf_inside(mtr)); } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(fix_block), - buf_block_get_page_no(fix_block)) == 0); + ut_a(ibuf_count_get(fix_block->page.id) == 0); #endif -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); + return(fix_block); } /********************************************************************//** This is the general function used to get optimistic access to a database page. 
-@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ ibool buf_page_optimistic_get( /*====================*/ @@ -3414,18 +4816,17 @@ buf_page_optimistic_get( buf_pool_t* buf_pool; unsigned access_time; ibool success; - ulint fix_type; ut_ad(block); ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); + ut_ad(mtr->is_active()); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) { - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); return(FALSE); } @@ -3436,41 +4837,52 @@ buf_page_optimistic_get( buf_page_set_accessed(&block->page); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); buf_page_make_young_if_needed(&block->page); ut_ad(!ibuf_inside(mtr) - || ibuf_page(buf_block_get_space(block), - buf_block_get_zip_size(block), - buf_block_get_page_no(block), NULL)); + || ibuf_page(block->page.id, block->page.size, NULL)); + + mtr_memo_type_t fix_type; + + switch (rw_latch) { + case RW_S_LATCH: + success = rw_lock_s_lock_nowait(&block->lock, file, line); - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = rw_lock_x_lock_func_nowait_inline(&(block->lock), - file, line); + break; + case RW_X_LATCH: + success = rw_lock_x_lock_func_nowait_inline( + &block->lock, file, line); + fix_type = MTR_MEMO_PAGE_X_FIX; + break; + default: + ut_error; /* RW_SX_LATCH is not implemented yet */ } - if (UNIV_UNLIKELY(!success)) { + if (!success) { + buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); return(FALSE); } - if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) { + if (modify_clock != block->modify_clock) { + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); if (rw_latch == RW_S_LATCH) { - rw_lock_s_unlock(&(block->lock)); + rw_lock_s_unlock(&block->lock); } else { - 
rw_lock_x_unlock(&(block->lock)); + rw_lock_x_unlock(&block->lock); } + buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); return(FALSE); } @@ -3478,31 +4890,28 @@ buf_page_optimistic_get( mtr_memo_push(mtr, block, fix_type); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); + ut_a(fsp_skip_sanity_check(block->page.id.space()) + || ++buf_dbg_counter % 5771 + || buf_validate()); ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); -#endif + ut_d(buf_page_mutex_enter(block)); + ut_ad(!block->page.file_page_was_freed); + ut_d(buf_page_mutex_exit(block)); if (!access_time) { /* In the case of a first access, try to apply linear read-ahead */ - - buf_read_ahead_linear(buf_block_get_space(block), - buf_block_get_zip_size(block), - buf_block_get_page_no(block), + buf_read_ahead_linear(block->page.id, block->page.size, ibuf_inside(mtr)); } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); -#endif + ut_a(ibuf_count_get(block->page.id) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + buf_pool = buf_pool_from_block(block); buf_pool->stat.n_page_gets++; @@ -3513,8 +4922,7 @@ buf_page_optimistic_get( This is used to get access to a known database page, when no waiting can be done. For example, if a search in an adaptive hash index leads us to this frame. 
-@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ ibool buf_page_get_known_nowait( /*======================*/ @@ -3527,13 +4935,11 @@ buf_page_get_known_nowait( { buf_pool_t* buf_pool; ibool success; - ulint fix_type; - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); + ut_ad(mtr->is_active()); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) { /* Another thread is just freeing the block from the LRU list @@ -3543,7 +4949,7 @@ buf_page_get_known_nowait( we have already removed it from the page address hash table of the buffer pool. */ - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); return(FALSE); } @@ -3554,7 +4960,7 @@ buf_page_get_known_nowait( buf_page_set_accessed(&block->page); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); buf_pool = buf_pool_from_block(block); @@ -3564,18 +4970,27 @@ buf_page_get_known_nowait( ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD); - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); + mtr_memo_type_t fix_type; + + switch (rw_latch) { + case RW_S_LATCH: + success = rw_lock_s_lock_nowait(&block->lock, file, line); fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = rw_lock_x_lock_func_nowait_inline(&(block->lock), - file, line); + break; + case RW_X_LATCH: + success = rw_lock_x_lock_func_nowait_inline( + &block->lock, file, line); + fix_type = MTR_MEMO_PAGE_X_FIX; + break; + default: + ut_error; /* RW_SX_LATCH is not implemented yet */ } if (!success) { + buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); return(FALSE); } @@ -3587,7 +5002,8 @@ buf_page_get_known_nowait( ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG + +#ifdef 
UNIV_DEBUG if (mode != BUF_KEEP_OLD) { /* If mode == BUF_KEEP_OLD, we are executing an I/O completion routine. Avoid a bogus assertion failure @@ -3596,50 +5012,44 @@ buf_page_get_known_nowait( deleting a record from SYS_INDEXES. This check will be skipped in recv_recover_page() as well. */ - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } -#endif +#endif /* UNIV_DEBUG */ #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a((mode == BUF_KEEP_OLD) - || (ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0)); + ut_a((mode == BUF_KEEP_OLD) || ibuf_count_get(block->page.id) == 0); #endif buf_pool->stat.n_page_gets++; return(TRUE); } -/*******************************************************************//** -Given a tablespace id and page number tries to get that page. If the +/** Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. Suitable for using when holding the lock_sys_t::mutex. 
-@return pointer to a page or NULL */ -UNIV_INTERN +@param[in] page_id page id +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@return pointer to a page or NULL */ buf_block_t* buf_page_try_get_func( -/*==================*/ - ulint space_id,/*!< in: tablespace id */ - ulint page_no,/*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - bool possibly_freed, - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mini-transaction */ + const page_id_t& page_id, + const char* file, + ulint line, + mtr_t* mtr) { buf_block_t* block; ibool success; - ulint fix_type; - buf_pool_t* buf_pool = buf_pool_get(space_id, page_no); + buf_pool_t* buf_pool = buf_pool_get(page_id); rw_lock_t* hash_lock; ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); + ut_ad(mtr->is_active()); - block = buf_block_hash_get_s_locked(buf_pool, space_id, - page_no, &hash_lock); + block = buf_block_hash_get_s_locked(buf_pool, page_id, &hash_lock); if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { if (block) { @@ -3650,24 +5060,19 @@ buf_page_try_get_func( ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_a(buf_block_get_space(block) == space_id); - ut_a(buf_block_get_page_no(block) == page_no); + ut_a(page_id.equals_to(block->page.id)); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); - if (rw_latch == RW_S_LATCH) { - fix_type = MTR_MEMO_PAGE_S_FIX; - success = rw_lock_s_lock_nowait(&block->lock, file, line); - } else { - success = false; - } + mtr_memo_type_t fix_type = MTR_MEMO_PAGE_S_FIX; + success = rw_lock_s_lock_nowait(&block->lock, file, line); if (!success) { 
/* Let us try to get an X-latch. If the current thread @@ -3680,32 +5085,34 @@ buf_page_try_get_func( } if (!success) { + buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); + buf_page_mutex_exit(block); return(NULL); } mtr_memo_push(mtr, block, fix_type); + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); + ut_a(fsp_skip_sanity_check(block->page.id.space()) + || ++buf_dbg_counter % 5771 + || buf_validate()); ut_a(block->page.buf_fix_count > 0); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - if (!possibly_freed) { - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); - } -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + + ut_d(buf_page_mutex_enter(block)); + ut_d(ut_a(!block->page.file_page_was_freed)); + ut_d(buf_page_mutex_exit(block)); + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_pool->stat.n_page_gets++; #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); -#endif + ut_a(ibuf_count_get(block->page.id) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ return(block); } @@ -3736,43 +5143,38 @@ buf_page_init_low( bpage->slot = NULL; HASH_INVALIDATE(bpage, hash); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + + ut_d(bpage->file_page_was_freed = FALSE); } -/********************************************************************//** -Inits a page to the buffer buf_pool. */ -static MY_ATTRIBUTE((nonnull)) +/** Inits a page to the buffer buf_pool. 
+@param[in,out] buf_pool buffer pool +@param[in] page_id page id +@param[in,out] block block to init */ +static void buf_page_init( -/*==========*/ - buf_pool_t* buf_pool,/*!< in/out: buffer pool */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint fold, /*!< in: buf_page_address_fold(space,offset) */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - buf_block_t* block) /*!< in/out: block to init */ + buf_pool_t* buf_pool, + const page_id_t& page_id, + const page_size_t& page_size, + buf_block_t* block) { buf_page_t* hash_page; - ut_ad(buf_pool == buf_pool_get(space, offset)); + ut_ad(buf_pool == buf_pool_get(page_id)); ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(mutex_own(&(block->mutex))); + ut_ad(buf_page_mutex_own(block)); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, page_id), + RW_LOCK_X)); /* Set the state of the block */ - buf_block_set_file_page(block, space, offset); + buf_block_set_file_page(block, page_id); #ifdef UNIV_DEBUG_VALGRIND - if (!space) { + if (is_system_tablespace(page_id.space())) { /* Silence valid Valgrind warnings about uninitialized data being written to data files. There are some unused bytes on some pages that InnoDB does not initialize. 
*/ @@ -3782,60 +5184,58 @@ buf_page_init( buf_block_init_low(block); - block->lock_hash_val = lock_rec_hash(space, offset); + block->lock_hash_val = lock_rec_hash(page_id.space(), + page_id.page_no()); buf_page_init_low(&block->page); /* Insert into the hash table of file pages */ - hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold); + hash_page = buf_page_hash_get_low(buf_pool, page_id); if (hash_page == NULL) { - /* Block not found in the hash table */ + /* Block not found in hash table */ } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) { + /* Preserve the reference count. */ ib_uint32_t buf_fix_count = hash_page->buf_fix_count; - ut_a(buf_fix_count > 0); + ut_a(buf_fix_count > 0); -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32( - &block->page.buf_fix_count, buf_fix_count); -#else - block->page.buf_fix_count += ulint(buf_fix_count); -#endif /* PAGE_ATOMIC_REF_COUNT */ + os_atomic_increment_uint32(&block->page.buf_fix_count, + buf_fix_count); - buf_pool_watch_remove(buf_pool, fold, hash_page); + buf_pool_watch_remove(buf_pool, hash_page); } else { - fprintf(stderr, - "InnoDB: Error: page %lu %lu already found" - " in the hash table: %p, %p\n", - space, - offset, - (const void*) hash_page, (const void*) block); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - mutex_exit(&block->mutex); - buf_pool_mutex_exit(buf_pool); - buf_print(); - buf_LRU_print(); - buf_validate(); - buf_LRU_validate(); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_error; + + ib::error() << "Page " << page_id + << " already found in the hash table: " + << hash_page << ", " << block; + + ut_d(buf_page_mutex_exit(block)); + ut_d(buf_pool_mutex_exit(buf_pool)); + ut_d(buf_print()); + ut_d(buf_LRU_print()); + ut_d(buf_validate()); + ut_d(buf_LRU_validate()); + ut_ad(0); } ut_ad(!block->page.in_zip_hash); ut_ad(!block->page.in_page_hash); ut_d(block->page.in_page_hash = TRUE); - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page); + 
block->page.id.copy_from(page_id); + block->page.size.copy_from(page_size); - if (zip_size) { - page_zip_set_size(&block->page.zip, zip_size); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + page_id.fold(), &block->page); + + if (page_size.is_compressed()) { + page_zip_set_size(&block->page.zip, page_size.physical()); } } -/********************************************************************//** -Function which inits a page for read to the buffer buf_pool. If the page is +/** Inits a page for read to the buffer buf_pool. If the page is (1) already in buf_pool, or (2) if we specify to read only ibuf pages and the page is not an ibuf page, or (3) if the space is deleted or being deleted, @@ -3843,31 +5243,27 @@ then this function does nothing. Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock on the buffer frame. The io-handler must take care that the flag is cleared and the lock released later. -@return pointer to the block or NULL */ -UNIV_INTERN +@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED +@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ... +@param[in] page_id page id +@param[in] unzip TRUE=request uncompressed page +@return pointer to the block or NULL */ buf_page_t* buf_page_init_for_read( -/*===================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... 
*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - ibool unzip, /*!< in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version, - /*!< in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset) /*!< in: page number */ + dberr_t* err, + ulint mode, + const page_id_t& page_id, + const page_size_t& page_size, + ibool unzip) { buf_block_t* block; buf_page_t* bpage = NULL; buf_page_t* watch_page; rw_lock_t* hash_lock; mtr_t mtr; - ulint fold; ibool lru = FALSE; void* data; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); ut_ad(buf_pool); @@ -3876,12 +5272,12 @@ buf_page_init_for_read( if (mode == BUF_READ_IBUF_PAGES_ONLY) { /* It is a read-ahead within an ibuf routine */ - ut_ad(!ibuf_bitmap_page(zip_size, offset)); + ut_ad(!ibuf_bitmap_page(page_id, page_size)); ibuf_mtr_start(&mtr); - if (!recv_no_ibuf_operations - && !ibuf_page(space, zip_size, offset, &mtr)) { + if (!recv_no_ibuf_operations && + !ibuf_page(page_id, page_size, &mtr)) { ibuf_mtr_commit(&mtr); @@ -3891,7 +5287,7 @@ buf_page_init_for_read( ut_ad(mode == BUF_READ_ANY_PAGE); } - if (zip_size && !unzip && !recv_recovery_is_on()) { + if (page_size.is_compressed() && !unzip && !recv_recovery_is_on()) { block = NULL; } else { block = buf_LRU_get_free_block(buf_pool); @@ -3899,53 +5295,40 @@ buf_page_init_for_read( ut_ad(buf_pool_from_block(block) == buf_pool); } - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - buf_pool_mutex_enter(buf_pool); + + hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); - watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold); + watch_page = buf_page_hash_get_low(buf_pool, page_id); if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) { /* The page is already in the buffer pool. 
*/ watch_page = NULL; -err_exit: rw_lock_x_unlock(hash_lock); if (block) { - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } bpage = NULL; goto func_exit; } - if (fil_tablespace_deleted_or_being_deleted_in_mem( - space, tablespace_version)) { - /* The page belongs to a space which has been - deleted or is being deleted. */ - *err = DB_TABLESPACE_DELETED; - - goto err_exit; - } - if (block) { bpage = &block->page; - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); ut_ad(buf_pool_from_bpage(bpage) == buf_pool); - buf_page_init(buf_pool, space, offset, fold, zip_size, block); + buf_page_init(buf_pool, page_id, page_size, block); -#ifdef PAGE_ATOMIC_REF_COUNT - /* Note: We set the io state without the protection of - the block->lock. This is because other threads cannot - access this block unless it is in the hash table. */ + /* Note: We are using the hash_lock for protection. This is + safe because no other thread can lookup the block from the + page hashtable yet. */ buf_page_set_io_fix(bpage, BUF_IO_READ); -#endif /* PAGE_ATOMIC_REF_COUNT */ rw_lock_x_unlock(hash_lock); @@ -3963,11 +5346,7 @@ err_exit: rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); -#ifndef PAGE_ATOMIC_REF_COUNT - buf_page_set_io_fix(bpage, BUF_IO_READ); -#endif /* !PAGE_ATOMIC_REF_COUNT */ - - if (zip_size) { + if (page_size.is_compressed()) { /* buf_pool->mutex may be released and reacquired by buf_buddy_alloc(). Thus, we must release block->mutex in order not to @@ -3976,9 +5355,10 @@ err_exit: operation until after the block descriptor has been added to buf_pool->LRU and buf_pool->page_hash. 
*/ - mutex_exit(&block->mutex); - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - mutex_enter(&block->mutex); + buf_page_mutex_exit(block); + data = buf_buddy_alloc(buf_pool, page_size.physical(), + &lru); + buf_page_mutex_enter(block); block->page.zip.data = (page_zip_t*) data; /* To maintain the invariant @@ -3990,7 +5370,7 @@ err_exit: buf_unzip_LRU_add_block(block, TRUE); } - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } else { rw_lock_x_unlock(hash_lock); @@ -3998,7 +5378,7 @@ err_exit: control block (bpage), in order to avoid the invocation of buf_buddy_relocate_block() on uninitialized data. */ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); + data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru); rw_lock_x_lock(hash_lock); @@ -4007,8 +5387,7 @@ err_exit: check the page_hash again, as it may have been modified. */ if (UNIV_UNLIKELY(lru)) { - watch_page = buf_page_hash_get_low( - buf_pool, space, offset, fold); + watch_page = buf_page_hash_get_low(buf_pool, page_id); if (UNIV_UNLIKELY(watch_page && !buf_pool_watch_is_sentinel(buf_pool, @@ -4017,7 +5396,8 @@ err_exit: /* The block was added by some other thread. 
*/ rw_lock_x_unlock(hash_lock); watch_page = NULL; - buf_buddy_free(buf_pool, data, zip_size); + buf_buddy_free(buf_pool, data, + page_size.physical()); bpage = NULL; goto func_exit; @@ -4030,28 +5410,25 @@ err_exit: bpage->buf_pool_index = buf_pool_index(buf_pool); page_zip_des_init(&bpage->zip); - page_zip_set_size(&bpage->zip, zip_size); + page_zip_set_size(&bpage->zip, page_size.physical()); bpage->zip.data = (page_zip_t*) data; - bpage->slot = NULL; + bpage->size.copy_from(page_size); mutex_enter(&buf_pool->zip_mutex); - UNIV_MEM_DESC(bpage->zip.data, - page_zip_get_size(&bpage->zip)); + UNIV_MEM_DESC(bpage->zip.data, bpage->size.physical()); buf_page_init_low(bpage); - bpage->state = BUF_BLOCK_ZIP_PAGE; - bpage->space = static_cast(space); - bpage->offset = static_cast(offset); + bpage->state = BUF_BLOCK_ZIP_PAGE; + bpage->id.copy_from(page_id); + bpage->flush_observer = NULL; -#ifdef UNIV_DEBUG - bpage->in_page_hash = FALSE; - bpage->in_zip_hash = FALSE; - bpage->in_flush_list = FALSE; - bpage->in_free_list = FALSE; - bpage->in_LRU_list = FALSE; -#endif /* UNIV_DEBUG */ + ut_d(bpage->in_page_hash = FALSE); + ut_d(bpage->in_zip_hash = FALSE); + ut_d(bpage->in_flush_list = FALSE); + ut_d(bpage->in_free_list = FALSE); + ut_d(bpage->in_LRU_list = FALSE); ut_d(bpage->in_page_hash = TRUE); @@ -4064,24 +5441,20 @@ err_exit: ut_a(buf_fix_count > 0); -#ifdef PAGE_ATOMIC_REF_COUNT os_atomic_increment_uint32( &bpage->buf_fix_count, buf_fix_count); -#else - bpage->buf_fix_count += buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page)); - buf_pool_watch_remove(buf_pool, fold, watch_page); + buf_pool_watch_remove(buf_pool, watch_page); } - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, - bpage); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + bpage->id.fold(), bpage); rw_lock_x_unlock(hash_lock); /* The block must be put to the LRU list, to the old blocks. 
- The zip_size is already set into the page zip */ + The zip size is already set into the page zip */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG buf_LRU_insert_zip_clean(bpage); @@ -4101,63 +5474,54 @@ func_exit: ibuf_mtr_commit(&mtr); } - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); ut_ad(!bpage || buf_page_in_file(bpage)); + return(bpage); } -/********************************************************************//** -Initializes a page to the buffer buf_pool. The page is usually not read +/** Initializes a page to the buffer buf_pool. The page is usually not read from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). 
-@return pointer to the block, page bufferfixed */ -UNIV_INTERN +@param[in] page_id page id +@param[in] page_size page size +@param[in] mtr mini-transaction +@return pointer to the block, page bufferfixed */ buf_block_t* buf_page_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space in units of - a page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in: mini-transaction handle */ + const page_id_t& page_id, + const page_size_t& page_size, + mtr_t* mtr) { buf_frame_t* frame; buf_block_t* block; - ulint fold; buf_block_t* free_block = NULL; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); rw_lock_t* hash_lock; - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(space || !zip_size); + ut_ad(mtr->is_active()); + ut_ad(page_id.space() != 0 || !page_size.is_compressed()); free_block = buf_LRU_get_free_block(buf_pool); - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - buf_pool_mutex_enter(buf_pool); + + hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); - block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); + block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id); if (block && buf_page_in_file(&block->page) && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) { + #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space, offset) == 0); -#endif -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + ut_a(ibuf_count_get(page_id) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + + ut_d(block->page.file_page_was_freed = FALSE); /* Page can be found in buf_pool */ buf_pool_mutex_exit(buf_pool); @@ -4165,23 +5529,19 @@ buf_page_create( buf_block_free(free_block); - return(buf_page_get_with_no_latch(space, 
zip_size, offset, mtr)); + return(buf_page_get_with_no_latch(page_id, page_size, mtr)); } /* If we get here, the page was not in buf_pool: init it there */ -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Creating space %lu page %lu to buffer\n", - space, offset); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("create page %u:%u", + page_id.space(), page_id.page_no())); block = free_block; - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); - buf_page_init(buf_pool, space, offset, fold, zip_size, block); + buf_page_init(buf_pool, page_id, page_size, block); rw_lock_x_unlock(hash_lock); @@ -4191,7 +5551,7 @@ buf_page_create( buf_block_buf_fix_inc(block, __FILE__, __LINE__); buf_pool->stat.n_pages_created++; - if (zip_size) { + if (page_size.is_compressed()) { void* data; ibool lru; @@ -4202,15 +5562,15 @@ buf_page_create( buf_page_set_io_fix(&block->page, BUF_IO_READ); rw_lock_x_lock(&block->lock); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); /* buf_pool->mutex may be released and reacquired by buf_buddy_alloc(). Thus, we must release block->mutex in order not to break the latching order in the reacquisition of buf_pool->mutex. We also must defer this operation until after the block descriptor has been added to buf_pool->LRU and buf_pool->page_hash. 
*/ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - mutex_enter(&block->mutex); + data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru); + buf_page_mutex_enter(block); block->page.zip.data = (page_zip_t*) data; /* To maintain the invariant @@ -4231,12 +5591,11 @@ buf_page_create( buf_page_set_accessed(&block->page); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ - - ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE); + ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE); frame = block->frame; @@ -4244,11 +5603,15 @@ buf_page_create( memset(frame + FIL_PAGE_NEXT, 0xff, 4); mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); - /* Reset to zero the file flush lsn field in the page; if the first - page of an ibdata file is 'created' in this function into the buffer - pool then we lose the original contents of the file flush lsn stamp. - Then InnoDB could in a crash recovery print a big, false, corruption - warning if the stamp contains an lsn bigger than the ib_logfile lsn. */ + /* These 8 bytes are also repurposed for PageIO compression and must + be reset when the frame is assigned to a new page id. See fil0fil.h. + + + FIL_PAGE_FILE_FLUSH_LSN is used on the following pages: + (1) The first page of the InnoDB system tablespace (page 0:0) + (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages . + + Therefore we don't transparently compress such pages. 
*/ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); @@ -4256,8 +5619,7 @@ buf_page_create( ut_a(++buf_dbg_counter % 5771 || buf_validate()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); + ut_a(ibuf_count_get(block->page.id) == 0); #endif return(block); } @@ -4291,6 +5653,7 @@ buf_page_monitor( ulint level; case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: level = btr_page_get_level_low(frame); /* Check if it is an index page for insert buffer */ @@ -4315,49 +5678,49 @@ buf_page_monitor( } break; - case FIL_PAGE_UNDO_LOG: + case FIL_PAGE_UNDO_LOG: counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE); break; - case FIL_PAGE_INODE: + case FIL_PAGE_INODE: counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE); break; - case FIL_PAGE_IBUF_FREE_LIST: + case FIL_PAGE_IBUF_FREE_LIST: counter = MONITOR_RW_COUNTER(io_type, MONITOR_IBUF_FREELIST_PAGE); break; - case FIL_PAGE_IBUF_BITMAP: + case FIL_PAGE_IBUF_BITMAP: counter = MONITOR_RW_COUNTER(io_type, MONITOR_IBUF_BITMAP_PAGE); break; - case FIL_PAGE_TYPE_SYS: + case FIL_PAGE_TYPE_SYS: counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE); break; - case FIL_PAGE_TYPE_TRX_SYS: + case FIL_PAGE_TYPE_TRX_SYS: counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE); break; - case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_TYPE_FSP_HDR: counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE); break; - case FIL_PAGE_TYPE_XDES: + case FIL_PAGE_TYPE_XDES: counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE); break; - case FIL_PAGE_TYPE_BLOB: + case FIL_PAGE_TYPE_BLOB: counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE); break; - case FIL_PAGE_TYPE_ZBLOB: + case FIL_PAGE_TYPE_ZBLOB: counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE); break; - case FIL_PAGE_TYPE_ZBLOB2: + case FIL_PAGE_TYPE_ZBLOB2: counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE); break; @@ -4369,7 
+5732,7 @@ buf_page_monitor( } /********************************************************************//** -Mark a table with the specified space pointed by bpage->space corrupted. +Mark a table with the specified space pointed by bpage->id.space() corrupted. Also remove the bpage from LRU list. @return TRUE if successful */ static @@ -4381,7 +5744,7 @@ buf_mark_space_corrupt( buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - ulint space = bpage->space; + ib_uint32_t space = bpage->id.space(); ibool ret = TRUE; if (!bpage->encrypted) { @@ -4433,8 +5796,7 @@ buf_page_check_corrupt( /*===================*/ buf_page_t* bpage) /*!< in/out: buffer page read from disk */ { - ulint zip_size = buf_page_get_zip_size(bpage); - byte* dst_frame = (zip_size) ? bpage->zip.data : + byte* dst_frame = (bpage->zip.data) ? bpage->zip.data : ((buf_block_t*) bpage)->frame; unsigned key_version = bpage->key_version; bool page_compressed = bpage->page_encrypted; @@ -4462,39 +5824,43 @@ buf_page_check_corrupt( corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum); if (corrupted) { - ib_logf(IB_LOG_LEVEL_ERROR, - "%s: Block in space_id %lu in file %s corrupted.", - page_compressed_encrypted ? "Maybe corruption" : "Corruption", - space_id, space ? space->name : "NULL"); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page based on contents %s encrypted.", - (key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe"); - if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || calculated_checksum != BUF_NO_CHECKSUM_MAGIC) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Page stored checksum %lu but calculated checksum %lu.", - stored_checksum, calculated_checksum); + ib::error() << (page_compressed_encrypted ? "Maybe corruption" : "Corruption") + << ": Block in space_id " << space_id + << " in file " << (space ? 
space->name : "NULL") + << " corrupted."; + + ib::error() << "Page based on contents " + << ((key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe") + << " encrypted."; + + if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || + calculated_checksum != BUF_NO_CHECKSUM_MAGIC) { + ib::error() << "Page stored checksum " << stored_checksum + << " but calculated checksum " + << calculated_checksum << " ."; } - ib_logf(IB_LOG_LEVEL_ERROR, - "Reason could be that key_version %u in page " - "or in crypt_data %p could not be found.", - key_version, crypt_data); - ib_logf(IB_LOG_LEVEL_ERROR, - "Reason could be also that key management plugin is not found or" - " used encryption algorithm or method does not match."); - ib_logf(IB_LOG_LEVEL_ERROR, - "Based on page page compressed %d, compressed and encrypted %d.", - page_compressed, page_compressed_encrypted); + + ib::error() << "Reason could be that key_version " << key_version + << " in page or in crypt_data " << crypt_data + << " could not be found."; + ib::error() << "Reason could be also that key management plugin is not found or" + " used encryption algorithm or method does not match."; + ib::error() << "Based on page page compressed" + << page_compressed + << ", compressed and encrypted " + << page_compressed_encrypted << " ."; } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Block in space_id %lu in file %s encrypted.", - space_id, space ? space->name : "NULL"); - ib_logf(IB_LOG_LEVEL_ERROR, - "However key management plugin or used key_id %u is not found or" - " used encryption algorithm or method does not match.", - key_version); - ib_logf(IB_LOG_LEVEL_ERROR, - "Marking tablespace as missing. You may drop this table or" - " install correct key management plugin and key file."); + ib::error() << "Block in space_id " + << space_id + << " in file " + << (space ? 
space->name : "NULL") + << " encrypted."; + ib::error() << "However key management plugin or used key_id " + << key_version + << " is not found or" + << " used encryption algorithm or method does not match."; + ib::error() << "Marking tablespace as missing. You may drop this table or" + << " install correct key management plugin and key file."; } } @@ -4505,7 +5871,6 @@ buf_page_check_corrupt( Completes an asynchronous read or write request of a file page to or from the buffer pool. @return true if successful */ -UNIV_INTERN bool buf_page_io_complete( /*=================*/ @@ -4518,8 +5883,6 @@ buf_page_io_complete( buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - fil_space_t* space = NULL; - ut_a(buf_page_in_file(bpage)); /* We do not need protect io_fix here by mutex to read @@ -4535,10 +5898,13 @@ buf_page_io_complete( ulint read_page_no; ulint read_space_id; byte* frame; + bool compressed_page=false; + + ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); if (!buf_page_decrypt_after_read(bpage)) { /* encryption error! 
*/ - if (buf_page_get_zip_size(bpage)) { + if (bpage->size.is_compressed()) { frame = bpage->zip.data; } else { frame = ((buf_block_t*) bpage)->frame; @@ -4546,14 +5912,16 @@ buf_page_io_complete( goto corrupt; } - if (buf_page_get_zip_size(bpage)) { + if (bpage->size.is_compressed()) { frame = bpage->zip.data; buf_pool->n_pend_unzip++; + if (uncompressed && !buf_zip_decompress((buf_block_t*) bpage, FALSE)) { buf_pool->n_pend_unzip--; + compressed_page = false; goto corrupt; } buf_pool->n_pend_unzip--; @@ -4569,50 +5937,76 @@ buf_page_io_complete( read_space_id = mach_read_from_4( frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - if (bpage->space == TRX_SYS_SPACE - && buf_dblwr_page_inside(bpage->offset)) { + if (bpage->id.space() == TRX_SYS_SPACE + && buf_dblwr_page_inside(bpage->id.page_no())) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: reading page %u\n" - "InnoDB: which is in the" - " doublewrite buffer!\n", - bpage->offset); - } else if (!read_space_id && !read_page_no) { + ib::error() << "Reading page " << bpage->id + << ", which is in the doublewrite buffer!"; + + } else if (read_space_id == 0 && read_page_no == 0) { /* This is likely an uninitialized page. */ - } else if ((bpage->space - && bpage->space != read_space_id) - || bpage->offset != read_page_no) { + } else if ((bpage->id.space() != 0 + && bpage->id.space() != read_space_id) + || bpage->id.page_no() != read_page_no) { /* We did not compare space_id to read_space_id if bpage->space == 0, because the field on the page may contain garbage in MySQL < 4.1.1, which only supported bpage->space == 0. 
*/ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: space id and page n:o" - " stored in the page\n" - "InnoDB: read in are %lu:%lu," - " should be %u:%u!\n", - read_space_id, - read_page_no, - bpage->space, - bpage->offset); + ib::error() << "Space id and page no stored in " + "the page, read in are " + << page_id_t(read_space_id, read_page_no) + << ", should be " << bpage->id; } +#ifdef MYSQL_COMPRESSION + compressed_page = Compression::is_compressed_page(frame); + + /* If the decompress failed then the most likely case is + that we are reading in a page for which this instance doesn't + support the compression algorithm. */ + if (compressed_page) { + + Compression::meta_t meta; + + Compression::deserialize_header(frame, &meta); + + ib::error() + << "Page " << bpage->id << " " + << "compressed with " + << Compression::to_string(meta) << " " + << "that is not supported by this instance"; + } +#endif /* MYSQL_COMPRESSION */ + /* From version 3.23.38 up we store the page checksum to the 4 first bytes of the page end lsn field */ - - if (buf_page_is_corrupted(true, frame, - buf_page_get_zip_size(bpage))) { + if (compressed_page + || buf_page_is_corrupted( + true, frame, bpage->size, + fsp_is_checksum_disabled(bpage->id.space()))) { /* Not a real corruption if it was triggered by error injection */ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", if (bpage->space > TRX_SYS_SPACE && buf_mark_space_corrupt(bpage)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Simulated page corruption"); + ib::info() << + "Simulated page corruption"; + return(true); + } + goto page_not_corrupt_1; + ;); + /* Not a real corruption if it was triggered by + error injection */ + DBUG_EXECUTE_IF( + "buf_page_import_corrupt_failure", + if (bpage->id.space() > TRX_SYS_SPACE + && !Tablespace::is_undo_tablespace( + bpage->id.space()) + && buf_mark_space_corrupt(bpage)) { + ib::info() << "Simulated IMPORT " + "corruption"; return(true); } goto page_not_corrupt; @@ -4620,60 +6014,52 @@ 
buf_page_io_complete( corrupt: bool corrupted = buf_page_check_corrupt(bpage); - if (corrupted) { - fil_system_enter(); - space = fil_space_get_by_id(bpage->space); - fil_system_exit(); - ib_logf(IB_LOG_LEVEL_ERROR, - "Database page corruption on disk" - " or a failed"); - ib_logf(IB_LOG_LEVEL_ERROR, - "Space %lu file %s read of page %u.", - (ulint)bpage->space, - space ? space->name : "NULL", - bpage->offset); - ib_logf(IB_LOG_LEVEL_ERROR, - "You may have to recover" - " from a backup."); + /* Compressed and encrypted pages are basically gibberish avoid + printing the contents. */ + if (corrupted && !compressed_page) { + ib::error() + << "Database page corruption on disk" + " or a failed file read of page " + << bpage->id + << ". You may have to recover from " + << "a backup."; - buf_page_print(frame, buf_page_get_zip_size(bpage), - BUF_PAGE_PRINT_NO_CRASH); - - ib_logf(IB_LOG_LEVEL_ERROR, - "It is also possible that your operating" - "system has corrupted its own file cache."); - ib_logf(IB_LOG_LEVEL_ERROR, - "and rebooting your computer removes the error."); - ib_logf(IB_LOG_LEVEL_ERROR, - "If the corrupt page is an index page you can also try to"); - ib_logf(IB_LOG_LEVEL_ERROR, - "fix the corruption by dumping, dropping, and reimporting"); - ib_logf(IB_LOG_LEVEL_ERROR, - "the corrupt table. You can use CHECK"); - ib_logf(IB_LOG_LEVEL_ERROR, - "TABLE to scan your table for corruption."); - ib_logf(IB_LOG_LEVEL_ERROR, - "See also " - REFMAN "forcing-innodb-recovery.html" - " about forcing recovery."); + ib::info() + << "It is also possible that your" + " operating system has corrupted" + " its own file cache and rebooting" + " your computer removes the error." + " If the corrupt page is an index page." + " You can also try to fix the" + " corruption by dumping, dropping," + " and reimporting the corrupt table." + " You can use CHECK TABLE to scan" + " your table for corruption. 
" + << FORCE_RECOVERY_MSG; } if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { + /* If page space id is larger than TRX_SYS_SPACE (0), we will attempt to mark the corresponding table as corrupted instead of crashing server */ - if (bpage->space > TRX_SYS_SPACE + + if (bpage->id.space() > TRX_SYS_SPACE && buf_mark_space_corrupt(bpage)) { + return(false); } else { corrupted = buf_page_check_corrupt(bpage); if (corrupted) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Ending processing because of a corrupt database page."); - - ut_error; + ib::fatal() + << "Aborting because of a" + " corrupt database page in" + " the system tablespace. Or, " + " there was a failure in" + " tagging the tablespace " + " as corrupt."; } ib_push_warning((void *)NULL, DB_DECRYPTION_FAILED, @@ -4683,29 +6069,39 @@ corrupt: " Can't continue opening the table.", (ulint)bpage->space, bpage->key_version); - if (bpage->space > TRX_SYS_SPACE) { - if (corrupted) { - buf_mark_space_corrupt(bpage); - } - } else { - ut_error; - } - return(false); + buf_page_print(frame, bpage->size, BUF_PAGE_PRINT_NO_CRASH); + + return (false); } } } - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", + DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", page_not_corrupt: bpage = bpage; ); + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", + page_not_corrupt_1: bpage = bpage; ); + if (recv_recovery_is_on()) { /* Pages must be uncompressed for crash recovery. */ ut_a(uncompressed); recv_recover_page(TRUE, (buf_block_t*) bpage); } - if (uncompressed && !recv_no_ibuf_operations) { - if (bpage && bpage->encrypted) { + /* If space is being truncated then avoid ibuf operation. + During re-init we have already freed ibuf entries. 
*/ + if (uncompressed +#ifdef MYSQL_COMPRESSION + && !Compression::is_compressed_page(frame) +#endif /* MYSQL_COMPRESSION */ + && !recv_no_ibuf_operations + && !Tablespace::is_undo_tablespace(bpage->id.space()) + && bpage->id.space() != srv_tmp_space.space_id() + && !srv_is_tablespace_truncated(bpage->id.space()) + && fil_page_get_type(frame) == FIL_PAGE_INDEX + && page_is_leaf(frame)) { + + if (bpage && bpage->encrypted) { fprintf(stderr, "InnoDB: Warning: Table in tablespace %lu encrypted." "However key management plugin or used key_id %u is not found or" @@ -4714,9 +6110,8 @@ corrupt: (ulint)bpage->space, bpage->key_version); } else { ibuf_merge_or_delete_for_page( - (buf_block_t*) bpage, bpage->space, - bpage->offset, buf_page_get_zip_size(bpage), - TRUE); + (buf_block_t*) bpage, bpage->id, + &bpage->size, TRUE); } } } else { @@ -4736,7 +6131,7 @@ corrupt: /* For BUF_IO_READ of compressed-only blocks, the buffered operations will be merged by buf_page_get_gen() after the block has been uncompressed. */ - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); + ut_a(ibuf_count_get(bpage->id) == 0); } #endif /* Because this thread which does the unlocking is not the same that @@ -4773,44 +6168,39 @@ corrupt: buf_flush_write_complete(bpage); if (uncompressed) { - rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); + rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock, + BUF_IO_WRITE); } buf_pool->stat.n_pages_written++; - /* In case of flush batches i.e.: BUF_FLUSH_LIST and - BUF_FLUSH_LRU this function is always called from IO - helper thread. In this case, we decide whether or not - to evict the page based on flush type. The value - passed as evict is the default value in function - definition which is false. - We always evict in case of LRU batch and never evict - in case of flush list batch. For single page flush - the caller sets the appropriate value. 
*/ + /* We decide whether or not to evict the page from the + LRU list based on the flush_type. + * BUF_FLUSH_LIST: don't evict + * BUF_FLUSH_LRU: always evict + * BUF_FLUSH_SINGLE_PAGE: eviction preference is passed + by the caller explicitly. */ if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) { evict = true; } - mutex_exit(buf_page_get_mutex(bpage)); if (evict) { + mutex_exit(buf_page_get_mutex(bpage)); buf_LRU_free_page(bpage, true); + } else { + mutex_exit(buf_page_get_mutex(bpage)); } + break; default: ut_error; } -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Has %s page space %lu page no %lu\n", - io_type == BUF_IO_READ ? "read" : "written", - buf_page_get_space(bpage), - buf_page_get_page_no(bpage)); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("%s page %u:%u", + io_type == BUF_IO_READ ? "read" : "wrote", + bpage->id.space(), bpage->id.page_no())); buf_pool_mutex_exit(buf_pool); @@ -4819,7 +6209,7 @@ corrupt: /*********************************************************************//** Asserts that all file pages in the buffer are in a replaceable state. -@return TRUE */ +@return TRUE */ static ibool buf_all_freed_instance( @@ -4840,22 +6230,9 @@ buf_all_freed_instance( const buf_block_t* block = buf_chunk_not_freed(chunk); if (UNIV_LIKELY_NULL(block)) { - if (block->page.key_version == 0) { - fil_space_t* space = fil_space_get(block->page.space); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page %u %u still fixed or dirty.", - block->page.space, - block->page.offset); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page oldest_modification %lu fix_count %d io_fix %d.", - block->page.oldest_modification, - block->page.buf_fix_count, - buf_page_get_io_fix(&block->page)); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page space_id %u name %s.", - block->page.space, - (space && space->name) ? 
space->name : "NULL"); - ut_error; + if (block->page.key_version == 0) { + ib::fatal() << "Page " << block->page.id + << " still fixed or dirty"; } } } @@ -4905,7 +6282,7 @@ buf_pool_invalidate_instance( buf_pool_mutex_enter(buf_pool); - while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) { + while (buf_LRU_scan_and_free_block(buf_pool, true)) { } ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); @@ -4925,7 +6302,6 @@ buf_pool_invalidate_instance( Invalidates the file pages in the buffer pool when an archive recovery is completed. All the file pages buffered must be in a replaceable state when this function is called: not latched and not modified. */ -UNIV_INTERN void buf_pool_invalidate(void) /*=====================*/ @@ -4940,7 +6316,7 @@ buf_pool_invalidate(void) #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /*********************************************************************//** Validates data in one buffer pool instance -@return TRUE */ +@return TRUE */ static ibool buf_pool_validate_instance( @@ -4957,9 +6333,6 @@ buf_pool_validate_instance( ulint n_flush = 0; ulint n_free = 0; ulint n_zip = 0; - ulint fold = 0; - ulint space = 0; - ulint offset = 0; ut_ad(buf_pool); @@ -4977,7 +6350,7 @@ buf_pool_validate_instance( for (j = chunk->size; j--; block++) { - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: @@ -4989,22 +6362,14 @@ buf_pool_validate_instance( break; case BUF_BLOCK_FILE_PAGE: - space = buf_block_get_space(block); - offset = buf_block_get_page_no(block); - fold = buf_page_address_fold(space, offset); - ut_a(buf_page_hash_get_low(buf_pool, - space, - offset, - fold) + ut_a(buf_page_hash_get_low( + buf_pool, block->page.id) == &block->page); #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(buf_page_get_io_fix(&block->page) == BUF_IO_READ - || !ibuf_count_get(buf_block_get_space( - block), - buf_block_get_page_no( - block))); + || !ibuf_count_get(block->page.id)); #endif switch 
(buf_page_get_io_fix(&block->page)) { case BUF_IO_NONE: @@ -5021,7 +6386,10 @@ buf_pool_validate_instance( assert_s_latched: ut_a(rw_lock_is_locked( &block->lock, - RW_LOCK_SHARED)); + RW_LOCK_S) + || rw_lock_is_locked( + &block->lock, + RW_LOCK_SX)); break; case BUF_FLUSH_LIST: n_list_flush++; @@ -5035,7 +6403,7 @@ assert_s_latched: case BUF_IO_READ: ut_a(rw_lock_is_locked(&block->lock, - RW_LOCK_EX)); + RW_LOCK_X)); break; case BUF_IO_PIN: @@ -5056,7 +6424,7 @@ assert_s_latched: break; } - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } } @@ -5087,9 +6455,7 @@ assert_s_latched: we have acquired buf_pool->zip_mutex above which acts as the 'block->mutex' for these bpages. */ ut_a(!b->oldest_modification); - fold = buf_page_address_fold(b->space, b->offset); - ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset, - fold) == b); + ut_a(buf_page_hash_get_low(buf_pool, b->id) == b); n_lru++; n_zip++; } @@ -5141,9 +6507,7 @@ assert_s_latched: ut_error; break; } - fold = buf_page_address_fold(b->space, b->offset); - ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset, - fold) == b); + ut_a(buf_page_hash_get_low(buf_pool, b->id) == b); } ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); @@ -5153,19 +6517,21 @@ assert_s_latched: mutex_exit(&buf_pool->zip_mutex); - if (n_lru + n_free > buf_pool->curr_size + n_zip) { - fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n", - n_lru, n_free, - buf_pool->curr_size, n_zip); - ut_error; + if (buf_pool->curr_size == buf_pool->old_size + && n_lru + n_free > buf_pool->curr_size + n_zip) { + + ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free + << ", pool " << buf_pool->curr_size + << " zip " << n_zip << ". 
Aborting..."; } ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); - if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { - fprintf(stderr, "Free list len %lu, free blocks %lu\n", - UT_LIST_GET_LEN(buf_pool->free), - n_free); - ut_error; + if (buf_pool->curr_size == buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->free) != n_free) { + + ib::fatal() << "Free list len " + << UT_LIST_GET_LEN(buf_pool->free) + << ", free blocks " << n_free << ". Aborting..."; } ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); @@ -5182,8 +6548,7 @@ assert_s_latched: /*********************************************************************//** Validates the buffer buf_pool data structure. -@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_validate(void) /*==============*/ @@ -5226,37 +6591,14 @@ buf_print_instance( size = buf_pool->curr_size; index_ids = static_cast( - mem_alloc(size * sizeof *index_ids)); + ut_malloc_nokey(size * sizeof *index_ids)); - counts = static_cast(mem_alloc(sizeof(ulint) * size)); + counts = static_cast(ut_malloc_nokey(sizeof(ulint) * size)); buf_pool_mutex_enter(buf_pool); buf_flush_list_mutex_enter(buf_pool); - fprintf(stderr, - "buf_pool size %lu\n" - "database pages %lu\n" - "free pages %lu\n" - "modified database pages %lu\n" - "n pending decompressions %lu\n" - "n pending reads %lu\n" - "n pending flush LRU %lu list %lu single page %lu\n" - "pages made young %lu, not young %lu\n" - "pages read %lu, created %lu, written %lu\n", - (ulint) size, - (ulint) UT_LIST_GET_LEN(buf_pool->LRU), - (ulint) UT_LIST_GET_LEN(buf_pool->free), - (ulint) UT_LIST_GET_LEN(buf_pool->flush_list), - (ulint) buf_pool->n_pend_unzip, - (ulint) buf_pool->n_pend_reads, - (ulint) buf_pool->n_flush[BUF_FLUSH_LRU], - (ulint) buf_pool->n_flush[BUF_FLUSH_LIST], - (ulint) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE], - (ulint) buf_pool->stat.n_pages_made_young, - (ulint) buf_pool->stat.n_pages_not_made_young, - (ulint) buf_pool->stat.n_pages_read, - (ulint) buf_pool->stat.n_pages_created, 
- (ulint) buf_pool->stat.n_pages_written); + ib::info() << *buf_pool; buf_flush_list_mutex_exit(buf_pool); @@ -5273,7 +6615,7 @@ buf_print_instance( for (; n_blocks--; block++) { const buf_frame_t* frame = block->frame; - if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { + if (fil_page_index_page_check(frame)) { id = btr_page_get_index_id(frame); @@ -5304,28 +6646,26 @@ buf_print_instance( for (i = 0; i < n_found; i++) { index = dict_index_get_if_in_cache(index_ids[i]); - fprintf(stderr, - "Block count for index %llu in buffer is about %lu", - (ullint) index_ids[i], - (ulint) counts[i]); - - if (index) { - putc(' ', stderr); - dict_index_name_print(stderr, NULL, index); + if (!index) { + ib::info() << "Block count for index " + << index_ids[i] << " in buffer is about " + << counts[i]; + } else { + ib::info() << "Block count for index " << index_ids[i] + << " in buffer is about " << counts[i] + << ", index " << index->name + << " of table " << index->table->name; } - - putc('\n', stderr); } - mem_free(index_ids); - mem_free(counts); + ut_free(index_ids); + ut_free(counts); ut_a(buf_pool_validate_instance(buf_pool)); } /*********************************************************************//** Prints info of the buffer buf_pool data structure. */ -UNIV_INTERN void buf_print(void) /*===========*/ @@ -5344,8 +6684,7 @@ buf_print(void) #ifdef UNIV_DEBUG /*********************************************************************//** Returns the number of latched pages in the buffer pool. 
-@return number of latched pages */ -UNIV_INTERN +@return number of latched pages */ ulint buf_get_latched_pages_number_instance( /*==================================*/ @@ -5373,7 +6712,7 @@ buf_get_latched_pages_number_instance( continue; } - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); if (block->page.buf_fix_count != 0 || buf_page_get_io_fix(&block->page) @@ -5381,7 +6720,7 @@ buf_get_latched_pages_number_instance( fixed_pages_number++; } - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } } @@ -5435,8 +6774,7 @@ buf_get_latched_pages_number_instance( /*********************************************************************//** Returns the number of latched pages in all the buffer pools. -@return number of latched pages */ -UNIV_INTERN +@return number of latched pages */ ulint buf_get_latched_pages_number(void) /*==============================*/ @@ -5460,16 +6798,14 @@ buf_get_latched_pages_number(void) /*********************************************************************//** Returns the number of pending buf pool read ios. -@return number of pending read I/O operations */ -UNIV_INTERN +@return number of pending read I/O operations */ ulint buf_get_n_pending_read_ios(void) /*============================*/ { - ulint i; ulint pend_ios = 0; - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { pend_ios += buf_pool_from_array(i)->n_pend_reads; } @@ -5479,24 +6815,24 @@ buf_get_n_pending_read_ios(void) /*********************************************************************//** Returns the ratio in percents of modified pages in the buffer pool / database pages in the buffer pool. 
-@return modified page percentage ratio */ -UNIV_INTERN +@return modified page percentage ratio */ double buf_get_modified_ratio_pct(void) /*============================*/ { - double percentage = 0.0; + double ratio; ulint lru_len = 0; ulint free_len = 0; ulint flush_list_len = 0; buf_get_total_list_len(&lru_len, &free_len, &flush_list_len); - percentage = (100.0 * flush_list_len) / (1.0 + lru_len + free_len); + ratio = static_cast(100 * flush_list_len) + / (1 + lru_len + free_len); /* 1 + is there to avoid division by zero */ - return(percentage); + return(ratio); } /*******************************************************************//** @@ -5559,7 +6895,6 @@ buf_stats_aggregate_pool_info( Collect buffer pool stats information for a buffer pool. Also record aggregated stats if there are more than one buffer pool in the server */ -UNIV_INTERN void buf_stats_get_pool_info( /*====================*/ @@ -5568,7 +6903,7 @@ buf_stats_get_pool_info( buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info to fill */ { - buf_pool_info_t* pool_info; + buf_pool_info_t* pool_info; time_t current_time; double time_elapsed; @@ -5694,7 +7029,6 @@ buf_stats_get_pool_info( /*********************************************************************//** Prints info of the buffer i/o. */ -UNIV_INTERN void buf_print_io_instance( /*==================*/ @@ -5777,8 +7111,9 @@ buf_print_io_instance( /* Print some values to help us with visualizing what is happening with LRU eviction. 
*/ fprintf(file, - "LRU len: %lu, unzip_LRU len: %lu\n" - "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n", + "LRU len: " ULINTPF ", unzip_LRU len: " ULINTPF "\n" + "I/O sum[" ULINTPF "]:cur[" ULINTPF "], " + "unzip sum[" ULINTPF "]:cur[" ULINTPF "]\n", pool_info->lru_len, pool_info->unzip_lru_len, pool_info->io_sum, pool_info->io_cur, pool_info->unzip_sum, pool_info->unzip_cur); @@ -5786,7 +7121,6 @@ buf_print_io_instance( /*********************************************************************//** Prints info of the buffer i/o. */ -UNIV_INTERN void buf_print_io( /*=========*/ @@ -5800,7 +7134,7 @@ buf_print_io( one extra buf_pool_info_t, the last one stores aggregated/total values from all pools */ if (srv_buf_pool_instances > 1) { - pool_info = (buf_pool_info_t*) mem_zalloc(( + pool_info = (buf_pool_info_t*) ut_zalloc_nokey(( srv_buf_pool_instances + 1) * sizeof *pool_info); pool_info_total = &pool_info[srv_buf_pool_instances]; @@ -5809,7 +7143,7 @@ buf_print_io( pool_info_total = pool_info = static_cast( - mem_zalloc(sizeof *pool_info)); + ut_zalloc_nokey(sizeof *pool_info)); } for (i = 0; i < srv_buf_pool_instances; i++) { @@ -5840,17 +7174,16 @@ buf_print_io( "----------------------\n", file); for (i = 0; i < srv_buf_pool_instances; i++) { - fprintf(file, "---BUFFER POOL %lu\n", i); + fprintf(file, "---BUFFER POOL " ULINTPF "\n", i); buf_print_io_instance(&pool_info[i], file); } } - mem_free(pool_info); + ut_free(pool_info); } /**********************************************************************//** Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN void buf_refresh_io_stats( /*=================*/ @@ -5862,7 +7195,6 @@ buf_refresh_io_stats( /**********************************************************************//** Refreshes the statistics used to print per-second averages. 
*/ -UNIV_INTERN void buf_refresh_io_stats_all(void) /*==========================*/ @@ -5879,7 +7211,6 @@ buf_refresh_io_stats_all(void) /**********************************************************************//** Check if all pages in all buffer pools are in a replacable state. @return FALSE if not */ -UNIV_INTERN ibool buf_all_freed(void) /*===============*/ @@ -5900,8 +7231,7 @@ buf_all_freed(void) /*********************************************************************//** Checks that there currently are no pending i/o-operations for the buffer pool. -@return number of pending i/o */ -UNIV_INTERN +@return number of pending i/o */ ulint buf_pool_check_no_pending_io(void) /*==============================*/ @@ -5932,8 +7262,7 @@ buf_pool_check_no_pending_io(void) Code currently not used /*********************************************************************//** Gets the current length of the free list of buffer blocks. -@return length of the free list */ -UNIV_INTERN +@return length of the free list */ ulint buf_get_free_list_len(void) /*=======================*/ @@ -5951,36 +7280,77 @@ buf_get_free_list_len(void) #endif #else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */ -UNIV_INTERN + +/** Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. 
+@param[in] page_id page id +@param[in] page_size page size +@param[in,out] block block to init */ void buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - buf_block_t* block) /*!< in: block to init */ + const page_id_t& page_id, + const page_size_t& page_size, + buf_block_t* block) { - block->page.state = BUF_BLOCK_FILE_PAGE; - block->page.space = space; - block->page.offset = offset; + block->page.state = BUF_BLOCK_FILE_PAGE; + block->page.id = page_id; + block->page.size.copy_from(page_size); page_zip_des_init(&block->page.zip); /* We assume that block->page.data has been allocated - with zip_size == UNIV_PAGE_SIZE. */ - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_ad(ut_is_2pow(zip_size)); - page_zip_set_size(&block->page.zip, zip_size); - if (zip_size) { - block->page.zip.data = block->frame + UNIV_PAGE_SIZE; + with page_size == univ_page_size. */ + if (page_size.is_compressed()) { + page_zip_set_size(&block->page.zip, page_size.physical()); + block->page.zip.data = block->frame + page_size.logical(); + } else { + page_zip_set_size(&block->page.zip, 0); } } + #endif /* !UNIV_HOTBACKUP */ +/** Print the given page_id_t object. +@param[in,out] out the output stream +@param[in] page_id the page_id_t object to be printed +@return the output stream */ +std::ostream& +operator<<( + std::ostream& out, + const page_id_t& page_id) +{ + out << "[page id: space=" << page_id.m_space + << ", page number=" << page_id.m_page_no << "]"; + return(out); +} + +/** Print the given buf_pool_t object. 
+@param[in,out] out the output stream +@param[in] buf_pool the buf_pool_t object to be printed +@return the output stream */ +std::ostream& +operator<<( + std::ostream& out, + const buf_pool_t& buf_pool) +{ + out << "[buffer pool instance: " + << "buf_pool size=" << buf_pool.curr_size + << ", database pages=" << UT_LIST_GET_LEN(buf_pool.LRU) + << ", free pages=" << UT_LIST_GET_LEN(buf_pool.free) + << ", modified database pages=" + << UT_LIST_GET_LEN(buf_pool.flush_list) + << ", n pending decompressions=" << buf_pool.n_pend_unzip + << ", n pending reads=" << buf_pool.n_pend_reads + << ", n pending flush LRU=" << buf_pool.n_flush[BUF_FLUSH_LRU] + << " list=" << buf_pool.n_flush[BUF_FLUSH_LIST] + << " single page=" << buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE] + << ", pages made young=" << buf_pool.stat.n_pages_made_young + << ", not young=" << buf_pool.stat.n_pages_not_made_young + << ", pages read=" << buf_pool.stat.n_pages_read + << ", created=" << buf_pool.stat.n_pages_created + << ", written=" << buf_pool.stat.n_pages_written << "]"; + return(out); +} + /********************************************************************//** Reserve unused slot from temporary memory array and allocate necessary temporary memory if not yet allocated. 
@@ -6016,7 +7386,7 @@ buf_pool_reserve_tmp_slot( /* Allocate temporary memory for encryption/decryption */ if (free_slot->crypt_buf_free == NULL) { - free_slot->crypt_buf_free = static_cast(ut_malloc(UNIV_PAGE_SIZE*2)); + free_slot->crypt_buf_free = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE*2)); free_slot->crypt_buf = static_cast(ut_align(free_slot->crypt_buf_free, UNIV_PAGE_SIZE)); memset(free_slot->crypt_buf_free, 0, UNIV_PAGE_SIZE *2); } @@ -6024,11 +7394,11 @@ buf_pool_reserve_tmp_slot( /* For page compressed tables allocate temporary memory for compression/decompression */ if (compressed && free_slot->comp_buf_free == NULL) { - free_slot->comp_buf_free = static_cast(ut_malloc(UNIV_PAGE_SIZE*2)); + free_slot->comp_buf_free = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE*2)); free_slot->comp_buf = static_cast(ut_align(free_slot->comp_buf_free, UNIV_PAGE_SIZE)); memset(free_slot->comp_buf_free, 0, UNIV_PAGE_SIZE *2); #ifdef HAVE_LZO - free_slot->lzo_mem = static_cast(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); + free_slot->lzo_mem = static_cast(ut_malloc_nokey(LZO1X_1_15_MEM_COMPRESS)); memset(free_slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); #endif } @@ -6046,24 +7416,23 @@ buf_page_encrypt_before_write( byte* src_frame, /*!< in: src frame */ ulint space_id) /*!< in: space id */ { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - ulint zip_size = buf_page_get_zip_size(bpage); - ulint page_size = (zip_size) ? 
zip_size : UNIV_PAGE_SIZE; + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); + const page_size_t& page_size = bpage->size; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - bool page_compressed = fil_space_is_page_compressed(bpage->space); + bool page_compressed = fil_space_is_page_compressed(space_id); bool encrypted = true; bpage->real_size = UNIV_PAGE_SIZE; fil_page_type_validate(src_frame); - if (bpage->offset == 0) { + if (bpage->id.page_no() == 0) { /* Page 0 of a tablespace is not encrypted/compressed */ ut_ad(bpage->key_version == 0); return src_frame; } - if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) { + if (space_id == TRX_SYS_SPACE && bpage->id.page_no() == TRX_SYS_PAGE_NO) { /* don't encrypt/compress page as it contains address to dblwr buffer */ bpage->key_version = 0; return src_frame; @@ -6100,17 +7469,17 @@ buf_page_encrypt_before_write( if (!page_compressed) { /* Encrypt page content */ - byte* tmp = fil_space_encrypt(bpage->space, - bpage->offset, + byte* tmp = fil_space_encrypt(space_id, + bpage->id.page_no(), bpage->newest_modification, src_frame, - zip_size, + page_size, dst_frame); ulint key_version = mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); ut_ad(key_version == 0 || key_version >= bpage->key_version); bpage->key_version = key_version; - bpage->real_size = page_size; + bpage->real_size = page_size.physical(); slot->out_buf = dst_frame = tmp; #ifdef UNIV_DEBUG @@ -6120,13 +7489,13 @@ buf_page_encrypt_before_write( } else { /* First we compress the page content */ ulint out_len = 0; - ulint block_size = fil_space_get_block_size(bpage->space, bpage->offset, page_size); + ulint block_size = fil_space_get_block_size(space_id, bpage->id.page_no(), page_size.logical()); - byte *tmp = fil_compress_page(bpage->space, + byte *tmp = fil_compress_page(space_id, (byte *)src_frame, slot->comp_buf, - page_size, - fil_space_get_page_compression_level(bpage->space), + 
page_size.logical(), + fil_space_get_page_compression_level(space_id), block_size, encrypted, &out_len, @@ -6142,11 +7511,11 @@ buf_page_encrypt_before_write( if(encrypted) { /* And then we encrypt the page content */ - tmp = fil_space_encrypt(bpage->space, - bpage->offset, + tmp = fil_space_encrypt(space_id, + bpage->id.page_no(), bpage->newest_modification, tmp, - zip_size, + page_size, dst_frame); } @@ -6169,10 +7538,9 @@ buf_page_decrypt_after_read( /*========================*/ buf_page_t* bpage) /*!< in/out: buffer page read from disk */ { - ulint zip_size = buf_page_get_zip_size(bpage); - ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - - byte* dst_frame = (zip_size) ? bpage->zip.data : + bool compressed = bpage->size.is_compressed(); + const page_size_t& size = bpage->size; + byte* dst_frame = compressed ? bpage->zip.data : ((buf_block_t*) bpage)->frame; unsigned key_version = mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); @@ -6188,7 +7556,7 @@ buf_page_decrypt_after_read( ut_ad(bpage->key_version == 0); - if (bpage->offset == 0) { + if (bpage->id.page_no() == 0) { /* File header pages are not encrypted/compressed */ return (TRUE); } @@ -6197,6 +7565,7 @@ buf_page_decrypt_after_read( bpage->key_version = key_version; bpage->page_encrypted = page_compressed_encrypted; bpage->page_compressed = page_compressed; + bpage->space = bpage->id.space(); if (page_compressed) { /* the page we read is unencrypted */ @@ -6210,7 +7579,7 @@ buf_page_decrypt_after_read( /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, dst_frame, - size, + size.logical(), &bpage->write_size); /* Mark this slot as free */ @@ -6234,15 +7603,16 @@ buf_page_decrypt_after_read( /* Calculate checksum before decrypt, this will be used later to find out if incorrect key was used. 
*/ if (!page_compressed_encrypted) { - bpage->calculated_checksum = fil_crypt_calculate_checksum(zip_size, dst_frame); + bpage->calculated_checksum = fil_crypt_calculate_checksum(size, dst_frame); } /* decrypt using crypt_buf to dst_frame */ - byte* res = fil_space_decrypt(bpage->space, + byte* res = fil_space_decrypt(bpage->id.space(), slot->crypt_buf, size, dst_frame); + if (!res) { bpage->encrypted = true; success = false; @@ -6263,7 +7633,7 @@ buf_page_decrypt_after_read( /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, dst_frame, - size, + size.logical(), &bpage->write_size); #ifdef UNIV_DEBUG @@ -6281,3 +7651,5 @@ buf_page_decrypt_after_read( return (success); } +#endif /* !UNIV_INNOCHECKSUM */ + diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc index 4101d117896..94eafec0584 100644 --- a/storage/innobase/buf/buf0checksum.cc +++ b/storage/innobase/buf/buf0checksum.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,37 +24,39 @@ Created Aug 11, 2011 Vasil Dimov *******************************************************/ #include "univ.i" -#include "fil0fil.h" /* FIL_* */ -#include "ut0crc32.h" /* ut_crc32() */ -#include "ut0rnd.h" /* ut_fold_binary() */ +#include "fil0fil.h" +#include "ut0crc32.h" +#include "ut0rnd.h" #include "buf0checksum.h" #ifndef UNIV_INNOCHECKSUM - -#include "srv0srv.h" /* SRV_CHECKSUM_* */ -#include "buf0types.h" - +#include "srv0srv.h" #endif /* !UNIV_INNOCHECKSUM */ +#include "buf0types.h" + /** the macro MYSQL_SYSVAR_ENUM() requires "long unsigned int" and if we use srv_checksum_algorithm_t here then we get a compiler error: ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to 'long unsigned int*' in initialization */ -UNIV_INTERN ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; +ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; -/********************************************************************//** -Calculates a page CRC32 which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ib_uint32_t +/** set if we have found pages matching legacy big endian checksum */ +bool legacy_big_endian_checksum = false; +/** Calculates the CRC32 checksum of a page. The value is stored to the page +when it is written to a file and also checked for a match when reading from +the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian +variants. Note that we must be careful to calculate the same value on 32-bit +and 64-bit architectures. 
+@param[in] page buffer page (UNIV_PAGE_SIZE bytes) +@param[in] use_legacy_big_endian if true then use big endian +byteorder when converting byte strings to integers +@return checksum */ +uint32_t buf_calc_page_crc32( -/*================*/ - const byte* page) /*!< in: buffer page */ + const byte* page, + bool use_legacy_big_endian /* = false */) { - ib_uint32_t checksum; - /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool to the first pages of data files, we have to skip them in the page @@ -63,22 +65,26 @@ buf_calc_page_crc32( checksum is stored, and also the last 8 bytes of page because there we store the old formula checksum. */ - checksum = ut_crc32(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION - - FIL_PAGE_OFFSET) - ^ ut_crc32(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); + ut_crc32_func_t crc32_func = use_legacy_big_endian + ? ut_crc32_legacy_big_endian + : ut_crc32; - return(checksum); + const uint32_t c1 = crc32_func( + page + FIL_PAGE_OFFSET, + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION - FIL_PAGE_OFFSET); + + const uint32_t c2 = crc32_func( + page + FIL_PAGE_DATA, + UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); + + return(c1 ^ c2); } /********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value on 32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN +@return checksum */ ulint buf_calc_page_new_checksum( /*=======================*/ @@ -112,8 +118,7 @@ checksum. NOTE: we must first store the new formula checksum to FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum because this takes that field as an input! 
-@return checksum */ -UNIV_INTERN +@return checksum */ ulint buf_calc_page_old_checksum( /*=======================*/ @@ -128,12 +133,9 @@ buf_calc_page_old_checksum( return(checksum); } -#ifndef UNIV_INNOCHECKSUM - /********************************************************************//** Return a printable string describing the checksum algorithm. -@return algorithm name */ -UNIV_INTERN +@return algorithm name */ const char* buf_checksum_algorithm_name( /*========================*/ @@ -157,5 +159,3 @@ buf_checksum_algorithm_name( ut_error; return(NULL); } - -#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 16877818ba9..ff1d2057e6a 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,7 @@ Doublwrite buffer module Created 2011/12/19 *******************************************************/ +#include "ha_prototypes.h" #include "buf0dblwr.h" #ifdef UNIV_NONINL @@ -41,16 +42,11 @@ Created 2011/12/19 #ifndef UNIV_HOTBACKUP -#ifdef UNIV_PFS_MUTEX -/* Key to register the mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key; -#endif /* UNIV_PFS_RWLOCK */ - /** The doublewrite buffer */ -UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL; +buf_dblwr_t* buf_dblwr = NULL; /** Set to TRUE when the doublewrite buffer is being created */ -UNIV_INTERN ibool buf_dblwr_being_created = FALSE; +ibool buf_dblwr_being_created = FALSE; #define TRX_SYS_DOUBLEWRITE_BLOCKS 2 @@ -58,7 +54,6 @@ UNIV_INTERN ibool buf_dblwr_being_created = FALSE; Determines if a page number is located inside the doublewrite buffer. @return TRUE if the location is inside the two blocks of the doublewrite buffer */ -UNIV_INTERN ibool buf_dblwr_page_inside( /*==================*/ @@ -87,7 +82,7 @@ buf_dblwr_page_inside( /****************************************************************//** Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the doublewrite buffer within it. -@return pointer to the doublewrite buffer within the filespace header +@return pointer to the doublewrite buffer within the filespace header page. 
*/ UNIV_INLINE byte* @@ -97,8 +92,9 @@ buf_dblwr_get( { buf_block_t* block; - block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, - RW_X_LATCH, mtr); + block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), + univ_page_size, RW_X_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE); @@ -107,7 +103,6 @@ buf_dblwr_get( /********************************************************************//** Flush a batch of writes to the datafiles that have already been written to the dblwr buffer on disk. */ -UNIV_INLINE void buf_dblwr_sync_datafiles() /*======================*/ @@ -121,7 +116,7 @@ buf_dblwr_sync_datafiles() os_aio_wait_until_no_pending_writes(); /* Now we flush the data to disk (for example, with fsync) */ - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); } /****************************************************************//** @@ -136,7 +131,7 @@ buf_dblwr_init( ulint buf_size; buf_dblwr = static_cast( - mem_zalloc(sizeof(buf_dblwr_t))); + ut_zalloc_nokey(sizeof(buf_dblwr_t))); /* There are two blocks of same size in the doublewrite buffer. 
*/ @@ -147,11 +142,10 @@ buf_dblwr_init( ut_a(srv_doublewrite_batch_size > 0 && srv_doublewrite_batch_size < buf_size); - mutex_create(buf_dblwr_mutex_key, - &buf_dblwr->mutex, SYNC_DOUBLEWRITE); + mutex_create(LATCH_ID_BUF_DBLWR, &buf_dblwr->mutex); - buf_dblwr->b_event = os_event_create(); - buf_dblwr->s_event = os_event_create(); + buf_dblwr->b_event = os_event_create("dblwr_batch_event"); + buf_dblwr->s_event = os_event_create("dblwr_single_event"); buf_dblwr->first_free = 0; buf_dblwr->s_reserved = 0; buf_dblwr->b_reserved = 0; @@ -162,24 +156,25 @@ buf_dblwr_init( doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2); buf_dblwr->in_use = static_cast( - mem_zalloc(buf_size * sizeof(bool))); + ut_zalloc_nokey(buf_size * sizeof(bool))); buf_dblwr->write_buf_unaligned = static_cast( - ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE)); + ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE)); buf_dblwr->write_buf = static_cast( ut_align(buf_dblwr->write_buf_unaligned, UNIV_PAGE_SIZE)); buf_dblwr->buf_block_arr = static_cast( - mem_zalloc(buf_size * sizeof(void*))); + ut_zalloc_nokey(buf_size * sizeof(void*))); } /****************************************************************//** Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ -UNIV_INTERN -void +doublewrite buffer is placed on the trx system header page. +@return true if successful, false if not. 
*/ +MY_ATTRIBUTE((warn_unused_result)) +bool buf_dblwr_create(void) /*==================*/ { @@ -195,7 +190,7 @@ buf_dblwr_create(void) if (buf_dblwr) { /* Already inited */ - return; + return(true); } start_again: @@ -213,23 +208,22 @@ start_again: mtr_commit(&mtr); buf_dblwr_being_created = FALSE; - return; + return(true); } - ib_logf(IB_LOG_LEVEL_INFO, - "Doublewrite buffer not found: creating new"); + ib::info() << "Doublewrite buffer not found: creating new"; - if (buf_pool_get_curr_size() - < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - + FSP_EXTENT_SIZE / 2 + 100) - * UNIV_PAGE_SIZE)) { + ulint min_doublewrite_size = + ( ( 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + + FSP_EXTENT_SIZE / 2 + + 100) + * UNIV_PAGE_SIZE); + if (buf_pool_get_curr_size() < min_doublewrite_size) { + ib::error() << "Cannot create doublewrite buffer: you must" + " increase your buffer pool size. Cannot continue" + " operation."; - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create doublewrite buffer: you must " - "increase your buffer pool size. Cannot continue " - "operation."); - - exit(EXIT_FAILURE); + return(false); } block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, @@ -242,15 +236,14 @@ start_again: buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK); if (block2 == NULL) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create doublewrite buffer: you must " - "increase your tablespace size. " - "Cannot continue operation."); + ib::error() << "Cannot create doublewrite buffer: you must" + " increase your tablespace size." 
+ " Cannot continue operation."; /* We exit without committing the mtr to prevent its modifications to the database getting to disk */ - exit(EXIT_FAILURE); + return(false); } fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG; @@ -261,12 +254,11 @@ start_again: new_block = fseg_alloc_free_page( fseg_header, prev_page_no + 1, FSP_UP, &mtr); if (new_block == NULL) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create doublewrite buffer: you must " - "increase your tablespace size. " - "Cannot continue operation."); + ib::error() << "Cannot create doublewrite buffer: " + " you must increase your tablespace size." + " Cannot continue operation."; - exit(EXIT_FAILURE); + return(false); } /* We read the allocated pages to the buffer pool; @@ -279,7 +271,7 @@ start_again: has not been written to in doublewrite. */ ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); - page_no = buf_block_get_page_no(new_block); + page_no = new_block->page.id.page_no(); if (i == FSP_EXTENT_SIZE / 2) { ut_a(page_no == FSP_EXTENT_SIZE); @@ -346,55 +338,73 @@ start_again: /* Remove doublewrite pages from LRU */ buf_pool_invalidate(); - ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created"); + ib::info() << "Doublewrite buffer created"; goto start_again; } -/****************************************************************//** -At a database startup initializes the doublewrite buffer memory structure if +/** +At database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function loads the pages from double write buffer into memory. */ -void +recovery, this function loads the pages from double write buffer into memory. 
+@param[in] file File handle +@param[in] path Path name of file +@return DB_SUCCESS or error code */ +dberr_t buf_dblwr_init_or_load_pages( -/*=========================*/ os_file_t file, - char* path, - bool load_corrupt_pages) + const char* path) { - byte* buf; - byte* read_buf; - byte* unaligned_read_buf; - ulint block1; - ulint block2; - byte* page; - ibool reset_space_ids = FALSE; - byte* doublewrite; - ulint space_id; - ulint i; - ulint block_bytes = 0; - recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + byte* buf; + byte* page; + ulint block1; + ulint block2; + ulint space_id; + byte* read_buf; + byte* doublewrite; + byte* unaligned_read_buf; + ibool reset_space_ids = FALSE; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; /* We do the file i/o past the buffer pool */ - unaligned_read_buf = static_cast(ut_malloc(3 * UNIV_PAGE_SIZE)); + unaligned_read_buf = static_cast( + ut_malloc_nokey(3 * UNIV_PAGE_SIZE)); read_buf = static_cast( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); /* Read the trx sys header to check if we are using the doublewrite buffer */ - off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE; - os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE); + dberr_t err; + + IORequest read_request(IORequest::READ); + + read_request.disable_compression(); + + err = os_file_read( + read_request, + file, read_buf, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, + UNIV_PAGE_SIZE); + + if (err != DB_SUCCESS) { + + ib::error() + << "Failed to read the system tablespace header page"; + + ut_free(unaligned_read_buf); + + return(err); + } doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) { byte* tmp = fil_space_decrypt((ulint)TRX_SYS_SPACE, read_buf + UNIV_PAGE_SIZE, - UNIV_PAGE_SIZE, /* page size */ + univ_page_size, /* page size */ read_buf); doublewrite = tmp + TRX_SYS_DOUBLEWRITE; } @@ -410,7 +420,8 @@ buf_dblwr_init_or_load_pages( buf = buf_dblwr->write_buf; } else { - goto leave_func; + 
ut_free(unaligned_read_buf); + return(DB_SUCCESS); } if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED) @@ -424,32 +435,56 @@ buf_dblwr_init_or_load_pages( reset_space_ids = TRUE; - ib_logf(IB_LOG_LEVEL_INFO, - "Resetting space id's in the doublewrite buffer"); + ib::info() << "Resetting space id's in the doublewrite buffer"; } /* Read the pages from the doublewrite buffer to memory */ + err = os_file_read( + read_request, + file, buf, block1 * UNIV_PAGE_SIZE, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE); - block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; + if (err != DB_SUCCESS) { - os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes); - os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE, - block_bytes); + ib::error() + << "Failed to read the first double write buffer " + "extent"; + + ut_free(unaligned_read_buf); + + return(err); + } + + err = os_file_read( + read_request, + file, + buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + block2 * UNIV_PAGE_SIZE, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE); + + if (err != DB_SUCCESS) { + + ib::error() + << "Failed to read the second double write buffer " + "extent"; + + ut_free(unaligned_read_buf); + + return(err); + } /* Check if any of these pages is half-written in data files, in the intended position */ page = buf; - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) { - - ulint source_page_no; - + for (ulint i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { if (reset_space_ids) { + ulint source_page_no; space_id = 0; - mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + space_id); /* We do not need to calculate new checksums for the pages because the field .._SPACE_ID does not affect them. Write the page back to where we read it from. 
*/ @@ -461,79 +496,119 @@ buf_dblwr_init_or_load_pages( + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; } - os_file_write(path, file, page, - source_page_no * UNIV_PAGE_SIZE, - UNIV_PAGE_SIZE); - } else if (load_corrupt_pages) { + IORequest write_request(IORequest::WRITE); + + /* Recovered data file pages are written out + as uncompressed. */ + + write_request.disable_compression(); + + err = os_file_write( + write_request, path, file, page, + source_page_no * UNIV_PAGE_SIZE, + UNIV_PAGE_SIZE); + if (err != DB_SUCCESS) { + + ib::error() + << "Failed to write to the double write" + " buffer"; + + ut_free(unaligned_read_buf); + + return(err); + } + + } else { recv_dblwr.add(page); } - page += UNIV_PAGE_SIZE; + page += univ_page_size.physical(); } if (reset_space_ids) { os_file_flush(file); } -leave_func: ut_free(unaligned_read_buf); + + return(DB_SUCCESS); } -/****************************************************************//** -Process the double write buffer pages. */ +/** Process and remove the double write buffer pages for all tablespaces. 
*/ void -buf_dblwr_process() -/*===============*/ +buf_dblwr_process(void) { - ulint space_id; - ulint page_no; - ulint page_no_dblwr = 0; - byte* page; - byte* read_buf; - byte* unaligned_read_buf; - recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + ulint page_no_dblwr = 0; + byte* read_buf; + byte* unaligned_read_buf; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; - unaligned_read_buf = static_cast(ut_malloc(2 * UNIV_PAGE_SIZE)); + unaligned_read_buf = static_cast( + ut_malloc_nokey(2 * UNIV_PAGE_SIZE)); read_buf = static_cast( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); - for (std::list::iterator i = recv_dblwr.pages.begin(); - i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { + for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin(); + i != recv_dblwr.pages.end(); + ++i, ++page_no_dblwr) { bool is_compressed = false; - page = *i; - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); - space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID); + const byte* page = *i; + ulint page_no = page_get_page_no(page); + ulint space_id = page_get_space_id(page); - if (!fil_tablespace_exists_in_mem(space_id)) { - /* Maybe we have dropped the single-table tablespace + fil_space_t* space = fil_space_get(space_id); + + if (space == NULL) { + /* Maybe we have dropped the tablespace and this page once belonged to it: do nothing */ + continue; + } - } else if (!fil_check_adress_in_tablespace(space_id, - page_no)) { - ib_logf(IB_LOG_LEVEL_WARN, - "A page in the doublewrite buffer is not " - "within space bounds; space id %lu " - "page number %lu, page %lu in " - "doublewrite buf.", - (ulong) space_id, (ulong) page_no, - page_no_dblwr); + fil_space_open_if_needed(space); + + if (page_no >= space->size) { + + /* Do not report the warning if the tablespace is + schedule for truncate or was truncated and we have live + MLOG_TRUNCATE record in redo. 
*/ + bool skip_warning = + srv_is_tablespace_truncated(space_id) + || srv_was_tablespace_truncated(space); + + if (!skip_warning) { + ib::warn() << "Page " << page_no_dblwr + << " in the doublewrite buffer is" + " not within space bounds: page " + << page_id_t(space_id, page_no); + } } else { - ulint zip_size = fil_space_get_zip_size(space_id); + const page_size_t page_size(space->flags); + const page_id_t page_id(space_id, page_no); + + /* We want to ensure that for partial reads the + unread portion of the page is NUL. */ + memset(read_buf, 0x0, page_size.physical()); + + IORequest request; + + request.dblwr_recover(); /* Read in the actual page from the file */ - fil_io(OS_FILE_READ, - true, - space_id, - zip_size, - page_no, - 0, - zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, - NULL, - 0); + dberr_t err = fil_io( + request, true, + page_id, page_size, + 0, page_size.physical(), read_buf, NULL, NULL); + + if (err != DB_SUCCESS) { + + ib::warn() + << "Double write buffer recovery: " + << page_id << " read failed with " + << "error: " << ut_strerr(err); + } /* Is page compressed ? 
*/ is_compressed = fil_page_is_compressed_encrypted(read_buf) | @@ -544,19 +619,26 @@ buf_dblwr_process() if (is_compressed) { fil_decompress_page(NULL, read_buf, UNIV_PAGE_SIZE, NULL, true); } + if (err != DB_SUCCESS) { + + ib::warn() + << "Double write buffer recovery: " + << page_id << " read failed with " + << "error: " << ut_strerr(err); + } + + if (fil_space_verify_crypt_checksum(read_buf, page_size)) { - if (fil_space_verify_crypt_checksum(read_buf, zip_size)) { /* page is encrypted and checksum is OK */ - } else if (buf_page_is_corrupted(true, read_buf, zip_size)) { + } else if (buf_page_is_corrupted( + true, read_buf, page_size, + fsp_is_checksum_disabled(space_id))) { - fprintf(stderr, - "InnoDB: Warning: database page" - " corruption or a failed\n" - "InnoDB: file read of" - " space %lu page %lu.\n" - "InnoDB: Trying to recover it from" - " the doublewrite buffer.\n", - (ulong) space_id, (ulong) page_no); + ib::warn() << "Database page corruption or" + << " a failed file read of page " + << page_id + << ". Trying to recover it from the" + << " doublewrite buffer."; /* Is page compressed ? */ is_compressed = fil_page_is_compressed_encrypted(page) | @@ -565,109 +647,95 @@ buf_dblwr_process() /* If page was compressed, decompress it before we check checksum. 
*/ if (is_compressed) { - fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, NULL, true); + fil_decompress_page(NULL, (byte*)page, UNIV_PAGE_SIZE, NULL, true); } - if (fil_space_verify_crypt_checksum(page, zip_size)) { + if (fil_space_verify_crypt_checksum(page, page_size)) { /* the doublewrite buffer page is encrypted and OK */ - } else if (buf_page_is_corrupted(true, - page, - zip_size)) { - fprintf(stderr, - "InnoDB: Dump of the page:\n"); + } else if (buf_page_is_corrupted( + true, page, page_size, + fsp_is_checksum_disabled(space_id))) { + + ib::error() << "Dump of the page:"; + buf_page_print( - read_buf, zip_size, + read_buf, page_size, BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, - "InnoDB: Dump of" - " corresponding page" - " in doublewrite buffer:\n"); + ib::error() << "Dump of corresponding" + " page in doublewrite buffer:"; + buf_page_print( - page, zip_size, + page, page_size, BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, - "InnoDB: Also the page in the" - " doublewrite buffer" - " is corrupt.\n" - "InnoDB: Cannot continue" - " operation.\n" - "InnoDB: You can try to" - " recover the database" - " with the my.cnf\n" - "InnoDB: option:\n" - "InnoDB:" - " innodb_force_recovery=6\n"); - ut_error; + ib::fatal() << "The page in the" + " doublewrite buffer is" + " corrupt. Cannot continue" + " operation. You can try to" + " recover the database with" + " innodb_force_recovery=6"; } + } else if (buf_page_is_zeroes(read_buf, page_size) + && !buf_page_is_zeroes(page, page_size) + && !buf_page_is_corrupted( + true, page, page_size, + fsp_is_checksum_disabled(space_id))) { - /* Write the good page from the - doublewrite buffer to the intended - position */ + /* Database page contained only zeroes, while + a valid copy is available in dblwr buffer. */ - fil_io(OS_FILE_WRITE, - true, - space_id, - zip_size, - page_no, - 0, - zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, - NULL, - 0); + } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Recovered the page from" - " the doublewrite buffer."); + bool t1 = buf_page_is_zeroes( + read_buf, page_size); - } else if (buf_page_is_zeroes(read_buf, zip_size)) { + bool t2 = buf_page_is_zeroes(page, page_size); - if (!buf_page_is_zeroes(page, zip_size) - && !buf_page_is_corrupted(true, page, - zip_size)) { + bool t3 = buf_page_is_corrupted( + true, page, page_size, + fsp_is_checksum_disabled(space_id)); + + if (t1 && !(t2 || t3)) { /* Database page contained only zeroes, while a valid copy is available in dblwr buffer. */ - fil_io(OS_FILE_WRITE, - true, - space_id, - zip_size, - page_no, - 0, - zip_size ? zip_size : UNIV_PAGE_SIZE, - page, - NULL, - 0); + } else { + continue; } } + + /* Recovered data file pages are written out + as uncompressed. */ + + IORequest write_request(IORequest::WRITE); + + write_request.disable_compression(); + + /* Write the good page from the doublewrite + buffer to the intended position. */ + + fil_io(write_request, true, + page_id, page_size, + 0, page_size.physical(), + const_cast(page), NULL, NULL); + + ib::info() + << "Recovered page " + << page_id + << " from the doublewrite buffer."; } } - fil_flush_file_spaces(FIL_TABLESPACE); + recv_dblwr.pages.clear(); - { - size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - byte *unaligned_buf = static_cast( - ut_malloc(bytes + UNIV_PAGE_SIZE - 1)); - - byte *buf = static_cast( - ut_align(unaligned_buf, UNIV_PAGE_SIZE)); - memset(buf, 0, bytes); - - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block1, 0, bytes, buf, NULL, NULL); - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block2, 0, bytes, buf, NULL, NULL); - - ut_free(unaligned_buf); - } + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); + ut_free(unaligned_read_buf); } /****************************************************************//** Frees doublewrite buffer. 
*/ -UNIV_INTERN void buf_dblwr_free(void) /*================*/ @@ -677,35 +745,38 @@ buf_dblwr_free(void) ut_ad(buf_dblwr->s_reserved == 0); ut_ad(buf_dblwr->b_reserved == 0); - os_event_free(buf_dblwr->b_event); - os_event_free(buf_dblwr->s_event); + os_event_destroy(buf_dblwr->b_event); + os_event_destroy(buf_dblwr->s_event); ut_free(buf_dblwr->write_buf_unaligned); buf_dblwr->write_buf_unaligned = NULL; - mem_free(buf_dblwr->buf_block_arr); + ut_free(buf_dblwr->buf_block_arr); buf_dblwr->buf_block_arr = NULL; - mem_free(buf_dblwr->in_use); + ut_free(buf_dblwr->in_use); buf_dblwr->in_use = NULL; mutex_free(&buf_dblwr->mutex); - mem_free(buf_dblwr); + ut_free(buf_dblwr); buf_dblwr = NULL; } /********************************************************************//** Updates the doublewrite buffer when an IO request is completed. */ -UNIV_INTERN void buf_dblwr_update( /*=============*/ const buf_page_t* bpage, /*!< in: buffer block descriptor */ buf_flush_t flush_type)/*!< in: flush type */ { - if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { + if (!srv_use_doublewrite_buf + || buf_dblwr == NULL + || fsp_is_system_temporary(bpage->id.space())) { return; } + ut_ad(!srv_read_only_mode); + switch (flush_type) { case BUF_FLUSH_LIST: case BUF_FLUSH_LRU: @@ -721,7 +792,7 @@ buf_dblwr_update( mutex_exit(&buf_dblwr->mutex); /* This will finish the batch. Sync data files to the disk. */ - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); mutex_enter(&buf_dblwr->mutex); /* We can now reuse the doublewrite memory buffer: */ @@ -779,18 +850,16 @@ buf_dblwr_check_page_lsn( - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - "InnoDB: The low 4 bytes of LSN fields do not match " - "(" ULINTPF " != " ULINTPF ")!" 
- " Noticed in the buffer pool.\n", - mach_read_from_4( - page + FIL_PAGE_LSN + 4), - mach_read_from_4( - page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)); + const ulint lsn1 = mach_read_from_4( + page + FIL_PAGE_LSN + 4); + const ulint lsn2 = mach_read_from_4( + page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + + 4); + + ib::error() << "The page to be written seems corrupt!" + " The low 4 bytes of LSN fields do not match" + " (" << lsn1 << " != " << lsn2 << ")!" + " Noticed in the buffer pool."; } } @@ -803,21 +872,13 @@ buf_dblwr_assert_on_corrupt_block( /*==============================*/ const buf_block_t* block) /*!< in: block to check */ { - buf_page_print(block->frame, 0, BUF_PAGE_PRINT_NO_CRASH); + buf_page_print(block->frame, univ_page_size, BUF_PAGE_PRINT_NO_CRASH); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Apparent corruption of an" - " index page n:o %lu in space %lu\n" - "InnoDB: to be written to data file." - " We intentionally crash server\n" - "InnoDB: to prevent corrupt data" - " from ending up in data\n" - "InnoDB: files.\n", - (ulong) buf_block_get_page_no(block), - (ulong) buf_block_get_space(block)); - - ut_error; + ib::fatal() << "Apparent corruption of an index page " + << block->page.id + << " to be written to data file. We intentionally crash" + " the server to prevent corrupt data from ending up in" + " data files."; } /********************************************************************//** @@ -829,26 +890,50 @@ buf_dblwr_check_block( /*==================*/ const buf_block_t* block) /*!< in: block to check */ { - if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE - || block->page.zip.data) { - /* No simple validate for compressed pages exists. 
*/ + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + + if (block->skip_flush_check) { return; } - buf_dblwr_check_page_lsn(block->frame); - - if (!block->check_index_page_at_flush) { - return; - } - - if (page_is_comp(block->frame)) { - if (!page_simple_validate_new(block->frame)) { - buf_dblwr_assert_on_corrupt_block(block); + switch (fil_page_get_type(block->frame)) { + case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: + if (page_is_comp(block->frame)) { + if (page_simple_validate_new(block->frame)) { + return; + } + } else if (page_simple_validate_old(block->frame)) { + return; } - } else if (!page_simple_validate_old(block->frame)) { - - buf_dblwr_assert_on_corrupt_block(block); + /* While it is possible that this is not an index page + but just happens to have wrongly set FIL_PAGE_TYPE, + such pages should never be modified to without also + adjusting the page type during page allocation or + buf_flush_init_for_writing() or fil_page_reset_type(). */ + break; + case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_IBUF_BITMAP: + case FIL_PAGE_TYPE_UNKNOWN: + /* Do not complain again, we already reset this field. */ + case FIL_PAGE_UNDO_LOG: + case FIL_PAGE_INODE: + case FIL_PAGE_IBUF_FREE_LIST: + case FIL_PAGE_TYPE_SYS: + case FIL_PAGE_TYPE_TRX_SYS: + case FIL_PAGE_TYPE_XDES: + case FIL_PAGE_TYPE_BLOB: + case FIL_PAGE_TYPE_ZBLOB: + case FIL_PAGE_TYPE_ZBLOB2: + /* TODO: validate also non-index pages */ + return; + case FIL_PAGE_TYPE_ALLOCATED: + /* empty pages should never be flushed */ + return; + break; } + + buf_dblwr_assert_on_corrupt_block(block); } /********************************************************************//** @@ -862,45 +947,43 @@ buf_dblwr_write_block_to_datafile( bool sync) /*!< in: true if sync IO is requested */ { - ut_a(bpage); ut_a(buf_page_in_file(bpage)); - const ulint flags = sync - ? 
OS_FILE_WRITE - : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER; + ulint type = IORequest::WRITE; - void * frame = buf_page_get_frame(bpage); - - if (bpage->zip.data) { - fil_io(flags, - sync, - buf_page_get_space(bpage), - buf_page_get_zip_size(bpage), - buf_page_get_page_no(bpage), - 0, - buf_page_get_zip_size(bpage), - frame, - (void*) bpage, - 0); - - return; + if (sync) { + type |= IORequest::DO_NOT_WAKE; } + IORequest request(type); - const buf_block_t* block = (buf_block_t*) bpage; - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - buf_dblwr_check_page_lsn(block->frame); + /* We request frame here to get correct buffer in case of + encryption and/or page compression */ + void * frame = buf_page_get_frame(bpage); - fil_io(flags, - sync, - buf_block_get_space(block), - 0, - buf_block_get_page_no(block), - 0, - bpage->real_size, - frame, - (void*) block, - (ulint *)&bpage->write_size); + if (bpage->zip.data != NULL) { + ut_ad(bpage->size.is_compressed()); + + fil_io(request, sync, bpage->id, bpage->size, 0, + bpage->size.physical(), + (void*) frame, + (void*) bpage, NULL); + } else { + ut_ad(!bpage->size.is_compressed()); + + /* Our IO API is common for both reads and writes and is + therefore geared towards a non-const parameter. */ + + buf_block_t* block = reinterpret_cast( + const_cast(bpage)); + + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + buf_dblwr_check_page_lsn(block->frame); + + fil_io(request, + sync, bpage->id, bpage->size, 0, bpage->size.physical(), + frame, block, (ulint *)&bpage->write_size); + } } /********************************************************************//** @@ -909,7 +992,6 @@ and also wakes up the aio thread if simulated aio is used. It is very important to call this function after a batch of writes has been posted, and also when we may have to wait for a page latch! Otherwise a deadlock of threads can occur. 
*/ -UNIV_INTERN void buf_dblwr_flush_buffered_writes(void) /*=================================*/ @@ -924,6 +1006,8 @@ buf_dblwr_flush_buffered_writes(void) return; } + ut_ad(!srv_read_only_mode); + try_again: mutex_enter(&buf_dblwr->mutex); @@ -935,13 +1019,19 @@ try_again: mutex_exit(&buf_dblwr->mutex); + /* Wake possible simulated aio thread as there could be + system temporary tablespace pages active for flushing. + Note: system temporary tablespace pages are not scheduled + for doublewrite. */ + os_aio_simulated_wake_handler_threads(); + return; } if (buf_dblwr->batch_running) { /* Another thread is running the batch right now. Wait for it to finish. */ - ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); + int64_t sig_count = os_event_reset(buf_dblwr->b_event); mutex_exit(&buf_dblwr->mutex); os_event_wait_low(buf_dblwr->b_event, sig_count); @@ -992,9 +1082,9 @@ try_again: len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, buf_dblwr->first_free) * UNIV_PAGE_SIZE; - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block1, 0, len, - (void*) write_buf, NULL, 0); + fil_io(IORequestWrite, true, + page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size, + 0, len, (void*) write_buf, NULL, NULL); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -1008,9 +1098,9 @@ try_again: write_buf = buf_dblwr->write_buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block2, 0, len, - (void*) write_buf, NULL, 0); + fil_io(IORequestWrite, true, + page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size, + 0, len, (void*) write_buf, NULL, NULL); flush: /* increment the doublewrite flushed pages counter */ @@ -1052,14 +1142,11 @@ flush: Posts a buffer page for writing. If the doublewrite memory buffer is full, calls buf_dblwr_flush_buffered_writes and waits for for free space to appear. 
*/ -UNIV_INTERN void buf_dblwr_add_to_batch( /*====================*/ buf_page_t* bpage) /*!< in: buffer block to write */ { - ulint zip_size; - ut_a(buf_page_in_file(bpage)); try_again: @@ -1075,7 +1162,7 @@ try_again: point. The only exception is when a user thread is forced to do a flush batch because of a sync checkpoint. */ - ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); + int64_t sig_count = os_event_reset(buf_dblwr->b_event); mutex_exit(&buf_dblwr->mutex); os_event_wait_low(buf_dblwr->b_event, sig_count); @@ -1090,26 +1177,29 @@ try_again: goto try_again; } - zip_size = buf_page_get_zip_size(bpage); + byte* p = buf_dblwr->write_buf + + univ_page_size.physical() * buf_dblwr->first_free; + + /* We request frame here to get correct buffer in case of + encryption and/or page compression */ void * frame = buf_page_get_frame(bpage); - if (zip_size) { - UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); + if (bpage->size.is_compressed()) { + UNIV_MEM_ASSERT_RW(bpage->zip.data, bpage->size.physical()); /* Copy the compressed page and clear the rest. */ - memcpy(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free, - frame, zip_size); - memset(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free - + zip_size, 0, UNIV_PAGE_SIZE - zip_size); + + memcpy(p, frame, bpage->size.physical()); + + memset(p + bpage->size.physical(), 0x0, + univ_page_size.physical() - bpage->size.physical()); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - memcpy(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free, - frame, UNIV_PAGE_SIZE); + + UNIV_MEM_ASSERT_RW(frame, + bpage->size.logical()); + + memcpy(p, frame, bpage->size.logical()); } buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage; @@ -1140,7 +1230,6 @@ flushes in the doublewrite buffer are in use we wait here for one to become free. 
We are guaranteed that a slot will become free because any thread that is using a slot must also release the slot before leaving this function. */ -UNIV_INTERN void buf_dblwr_write_single_page( /*========================*/ @@ -1149,7 +1238,6 @@ buf_dblwr_write_single_page( { ulint n_slots; ulint size; - ulint zip_size; ulint offset; ulint i; @@ -1183,8 +1271,7 @@ retry: if (buf_dblwr->s_reserved == n_slots) { /* All slots are reserved. */ - ib_int64_t sig_count = - os_event_reset(buf_dblwr->s_event); + int64_t sig_count = os_event_reset(buf_dblwr->s_event); mutex_exit(&buf_dblwr->mutex); os_event_wait_low(buf_dblwr->s_event, sig_count); @@ -1230,36 +1317,39 @@ retry: write it. This is so because we want to pad the remaining bytes in the doublewrite page with zeros. */ - zip_size = buf_page_get_zip_size(bpage); + /* We request frame here to get correct buffer in case of + encryption and/or page compression */ void * frame = buf_page_get_frame(bpage); - if (zip_size) { - memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i, - frame, zip_size); - memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i - + zip_size, 0, UNIV_PAGE_SIZE - zip_size); + if (bpage->size.is_compressed()) { + memcpy(buf_dblwr->write_buf + univ_page_size.physical() * i, + frame, bpage->size.physical()); - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, 0, - offset, - 0, - UNIV_PAGE_SIZE, - (void*) (buf_dblwr->write_buf + UNIV_PAGE_SIZE * i), - NULL, - 0); + memset(buf_dblwr->write_buf + univ_page_size.physical() * i + + bpage->size.physical(), 0x0, + univ_page_size.physical() - bpage->size.physical()); + + fil_io(IORequestWrite, + true, + page_id_t(TRX_SYS_SPACE, offset), + univ_page_size, + 0, + univ_page_size.physical(), + (void *)(buf_dblwr->write_buf + univ_page_size.physical() * i), + NULL, + NULL); } else { /* It is a regular page. 
Write it directly to the doublewrite buffer */ - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, 0, - offset, - 0, - bpage->real_size, - frame, - NULL, - 0); + fil_io(IORequestWrite, + true, + page_id_t(TRX_SYS_SPACE, offset), + univ_page_size, + 0, + univ_page_size.physical(), + (void*) frame, + NULL, + NULL); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index 0abf7118b4f..682be386f2b 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -23,32 +23,33 @@ Implements a buffer pool dump/load. Created April 08, 2011 Vasil Dimov *******************************************************/ +#include "my_global.h" +#include "my_sys.h" + +#include "mysql/psi/mysql_stage.h" +#include "mysql/psi/psi.h" + #include "univ.i" -#include /* va_* */ -#include /* strerror() */ - -#include "buf0buf.h" /* buf_pool_mutex_enter(), srv_buf_pool_instances */ +#include "buf0buf.h" #include "buf0dump.h" -#include "db0err.h" -#include "dict0dict.h" /* dict_operation_lock */ -#include "os0file.h" /* OS_FILE_MAX_PATH */ -#include "os0sync.h" /* os_event* */ -#include "os0thread.h" /* os_thread_* */ -#include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */ -#include "srv0start.h" /* srv_shutdown_state */ -#include "sync0rw.h" /* rw_lock_s_lock() */ -#include "ut0byte.h" /* ut_ull_create() */ -#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */ +#include "dict0dict.h" +#include "os0file.h" +#include "os0thread.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "sync0rw.h" +#include "ut0byte.h" + +#include enum status_severity { + STATUS_VERBOSE, STATUS_INFO, - STATUS_NOTICE, STATUS_ERR }; -#define SHUTTING_DOWN() (UNIV_UNLIKELY(srv_shutdown_state \ - != SRV_SHUTDOWN_NONE)) +#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE) /* Flags that tell the buffer pool dump/load thread which action should it take after being waked up. 
*/ @@ -73,7 +74,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start a dump. This function is called by MySQL code via buffer_pool_dump_now() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_dump_start() /*============*/ @@ -87,7 +87,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start a load. This function is called by MySQL code via buffer_pool_load_now() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_load_start() /*============*/ @@ -123,7 +122,18 @@ buf_dump_status( sizeof(export_vars.innodb_buffer_pool_dump_status), fmt, ap); - ib_logf((ib_log_level_t) severity, "%s", export_vars.innodb_buffer_pool_dump_status); + switch (severity) { + case STATUS_INFO: + ib::info() << export_vars.innodb_buffer_pool_dump_status; + break; + + case STATUS_ERR: + ib::error() << export_vars.innodb_buffer_pool_dump_status; + break; + + case STATUS_VERBOSE: + break; + } va_end(ap); } @@ -154,10 +164,17 @@ buf_load_status( sizeof(export_vars.innodb_buffer_pool_load_status), fmt, ap); - if (severity == STATUS_NOTICE || severity == STATUS_ERR) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", - export_vars.innodb_buffer_pool_load_status); + switch (severity) { + case STATUS_INFO: + ib::info() << export_vars.innodb_buffer_pool_load_status; + break; + + case STATUS_ERR: + ib::error() << export_vars.innodb_buffer_pool_load_status; + break; + + case STATUS_VERBOSE: + break; } va_end(ap); @@ -182,6 +199,56 @@ get_buf_dump_dir() return(dump_dir); } +/** Generate the path to the buffer pool dump/load file. +@param[out] path generated path +@param[in] path_size size of 'path', used as in snprintf(3). 
*/ +static +void +buf_dump_generate_path( + char* path, + size_t path_size) +{ + char buf[FN_REFLEN]; + + ut_snprintf(buf, sizeof(buf), "%s%c%s", get_buf_dump_dir(), + OS_PATH_SEPARATOR, srv_buf_dump_filename); + + os_file_type_t type; + bool exists = false; + bool ret; + + ret = os_file_status(buf, &exists, &type); + + /* For realpath() to succeed the file must exist. */ + + if (ret && exists) { + /* my_realpath() assumes the destination buffer is big enough + to hold FN_REFLEN bytes. */ + ut_a(path_size >= FN_REFLEN); + + my_realpath(path, buf, 0); + } else { + /* If it does not exist, then resolve only srv_data_home + and append srv_buf_dump_filename to it. */ + char srv_data_home_full[FN_REFLEN]; + + my_realpath(srv_data_home_full, get_buf_dump_dir(), 0); + + if (srv_data_home_full[strlen(srv_data_home_full) - 1] + == OS_PATH_SEPARATOR) { + + ut_snprintf(path, path_size, "%s%s", + srv_data_home_full, + srv_buf_dump_filename); + } else { + ut_snprintf(path, path_size, "%s%c%s", + srv_data_home_full, + OS_PATH_SEPARATOR, + srv_buf_dump_filename); + } + } +} + /*****************************************************************//** Perform a buffer pool dump into the file specified by innodb_buffer_pool_filename. 
If any errors occur then the value of @@ -204,14 +271,12 @@ buf_dump( ulint i; int ret; - ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, - srv_buf_dump_filename); + buf_dump_generate_path(full_filename, sizeof(full_filename)); ut_snprintf(tmp_filename, sizeof(tmp_filename), "%s.incomplete", full_filename); - buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s", + buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) to %s", full_filename); f = fopen(tmp_filename, "w"); @@ -257,8 +322,8 @@ buf_dump( } } - dump = static_cast( - ut_malloc(n_pages * sizeof(*dump))) ; + dump = static_cast(ut_malloc_nokey( + n_pages * sizeof(*dump))); if (dump == NULL) { buf_pool_mutex_exit(buf_pool); @@ -277,8 +342,8 @@ buf_dump( ut_a(buf_page_in_file(bpage)); - dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage), - buf_page_get_page_no(bpage)); + dump[j] = BUF_DUMP_CREATE(bpage->id.space(), + bpage->id.page_no()); } ut_a(j == n_pages); @@ -311,10 +376,10 @@ buf_dump( counter == limit) { counter = 0; buf_dump_status( - STATUS_INFO, - "Dumping buffer pool " - ULINTPF "/" ULINTPF ", " - "page " ULINTPF "/" ULINTPF, + STATUS_VERBOSE, + "Dumping buffer pool" + " " ULINTPF "/" ULINTPF "," + " page " ULINTPF "/" ULINTPF, i + 1, srv_buf_pool_instances, j + 1, n_pages); } @@ -357,46 +422,10 @@ buf_dump( ut_sprintf_timestamp(now); - buf_dump_status(STATUS_NOTICE, + buf_dump_status(STATUS_INFO, "Buffer pool(s) dump completed at %s", now); } -/*****************************************************************//** -Compare two buffer pool dump entries, used to sort the dump on -space_no,page_no before loading in order to increase the chance for -sequential IO. 
-@return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */ -static -lint -buf_dump_cmp( -/*=========*/ - const buf_dump_t d1, /*!< in: buffer pool dump entry 1 */ - const buf_dump_t d2) /*!< in: buffer pool dump entry 2 */ -{ - if (d1 < d2) { - return(-1); - } else if (d1 == d2) { - return(0); - } else { - return(1); - } -} - -/*****************************************************************//** -Sort a buffer pool dump on space_no, page_no. */ -static -void -buf_dump_sort( -/*==========*/ - buf_dump_t* dump, /*!< in/out: buffer pool dump to sort */ - buf_dump_t* tmp, /*!< in/out: temp storage */ - ulint low, /*!< in: lowest index (inclusive) */ - ulint high) /*!< in: highest index (non-inclusive) */ -{ - UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high, - buf_dump_cmp); -} - /*****************************************************************//** Artificially delay the buffer pool loading if necessary. The idea of this function is to prevent hogging the server with IO and slowing down @@ -405,7 +434,7 @@ UNIV_INLINE void buf_load_throttle_if_needed( /*========================*/ - ulint* last_check_time, /*!< in/out: miliseconds since epoch + ulint* last_check_time, /*!< in/out: milliseconds since epoch of the last time we did check if throttling is needed, we do the check every srv_io_capacity IO ops. */ @@ -455,7 +484,7 @@ buf_load_throttle_if_needed( "cur_activity_count == *last_activity_count" check and calling ut_time_ms() that often may turn out to be too expensive. 
*/ - if (elapsed_time < 1000 /* 1 sec (1000 mili secs) */) { + if (elapsed_time < 1000 /* 1 sec (1000 milli secs) */) { os_thread_sleep((1000 - elapsed_time) * 1000 /* micro secs */); } @@ -478,7 +507,6 @@ buf_load() char now[32]; FILE* f; buf_dump_t* dump; - buf_dump_t* dump_tmp; ulint dump_n; ulint total_buffer_pools_pages; ulint i; @@ -489,11 +517,9 @@ buf_load() /* Ignore any leftovers from before */ buf_load_abort_flag = FALSE; - ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, - srv_buf_dump_filename); + buf_dump_generate_path(full_filename, sizeof(full_filename)); - buf_load_status(STATUS_NOTICE, + buf_load_status(STATUS_INFO, "Loading buffer pool(s) from %s", full_filename); f = fopen(full_filename, "r"); @@ -523,45 +549,42 @@ buf_load() what = "parsing"; } fclose(f); - buf_load_status(STATUS_ERR, "Error %s '%s', " - "unable to load buffer pool (stage 1)", + buf_load_status(STATUS_ERR, "Error %s '%s'," + " unable to load buffer pool (stage 1)", what, full_filename); return; } /* If dump is larger than the buffer pool(s), then we ignore the extra trailing. This could happen if a dump is made, then buffer - pool is shrunk and then load it attempted. */ + pool is shrunk and then load is attempted. 
*/ total_buffer_pools_pages = buf_pool_get_n_pages() * srv_buf_pool_instances; if (dump_n > total_buffer_pools_pages) { dump_n = total_buffer_pools_pages; } - dump = static_cast(ut_malloc(dump_n * sizeof(*dump))); + if(dump_n != 0) { + dump = static_cast(ut_malloc_nokey( + dump_n * sizeof(*dump))); + } else { + fclose(f); + ut_sprintf_timestamp(now); + buf_load_status(STATUS_INFO, + "Buffer pool(s) load completed at %s" + " (%s was empty)", now, full_filename); + return; + } if (dump == NULL) { fclose(f); buf_load_status(STATUS_ERR, - "Cannot allocate " ULINTPF " bytes: %s", + "Cannot allocate %lu bytes: %s", (ulint) (dump_n * sizeof(*dump)), strerror(errno)); return; } - dump_tmp = static_cast( - ut_malloc(dump_n * sizeof(*dump_tmp))); - - if (dump_tmp == NULL) { - ut_free(dump); - fclose(f); - buf_load_status(STATUS_ERR, - "Cannot allocate " ULINTPF " bytes: %s", - (ulint) (dump_n * sizeof(*dump_tmp)), - strerror(errno)); - return; - } - rewind(f); for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) { @@ -575,24 +598,22 @@ buf_load() /* else */ ut_free(dump); - ut_free(dump_tmp); fclose(f); buf_load_status(STATUS_ERR, - "Error parsing '%s', unable " - "to load buffer pool (stage 2)", + "Error parsing '%s', unable" + " to load buffer pool (stage 2)", full_filename); return; } if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) { ut_free(dump); - ut_free(dump_tmp); fclose(f); buf_load_status(STATUS_ERR, - "Error parsing '%s': bogus " - "space,page " ULINTPF "," ULINTPF - " at line " ULINTPF ", " - "unable to load buffer pool", + "Error parsing '%s': bogus" + " space,page " ULINTPF "," ULINTPF + " at line " ULINTPF "," + " unable to load buffer pool", full_filename, space_id, page_no, i); @@ -612,42 +633,107 @@ buf_load() if (dump_n == 0) { ut_free(dump); ut_sprintf_timestamp(now); - buf_load_status(STATUS_NOTICE, - "Buffer pool(s) load completed at %s " - "(%s was empty)", now, full_filename); + buf_load_status(STATUS_INFO, + "Buffer pool(s) load completed at %s" + 
" (%s was empty)", now, full_filename); return; } if (!SHUTTING_DOWN()) { - buf_dump_sort(dump, dump_tmp, 0, dump_n); + std::sort(dump, dump + dump_n); } - ut_free(dump_tmp); + ulint last_check_time = 0; + ulint last_activity_cnt = 0; - ulint last_check_time = 0; - ulint last_activity_cnt = 0; + /* Avoid calling the expensive fil_space_acquire_silent() for each + page within the same tablespace. dump[] is sorted by (space, page), + so all pages from a given tablespace are consecutive. */ + ulint cur_space_id = BUF_DUMP_SPACE(dump[0]); + fil_space_t* space = fil_space_acquire_silent(cur_space_id); + page_size_t page_size(space ? space->flags : 0); + + /* JAN: TODO: MySQL 5.7 PSI +#ifdef HAVE_PSI_STAGE_INTERFACE + PSI_stage_progress* pfs_stage_progress + = mysql_set_stage(srv_stage_buffer_pool_load.m_key); + #endif*/ /* HAVE_PSI_STAGE_INTERFACE */ + /* + mysql_stage_set_work_estimated(pfs_stage_progress, dump_n); + mysql_stage_set_work_completed(pfs_stage_progress, 0); + */ for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) { - buf_read_page_async(BUF_DUMP_SPACE(dump[i]), - BUF_DUMP_PAGE(dump[i])); + /* space_id for this iteration of the loop */ + const ulint this_space_id = BUF_DUMP_SPACE(dump[i]); + + if (this_space_id != cur_space_id) { + if (space != NULL) { + fil_space_release(space); + } + + cur_space_id = this_space_id; + space = fil_space_acquire_silent(cur_space_id); + + if (space != NULL) { + const page_size_t cur_page_size( + space->flags); + page_size.copy_from(cur_page_size); + } + } + + /* JAN: TODO: As we use background page read below, + if tablespace is encrypted we cant use it. 
*/ + if (space == NULL || + (space && space->crypt_data && + space->crypt_data->encryption != FIL_SPACE_ENCRYPTION_OFF && + space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) { + continue; + } + + buf_read_page_background( + page_id_t(this_space_id, BUF_DUMP_PAGE(dump[i])), + page_size, true); if (i % 64 == 63) { os_aio_simulated_wake_handler_threads(); } - if (i % 128 == 0) { - buf_load_status(STATUS_INFO, + /* Update the progress every 32 MiB, which is every Nth page, + where N = 32*1024^2 / page_size. */ + static const ulint update_status_every_n_mb = 32; + static const ulint update_status_every_n_pages + = update_status_every_n_mb * 1024 * 1024 + / page_size.physical(); + + if (i % update_status_every_n_pages == 0) { + buf_load_status(STATUS_VERBOSE, "Loaded " ULINTPF "/" ULINTPF " pages", i + 1, dump_n); + /* mysql_stage_set_work_completed(pfs_stage_progress, + i); */ } if (buf_load_abort_flag) { + if (space != NULL) { + fil_space_release(space); + } buf_load_abort_flag = FALSE; ut_free(dump); buf_load_status( - STATUS_NOTICE, + STATUS_INFO, "Buffer pool(s) load aborted on request"); + /* Premature end, set estimated = completed = i and + end the current stage event. */ + /* + mysql_stage_set_work_estimated(pfs_stage_progress, i); + mysql_stage_set_work_completed(pfs_stage_progress, + i); + */ +#ifdef HAVE_PSI_STAGE_INTERFACE + /* mysql_end_stage(); */ +#endif /* HAVE_PSI_STAGE_INTERFACE */ return; } @@ -655,19 +741,29 @@ buf_load() &last_check_time, &last_activity_cnt, i); } + if (space != NULL) { + fil_space_release(space); + } + ut_free(dump); ut_sprintf_timestamp(now); - buf_load_status(STATUS_NOTICE, + buf_load_status(STATUS_INFO, "Buffer pool(s) load completed at %s", now); + + /* Make sure that estimated = completed when we end. */ + /* mysql_stage_set_work_completed(pfs_stage_progress, dump_n); */ + /* End the stage progress event. 
*/ +#ifdef HAVE_PSI_STAGE_INTERFACE + /* mysql_end_stage(); */ +#endif /* HAVE_PSI_STAGE_INTERFACE */ } /*****************************************************************//** Aborts a currently running buffer pool load. This function is called by MySQL code via buffer_pool_load_abort() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_load_abort() /*============*/ @@ -680,7 +776,7 @@ This is the main thread for buffer pool dump/load. It waits for an event and when waked up either performs a dump or load and sleeps again. @return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t DECLARE_THREAD(buf_dump_thread)( /*============================*/ @@ -688,11 +784,15 @@ DECLARE_THREAD(buf_dump_thread)( required by os_thread_create */ { ut_ad(!srv_read_only_mode); + /* JAN: TODO: MySQL 5.7 PSI +#ifdef UNIV_PFS_THREAD + pfs_register_thread(buf_dump_thread_key); + #endif */ /* UNIV_PFS_THREAD */ srv_buf_dump_thread_active = TRUE; - buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started"); - buf_load_status(STATUS_INFO, "Loading buffer pool(s) not yet started"); + buf_dump_status(STATUS_VERBOSE, "Dumping of buffer pool not started"); + buf_load_status(STATUS_VERBOSE, "Loading of buffer pool not started"); if (srv_buffer_pool_load_at_startup) { buf_load(); @@ -724,7 +824,7 @@ DECLARE_THREAD(buf_dump_thread)( /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. 
*/ - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f7721e69128..26e0da4e371 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2016, MariaDB Corporation Copyright (c) 2013, 2014, Fusion-io @@ -25,6 +25,10 @@ The database buffer buf_pool flush algorithm Created 11/11/1995 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" +#include +#include + #include "buf0flu.h" #ifdef UNIV_NONINL @@ -39,7 +43,6 @@ Created 11/11/1995 Heikki Tuuri #include "page0zip.h" #ifndef UNIV_HOTBACKUP #include "ut0byte.h" -#include "ut0lst.h" #include "page0page.h" #include "fil0fil.h" #include "buf0lru.h" @@ -47,12 +50,23 @@ Created 11/11/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" #include "os0file.h" -#include "os0sync.h" #include "trx0sys.h" #include "srv0mon.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" +#include "fsp0sysspace.h" +#include "ut0stage.h" #include "fil0pagecompress.h" +#ifdef UNIV_LINUX +/* include defs for CPU time priority settings */ +#include +#include +#include +#include +static const int buf_flush_page_cleaner_priority = -20; +#endif /* UNIV_LINUX */ + +/** Sleep time in microseconds for loop waiting for the oldest +modification lsn */ +static const ulint buf_flush_wait_flushed_sleep_time = 10000; /** Number of pages flushed through non flush_list flushes. */ static ulint buf_lru_flush_page_count = 0; @@ -62,14 +76,125 @@ is set to TRUE by the page_cleaner thread when it is spawned and is set back to FALSE at shutdown by the page_cleaner as well. Therefore no need to protect it by a mutex. 
It is only ever read by the thread doing the shutdown */ -UNIV_INTERN ibool buf_page_cleaner_is_active = FALSE; +bool buf_page_cleaner_is_active = false; + +/** Factor for scan length to determine n_pages for intended oldest LSN +progress */ +static ulint buf_flush_lsn_scan_factor = 3; + +/** Average redo generation rate */ +static lsn_t lsn_avg_rate = 0; + +/** Target oldest LSN for the requested flush_sync */ +static lsn_t buf_flush_sync_lsn = 0; #ifdef UNIV_PFS_THREAD -UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; +mysql_pfs_key_t page_cleaner_thread_key; #endif /* UNIV_PFS_THREAD */ /** Event to synchronise with the flushing. */ - os_event_t buf_flush_event; +os_event_t buf_flush_event; + +/** State for page cleaner array slot */ +enum page_cleaner_state_t { + /** Not requested any yet. + Moved from FINISHED by the coordinator. */ + PAGE_CLEANER_STATE_NONE = 0, + /** Requested but not started flushing. + Moved from NONE by the coordinator. */ + PAGE_CLEANER_STATE_REQUESTED, + /** Flushing is on going. + Moved from REQUESTED by the worker. */ + PAGE_CLEANER_STATE_FLUSHING, + /** Flushing was finished. + Moved from FLUSHING by the worker. */ + PAGE_CLEANER_STATE_FINISHED +}; + +/** Page cleaner request state for each buffer pool instance */ +struct page_cleaner_slot_t { + page_cleaner_state_t state; /*!< state of the request. + protected by page_cleaner_t::mutex + if the worker thread got the slot and + set to PAGE_CLEANER_STATE_FLUSHING, + n_flushed_lru and n_flushed_list can be + updated only by the worker thread */ + /* This value is set during state==PAGE_CLEANER_STATE_NONE */ + ulint n_pages_requested; + /*!< number of requested pages + for the slot */ + /* These values are updated during state==PAGE_CLEANER_STATE_FLUSHING, + and commited with state==PAGE_CLEANER_STATE_FINISHED. 
+ The consistency is protected by the 'state' */ + ulint n_flushed_lru; + /*!< number of flushed pages + by LRU scan flushing */ + ulint n_flushed_list; + /*!< number of flushed pages + by flush_list flushing */ + bool succeeded_list; + /*!< true if flush_list flushing + succeeded. */ + ulint flush_lru_time; + /*!< elapsed time for LRU flushing */ + ulint flush_list_time; + /*!< elapsed time for flush_list + flushing */ + ulint flush_lru_pass; + /*!< count to attempt LRU flushing */ + ulint flush_list_pass; + /*!< count to attempt flush_list + flushing */ +}; + +/** Page cleaner structure common for all threads */ +struct page_cleaner_t { + ib_mutex_t mutex; /*!< mutex to protect whole of + page_cleaner_t struct and + page_cleaner_slot_t slots. */ + os_event_t is_requested; /*!< event to activate worker + threads. */ + os_event_t is_finished; /*!< event to signal that all + slots were finished. */ + volatile ulint n_workers; /*!< number of worker threads + in existence */ + bool requested; /*!< true if requested pages + to flush */ + lsn_t lsn_limit; /*!< upper limit of LSN to be + flushed */ + ulint n_slots; /*!< total number of slots */ + ulint n_slots_requested; + /*!< number of slots + in the state + PAGE_CLEANER_STATE_REQUESTED */ + ulint n_slots_flushing; + /*!< number of slots + in the state + PAGE_CLEANER_STATE_FLUSHING */ + ulint n_slots_finished; + /*!< number of slots + in the state + PAGE_CLEANER_STATE_FINISHED */ + ulint flush_time; /*!< elapsed time to flush + requests for all slots */ + ulint flush_pass; /*!< count to finish to flush + requests for all slots */ + page_cleaner_slot_t* slots; /*!< pointer to the slots */ + bool is_running; /*!< false if attempt + to shutdown */ + +#ifdef UNIV_DEBUG + ulint n_disabled_debug; + /*page.zip); - buf_pool->stat.flush_list_bytes += zip_size ? 
zip_size : UNIV_PAGE_SIZE; + + buf_pool->stat.flush_list_bytes += block->page.size.physical(); + ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size); } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /******************************************************************//** Validates the flush list. -@return TRUE if ok */ +@return TRUE if ok */ static ibool buf_flush_validate_low( @@ -107,7 +232,7 @@ buf_flush_validate_low( /******************************************************************//** Validates the flush list some of the time. -@return TRUE if ok or the check was skipped */ +@return TRUE if ok or the check was skipped */ static ibool buf_flush_validate_skip( @@ -138,7 +263,7 @@ buf_flush_validate_skip( Insert a block in the flush_rbt and returns a pointer to its predecessor or NULL if no predecessor. The ordering is maintained on the basis of the key. -@return pointer to the predecessor or NULL if no predecessor. */ +@return pointer to the predecessor or NULL if no predecessor. */ static buf_page_t* buf_flush_insert_in_flush_rbt( @@ -201,7 +326,7 @@ buf_pool->flush_rbt. Note that for the purpose of flush_rbt, we only need to order blocks on the oldest_modification. The other two fields are used to uniquely identify the blocks. -@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */ +@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */ static int buf_flush_block_cmp( @@ -212,13 +337,14 @@ buf_flush_block_cmp( int ret; const buf_page_t* b1 = *(const buf_page_t**) p1; const buf_page_t* b2 = *(const buf_page_t**) p2; -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(b1); -#endif /* UNIV_DEBUG */ ut_ad(b1 != NULL); ut_ad(b2 != NULL); +#ifdef UNIV_DEBUG + buf_pool_t* buf_pool = buf_pool_from_bpage(b1); +#endif /* UNIV_DEBUG */ + ut_ad(buf_flush_list_mutex_own(buf_pool)); ut_ad(b1->in_flush_list); @@ -231,17 +357,16 @@ buf_flush_block_cmp( } /* If oldest_modification is same then decide on the space. 
*/ - ret = (int)(b2->space - b1->space); + ret = (int)(b2->id.space() - b1->id.space()); - /* Or else decide ordering on the offset field. */ - return(ret ? ret : (int)(b2->offset - b1->offset)); + /* Or else decide ordering on the page number. */ + return(ret ? ret : (int) (b2->id.page_no() - b1->id.page_no())); } /********************************************************************//** Initialize the red-black tree to speed up insertions into the flush_list during recovery process. Should be called at the start of recovery process before any page has been read/written. */ -UNIV_INTERN void buf_flush_init_flush_rbt(void) /*==========================*/ @@ -255,6 +380,8 @@ buf_flush_init_flush_rbt(void) buf_flush_list_mutex_enter(buf_pool); + ut_ad(buf_pool->flush_rbt == NULL); + /* Create red black tree for speedy insertions in flush list. */ buf_pool->flush_rbt = rbt_create( sizeof(buf_page_t*), buf_flush_block_cmp); @@ -265,7 +392,6 @@ buf_flush_init_flush_rbt(void) /********************************************************************//** Frees up the red-black tree. */ -UNIV_INTERN void buf_flush_free_flush_rbt(void) /*==========================*/ @@ -292,7 +418,6 @@ buf_flush_free_flush_rbt(void) /********************************************************************//** Inserts a modified block into the flush list. */ -UNIV_INTERN void buf_flush_insert_into_flush_list( /*=============================*/ @@ -302,7 +427,7 @@ buf_flush_insert_into_flush_list( { ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(log_flush_order_mutex_own()); - ut_ad(mutex_own(&block->mutex)); + ut_ad(buf_page_mutex_own(block)); buf_flush_list_mutex_enter(buf_pool); @@ -312,7 +437,7 @@ buf_flush_insert_into_flush_list( /* If we are in the recovery then we need to update the flush red-black tree as well. 
*/ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + if (buf_pool->flush_rbt != NULL) { buf_flush_list_mutex_exit(buf_pool); buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn); return; @@ -323,20 +448,23 @@ buf_flush_insert_into_flush_list( ut_d(block->page.in_flush_list = TRUE); block->page.oldest_modification = lsn; - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); + + UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page); + incr_flush_list_size_in_bytes(block, buf_pool); #ifdef UNIV_DEBUG_VALGRIND - { - ulint zip_size = buf_block_get_zip_size(block); + void* p; - if (zip_size) { - UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); - } else { - UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); - } + if (block->page.size.is_compressed()) { + p = block->page.zip.data; + } else { + p = block->frame; } + + UNIV_MEM_ASSERT_RW(p, block->page.size.physical()); #endif /* UNIV_DEBUG_VALGRIND */ + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_skip(buf_pool)); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -348,7 +476,6 @@ buf_flush_insert_into_flush_list( Inserts a modified block into the flush list in the right sorted position. This function is used by recovery, because there the modifications do not necessarily come in the order of lsn's. 
*/ -UNIV_INTERN void buf_flush_insert_sorted_into_flush_list( /*====================================*/ @@ -361,7 +488,7 @@ buf_flush_insert_sorted_into_flush_list( ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(log_flush_order_mutex_own()); - ut_ad(mutex_own(&block->mutex)); + ut_ad(buf_page_mutex_own(block)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); buf_flush_list_mutex_enter(buf_pool); @@ -387,15 +514,15 @@ buf_flush_insert_sorted_into_flush_list( block->page.oldest_modification = lsn; #ifdef UNIV_DEBUG_VALGRIND - { - ulint zip_size = buf_block_get_zip_size(block); + void* p; - if (zip_size) { - UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); - } else { - UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); - } + if (block->page.size.is_compressed()) { + p = block->page.zip.data; + } else { + p = block->frame; } + + UNIV_MEM_ASSERT_RW(p, block->page.size.physical()); #endif /* UNIV_DEBUG_VALGRIND */ prev_b = NULL; @@ -404,9 +531,9 @@ buf_flush_insert_sorted_into_flush_list( should not be NULL. In a very rare boundary case it is possible that the flush_rbt has already been freed by the recovery thread before the last page was hooked up in the flush_list by the - io-handler thread. In that case we'll just do a simple + io-handler thread. In that case we'll just do a simple linear search in the else block. 
*/ - if (buf_pool->flush_rbt) { + if (buf_pool->flush_rbt != NULL) { prev_b = buf_flush_insert_in_flush_rbt(&block->page); @@ -414,8 +541,9 @@ buf_flush_insert_sorted_into_flush_list( b = UT_LIST_GET_FIRST(buf_pool->flush_list); - while (b && b->oldest_modification + while (b != NULL && b->oldest_modification > block->page.oldest_modification) { + ut_ad(b->in_flush_list); prev_b = b; b = UT_LIST_GET_NEXT(list, b); @@ -423,10 +551,9 @@ buf_flush_insert_sorted_into_flush_list( } if (prev_b == NULL) { - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); + UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page); } else { - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list, - prev_b, &block->page); + UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page); } incr_flush_list_size_in_bytes(block, buf_pool); @@ -441,8 +568,7 @@ buf_flush_insert_sorted_into_flush_list( /********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, i.e., the transition FILE_PAGE => NOT_USED allowed. -@return TRUE if can replace immediately */ -UNIV_INTERN +@return TRUE if can replace immediately */ ibool buf_flush_ready_for_replace( /*========================*/ @@ -463,21 +589,15 @@ buf_flush_ready_for_replace( && buf_page_get_io_fix(bpage) == BUF_IO_NONE); } - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: buffer block state %lu" - " in the LRU list!\n", - (ulong) buf_page_get_state(bpage)); - ut_print_buf(stderr, bpage, sizeof(buf_page_t)); - putc('\n', stderr); + ib::fatal() << "Buffer block " << bpage << " state " << bpage->state + << " in the LRU list!"; return(FALSE); } /********************************************************************//** Returns true if the block is modified and ready for flushing. 
-@return true if can flush immediately */ -UNIV_INTERN +@return true if can flush immediately */ bool buf_flush_ready_for_flush( /*======================*/ @@ -517,14 +637,12 @@ buf_flush_ready_for_flush( /********************************************************************//** Remove a block from the flush list of modified blocks. */ -UNIV_INTERN void buf_flush_remove( /*=============*/ buf_page_t* bpage) /*!< in: pointer to the block in question */ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ulint zip_size; ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); @@ -548,18 +666,18 @@ buf_flush_remove( return; case BUF_BLOCK_ZIP_DIRTY: buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + UT_LIST_REMOVE(buf_pool->flush_list, bpage); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG buf_LRU_insert_zip_clean(bpage); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ break; case BUF_BLOCK_FILE_PAGE: - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + UT_LIST_REMOVE(buf_pool->flush_list, bpage); break; } /* If the flush_rbt is active then delete from there as well. */ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + if (buf_pool->flush_rbt != NULL) { buf_flush_delete_from_flush_rbt(bpage); } @@ -567,8 +685,7 @@ buf_flush_remove( because we assert on in_flush_list in comparison function. */ ut_d(bpage->in_flush_list = FALSE); - zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE; + buf_pool->stat.flush_list_bytes -= bpage->size.physical(); bpage->oldest_modification = 0; @@ -576,6 +693,14 @@ buf_flush_remove( ut_a(buf_flush_validate_skip(buf_pool)); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + /* If there is an observer that want to know if the asynchronous + flushing was done then notify it. 
*/ + if (bpage->flush_observer != NULL) { + bpage->flush_observer->notify_remove(buf_pool, bpage); + + bpage->flush_observer = NULL; + } + buf_flush_list_mutex_exit(buf_pool); } @@ -590,7 +715,6 @@ use the current list node (bpage) to do the list manipulation because the list pointers could have changed between the time that we copied the contents of bpage to the dpage and the flush list manipulation below. */ -UNIV_INTERN void buf_flush_relocate_on_flush_list( /*=============================*/ @@ -621,7 +745,7 @@ buf_flush_relocate_on_flush_list( /* If recovery is active we must swap the control blocks in the flush_rbt as well. */ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + if (buf_pool->flush_rbt != NULL) { buf_flush_delete_from_flush_rbt(bpage); prev_b = buf_flush_insert_in_flush_rbt(dpage); } @@ -635,24 +759,18 @@ buf_flush_relocate_on_flush_list( ut_d(bpage->in_flush_list = FALSE); prev = UT_LIST_GET_PREV(list, bpage); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + UT_LIST_REMOVE(buf_pool->flush_list, bpage); if (prev) { ut_ad(prev->in_flush_list); - UT_LIST_INSERT_AFTER( - list, - buf_pool->flush_list, - prev, dpage); + UT_LIST_INSERT_AFTER( buf_pool->flush_list, prev, dpage); } else { - UT_LIST_ADD_FIRST( - list, - buf_pool->flush_list, - dpage); + UT_LIST_ADD_FIRST(buf_pool->flush_list, dpage); } /* Just an extra check. Previous in flush_list should be the same control block as in flush_rbt. */ - ut_a(!buf_pool->flush_rbt || prev_b == prev); + ut_a(buf_pool->flush_rbt == NULL || prev_b == prev); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low(buf_pool)); @@ -663,7 +781,6 @@ buf_flush_relocate_on_flush_list( /********************************************************************//** Updates the flush system data structures when a write is completed. 
*/ -UNIV_INTERN void buf_flush_write_complete( /*=====================*/ @@ -679,11 +796,6 @@ buf_flush_write_complete( flush_type = buf_page_get_flush_type(bpage); buf_pool->n_flush[flush_type]--; -#ifdef UNIV_DEBUG - /* fprintf(stderr, "n pending flush %lu\n", - buf_pool->n_flush[flush_type]); */ -#endif - if (buf_pool->n_flush[flush_type] == 0 && buf_pool->init_flush[flush_type] == FALSE) { @@ -696,80 +808,85 @@ buf_flush_write_complete( } #endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Calculate the checksum of a page from compressed table and update the page. */ -UNIV_INTERN +/** Calculate the checksum of a page from compressed table and update +the page. +@param[in,out] page page to update +@param[in] size compressed page size +@param[in] lsn LSN to stamp on the page */ void buf_flush_update_zip_checksum( -/*==========================*/ - buf_frame_t* page, /*!< in/out: Page to update */ - ulint zip_size, /*!< in: Compressed page size */ - lsn_t lsn) /*!< in: Lsn to stamp on the page */ + buf_frame_t* page, + ulint size, + lsn_t lsn) { - ut_a(zip_size > 0); + ut_a(size > 0); - ib_uint32_t checksum = static_cast( - page_zip_calc_checksum( - page, zip_size, - static_cast( - srv_checksum_algorithm))); + const uint32_t checksum = page_zip_calc_checksum( + page, size, + static_cast(srv_checksum_algorithm)); mach_write_to_8(page + FIL_PAGE_LSN, lsn); memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } -/********************************************************************//** -Initializes a page for writing to the tablespace. */ -UNIV_INTERN +/** Initialize a page for writing to the tablespace. 
+@param[in] block buffer block; NULL if bypassing the buffer pool +@param[in,out] page page frame +@param[in,out] page_zip_ compressed page, or NULL if uncompressed +@param[in] newest_lsn newest modification LSN to the page +@param[in] skip_checksum whether to disable the page checksum */ void buf_flush_init_for_writing( -/*=======================*/ - byte* page, /*!< in/out: page */ - void* page_zip_, /*!< in/out: compressed page, or NULL */ - lsn_t newest_lsn) /*!< in: newest modification lsn - to the page */ + const buf_block_t* block, + byte* page, + void* page_zip_, + lsn_t newest_lsn, + bool skip_checksum) { - ib_uint32_t checksum = 0 /* silence bogus gcc warning */; + ib_uint32_t checksum = BUF_NO_CHECKSUM_MAGIC; + ut_ad(block == NULL || block->frame == page); + ut_ad(block == NULL || page_zip_ == NULL + || &block->page.zip == page_zip_); ut_ad(page); if (page_zip_) { page_zip_des_t* page_zip; - ulint zip_size; + ulint size; page_zip = static_cast(page_zip_); - zip_size = page_zip_get_size(page_zip); + size = page_zip_get_size(page_zip); - ut_ad(zip_size); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); + ut_ad(size); + ut_ad(ut_is_2pow(size)); + ut_ad(size <= UNIV_ZIP_SIZE_MAX); - switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) { + switch (fil_page_get_type(page)) { case FIL_PAGE_TYPE_ALLOCATED: case FIL_PAGE_INODE: case FIL_PAGE_IBUF_BITMAP: case FIL_PAGE_TYPE_FSP_HDR: case FIL_PAGE_TYPE_XDES: /* These are essentially uncompressed pages. 
*/ - memcpy(page_zip->data, page, zip_size); + memcpy(page_zip->data, page, size); /* fall through */ case FIL_PAGE_TYPE_ZBLOB: case FIL_PAGE_TYPE_ZBLOB2: case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: buf_flush_update_zip_checksum( - page_zip->data, zip_size, newest_lsn); + page_zip->data, size, newest_lsn); return; } - ut_print_timestamp(stderr); - fputs(" InnoDB: ERROR: The compressed page to be written" - " seems corrupt:", stderr); - ut_print_buf(stderr, page, zip_size); + ib::error() << "The compressed page to be written" + " seems corrupt:"; + ut_print_buf(stderr, page, size); fputs("\nInnoDB: Possibly older version of the page:", stderr); - ut_print_buf(stderr, page_zip->data, zip_size); + ut_print_buf(stderr, page_zip->data, size); putc('\n', stderr); ut_error; } @@ -780,27 +897,85 @@ buf_flush_init_for_writing( mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, newest_lsn); - /* Store the new formula checksum */ + if (skip_checksum) { + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + } else { + if (block != NULL && UNIV_PAGE_SIZE == 16384) { + /* The page type could be garbage in old files + created before MySQL 5.5. Such files always + had a page size of 16 kilobytes. 
*/ + ulint page_type = fil_page_get_type(page); + ulint reset_type = page_type; - switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - checksum = buf_calc_page_crc32(page); - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - break; - case SRV_CHECKSUM_ALGORITHM_INNODB: - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - checksum = (ib_uint32_t) buf_calc_page_new_checksum(page); - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - checksum = (ib_uint32_t) buf_calc_page_old_checksum(page); - break; - case SRV_CHECKSUM_ALGORITHM_NONE: - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - checksum = BUF_NO_CHECKSUM_MAGIC; - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - break; - /* no default so the compiler will emit a warning if new enum - is added and not handled here */ + switch (block->page.id.page_no() % 16384) { + case 0: + reset_type = block->page.id.page_no() == 0 + ? FIL_PAGE_TYPE_FSP_HDR + : FIL_PAGE_TYPE_XDES; + break; + case 1: + reset_type = FIL_PAGE_IBUF_BITMAP; + break; + default: + switch (page_type) { + case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: + case FIL_PAGE_UNDO_LOG: + case FIL_PAGE_INODE: + case FIL_PAGE_IBUF_FREE_LIST: + case FIL_PAGE_TYPE_ALLOCATED: + case FIL_PAGE_TYPE_SYS: + case FIL_PAGE_TYPE_TRX_SYS: + case FIL_PAGE_TYPE_BLOB: + case FIL_PAGE_TYPE_ZBLOB: + case FIL_PAGE_TYPE_ZBLOB2: + break; + case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_TYPE_XDES: + case FIL_PAGE_IBUF_BITMAP: + /* These pages should have + predetermined page numbers + (see above). 
*/ + default: + reset_type = FIL_PAGE_TYPE_UNKNOWN; + break; + } + } + + if (UNIV_UNLIKELY(page_type != reset_type)) { + ib::info() + << "Resetting invalid page " + << block->page.id << " type " + << page_type << " to " + << reset_type << " when flushing."; + fil_page_set_type(page, reset_type); + } + } + + switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(page); + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, + checksum); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) buf_calc_page_new_checksum( + page); + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, + checksum); + checksum = (ib_uint32_t) buf_calc_page_old_checksum( + page); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, + checksum); + break; + /* no default so the compiler will emit a warning if + new enum is added and not handled here */ + } } /* With the InnoDB checksum, we overwrite the first 4 bytes of @@ -834,19 +1009,18 @@ buf_flush_write_block_low( buf_flush_t flush_type, /*!< in: type of flush */ bool sync) /*!< in: true if sync IO request */ { - ulint zip_size = buf_page_get_zip_size(bpage); - page_t* frame = NULL; - ulint space_id = buf_page_get_space(bpage); + page_t* frame = NULL; + ulint space_id = bpage->id.space(); atomic_writes_t awrites = fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(!buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ -#ifdef UNIV_LOG_DEBUG - static ibool univ_log_debug_warned; -#endif /* UNIV_LOG_DEBUG */ + DBUG_PRINT("ib_buf", ("flush %s %u page %u:%u", + sync ? 
"sync" : "async", (unsigned) flush_type, + bpage->id.space(), bpage->id.page_no())); ut_ad(buf_page_in_file(bpage)); @@ -857,27 +1031,21 @@ buf_flush_write_block_low( LRU_list. */ ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(!mutex_own(buf_page_get_mutex(bpage))); + ut_ad(!buf_page_get_mutex(bpage)->is_owned()); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE); ut_ad(bpage->oldest_modification != 0); #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); -#endif + ut_a(ibuf_count_get(bpage->id) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + ut_ad(bpage->newest_modification != 0); -#ifdef UNIV_LOG_DEBUG - if (!univ_log_debug_warned) { - univ_log_debug_warned = TRUE; - fputs("Warning: cannot force log to disk if" - " UNIV_LOG_DEBUG is defined!\n" - "Crash recovery will not work!\n", - stderr); - } -#else /* Force the log to the disk before writing the modified block */ - log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); -#endif + if (!srv_read_only_mode) { + log_write_up_to(bpage->newest_modification, true); + } + switch (buf_page_get_state(bpage)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */ @@ -889,11 +1057,11 @@ buf_flush_write_block_low( break; case BUF_BLOCK_ZIP_DIRTY: frame = bpage->zip.data; + mach_write_to_8(frame + FIL_PAGE_LSN, bpage->newest_modification); - ut_a(page_zip_verify_checksum(frame, zip_size)); - + ut_a(page_zip_verify_checksum(frame, bpage->size.physical())); memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); break; case BUF_BLOCK_FILE_PAGE: @@ -902,47 +1070,39 @@ buf_flush_write_block_low( frame = ((buf_block_t*) bpage)->frame; } - buf_flush_init_for_writing(((buf_block_t*) bpage)->frame, - bpage->zip.data - ? &bpage->zip : NULL, - bpage->newest_modification); + buf_flush_init_for_writing( + reinterpret_cast(bpage), + reinterpret_cast(bpage)->frame, + bpage->zip.data ? 
&bpage->zip : NULL, + bpage->newest_modification, + fsp_is_checksum_disabled(bpage->id.space())); break; } frame = buf_page_encrypt_before_write(bpage, frame, space_id); - if (!srv_use_doublewrite_buf || !buf_dblwr) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - sync, - buf_page_get_space(bpage), - zip_size, - buf_page_get_page_no(bpage), - 0, - zip_size ? zip_size : bpage->real_size, - frame, - bpage, - &bpage->write_size); + /* Disable use of double-write buffer for temporary tablespace. + Given the nature and load of temporary tablespace doublewrite buffer + adds an overhead during flushing. */ + + if (!srv_use_doublewrite_buf + || buf_dblwr == NULL + || srv_read_only_mode + || fsp_is_system_temporary(bpage->id.space()) + || awrites == ATOMIC_WRITES_ON) { + + ut_ad(!srv_read_only_mode + || fsp_is_system_temporary(bpage->id.space())); + + ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE; + + IORequest request(type); + + fil_io(request, + sync, bpage->id, bpage->size, 0, bpage->size.physical(), + frame, bpage, NULL); } else { - - /* InnoDB uses doublewrite buffer and doublewrite buffer - is initialized. User can define do we use atomic writes - on a file space (table) or not. If atomic writes are - not used we should use doublewrite buffer and if - atomic writes should be used, no doublewrite buffer - is used. */ - - if (awrites == ATOMIC_WRITES_ON) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, - buf_page_get_space(bpage), - zip_size, - buf_page_get_page_no(bpage), - 0, - zip_size ? zip_size : bpage->real_size, - frame, - bpage, - &bpage->write_size); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { + if (flush_type == BUF_FLUSH_SINGLE_PAGE) { buf_dblwr_write_single_page(bpage, sync); } else { ut_ad(!sync); @@ -955,7 +1115,7 @@ buf_flush_write_block_low( are working on. 
*/ if (sync) { ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE); - fil_flush(buf_page_get_space(bpage)); + fil_flush(bpage->id.space()); /* true means we want to evict this page from the LRU list as well. */ @@ -975,8 +1135,7 @@ writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be held upon entering this function, and they will be released by this function if it returns true. @return TRUE if the page was flushed */ -UNIV_INTERN -bool +ibool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ @@ -984,47 +1143,50 @@ buf_flush_page( buf_flush_t flush_type, /*!< in: type of flush */ bool sync) /*!< in: true if sync IO request */ { + BPageMutex* block_mutex; + ut_ad(flush_type < BUF_FLUSH_N_TYPES); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - + block_mutex = buf_page_get_mutex(bpage); ut_ad(mutex_own(block_mutex)); ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); - bool is_uncompressed; + bool is_uncompressed; - is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); + is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); - ibool flush; - rw_lock_t* rw_lock; - bool no_fix_count = bpage->buf_fix_count == 0; + ibool flush; + rw_lock_t* rw_lock; + bool no_fix_count = bpage->buf_fix_count == 0; - if (!is_uncompressed) { - flush = TRUE; + if (!is_uncompressed) { + flush = TRUE; rw_lock = NULL; - - } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) { - /* This is a heuristic, to avoid expensive S attempts. 
*/ + } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST) + || (!no_fix_count + && srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP + && fsp_is_system_temporary(bpage->id.space()))) { + /* This is a heuristic, to avoid expensive SX attempts. */ + /* For table residing in temporary tablespace sync is done + using IO_FIX and so before scheduling for flush ensure that + page is not fixed. */ flush = FALSE; } else { - rw_lock = &reinterpret_cast(bpage)->lock; - if (flush_type != BUF_FLUSH_LIST) { - flush = rw_lock_s_lock_gen_nowait( - rw_lock, BUF_IO_WRITE); + flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE); } else { - /* Will S lock later */ + /* Will SX lock later */ flush = TRUE; } } - if (flush) { + if (flush) { /* We are committed to flushing by the time we get here */ @@ -1033,33 +1195,51 @@ buf_flush_page( buf_page_set_flush_type(bpage, flush_type); if (buf_pool->n_flush[flush_type] == 0) { - os_event_reset(buf_pool->no_flush[flush_type]); } ++buf_pool->n_flush[flush_type]; mutex_exit(block_mutex); + buf_pool_mutex_exit(buf_pool); if (flush_type == BUF_FLUSH_LIST && is_uncompressed - && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) { - /* avoiding deadlock possibility involves doublewrite - buffer, should flush it, because it might hold the - another block->lock. */ - buf_dblwr_flush_buffered_writes(); + && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) { - rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE); - } + if (!fsp_is_system_temporary(bpage->id.space())) { + /* avoiding deadlock possibility involves + doublewrite buffer, should flush it, because + it might hold the another block->lock. */ + buf_dblwr_flush_buffered_writes(); + } else { + buf_dblwr_sync_datafiles(); + } + + rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE); + } + + /* If there is an observer that want to know if the asynchronous + flushing was sent then notify it. 
+ Note: we set flush observer to a page with x-latch, so we can + guarantee that notify_flush and notify_remove are called in pair + with s-latch on a uncompressed page. */ + if (bpage->flush_observer != NULL) { + buf_pool_mutex_enter(buf_pool); + + bpage->flush_observer->notify_flush(buf_pool, bpage); + + buf_pool_mutex_exit(buf_pool); + } /* Even though bpage is not protected by any mutex at this point, it is safe to access bpage, because it is io_fixed and oldest_modification != 0. Thus, it cannot be relocated in the buffer pool or removed from flush_list or LRU_list. */ - buf_flush_write_block_low(bpage, flush_type, sync); - } + buf_flush_write_block_low(bpage, flush_type, sync); + } return(flush); } @@ -1071,7 +1251,6 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this function, and they will be released by this function after flushing. This is loosely based on buf_flush_batch() and buf_flush_page(). @return TRUE if the page was flushed and the mutexes released */ -UNIV_INTERN ibool buf_flush_page_try( /*===============*/ @@ -1080,7 +1259,7 @@ buf_flush_page_try( { ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(mutex_own(&block->mutex)); + ut_ad(buf_page_mutex_own(block)); if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) { return(FALSE); @@ -1089,23 +1268,23 @@ buf_flush_page_try( /* The following call will release the buffer pool and block mutex. */ return(buf_flush_page( - buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true)); + buf_pool, &block->page, + BUF_FLUSH_SINGLE_PAGE, true)); } # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/***********************************************************//** -Check the page is in buffer pool and can be flushed. -@return true if the page can be flushed. */ + +/** Check the page is in buffer pool and can be flushed. 
+@param[in] page_id page id +@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST +@return true if the page can be flushed. */ static bool buf_flush_check_neighbor( -/*=====================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset */ - buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST */ + const page_id_t& page_id, + buf_flush_t flush_type) { buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); bool ret; ut_ad(flush_type == BUF_FLUSH_LRU @@ -1114,7 +1293,7 @@ buf_flush_check_neighbor( buf_pool_mutex_enter(buf_pool); /* We only want to flush pages from this buffer pool. */ - bpage = buf_page_hash_get(buf_pool, space, offset); + bpage = buf_page_hash_get(buf_pool, page_id); if (!bpage) { @@ -1129,7 +1308,7 @@ buf_flush_check_neighbor( ret = false; if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); if (buf_flush_ready_for_flush(bpage, flush_type)) { @@ -1142,26 +1321,25 @@ buf_flush_check_neighbor( return(ret); } -/***********************************************************//** -Flushes to disk all flushable pages within the flush area. -@return number of pages flushed */ +/** Flushes to disk all flushable pages within the flush area. 
+@param[in] page_id page id +@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST +@param[in] n_flushed number of pages flushed so far in this batch +@param[in] n_to_flush maximum number of pages we are allowed to flush +@return number of pages flushed */ static ulint buf_flush_try_neighbors( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST */ - ulint n_flushed, /*!< in: number of pages - flushed so far in this batch */ - ulint n_to_flush) /*!< in: maximum number of pages - we are allowed to flush */ + const page_id_t& page_id, + buf_flush_t flush_type, + ulint n_flushed, + ulint n_to_flush) { ulint i; ulint low; ulint high; - buf_pool_t* buf_pool = buf_pool_get(space, offset); + ulint count = 0; + buf_pool_t* buf_pool = buf_pool_get(page_id); ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); @@ -1169,8 +1347,8 @@ buf_flush_try_neighbors( || srv_flush_neighbors == 0) { /* If there is little space or neighbor flushing is not enabled then just flush the victim. 
*/ - low = offset; - high = offset + 1; + low = page_id.page_no(); + high = page_id.page_no() + 1; } else { /* When flushed, dirty blocks are searched in neighborhoods of this size, and flushed along with the @@ -1182,27 +1360,38 @@ buf_flush_try_neighbors( BUF_READ_AHEAD_AREA(buf_pool), buf_pool->curr_size / 16); - low = (offset / buf_flush_area) * buf_flush_area; - high = (offset / buf_flush_area + 1) * buf_flush_area; + low = (page_id.page_no() / buf_flush_area) * buf_flush_area; + high = (page_id.page_no() / buf_flush_area + 1) * buf_flush_area; if (srv_flush_neighbors == 1) { /* adjust 'low' and 'high' to limit for contiguous dirty area */ - if (offset > low) { - for (i = offset - 1; - i >= low - && buf_flush_check_neighbor( - space, i, flush_type); - i--) { - /* do nothing */ + if (page_id.page_no() > low) { + for (i = page_id.page_no() - 1; i >= low; i--) { + if (!buf_flush_check_neighbor( + page_id_t(page_id.space(), i), + flush_type)) { + + break; + } + + if (i == low) { + /* Avoid overwrap when low == 0 + and calling + buf_flush_check_neighbor() with + i == (ulint) -1 */ + i--; + break; + } } low = i + 1; } - for (i = offset + 1; + for (i = page_id.page_no() + 1; i < high && buf_flush_check_neighbor( - space, i, flush_type); + page_id_t(page_id.space(), i), + flush_type); i++) { /* do nothing */ } @@ -1210,17 +1399,17 @@ buf_flush_try_neighbors( } } -#ifdef UNIV_DEBUG - /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */ -#endif - - if (high > fil_space_get_size(space)) { - high = fil_space_get_size(space); + const ulint space_size = fil_space_get_size(page_id.space()); + if (high > space_size) { + high = space_size; } - ulint count = 0; + DBUG_PRINT("ib_buf", ("flush %u:%u..%u", + page_id.space(), + (unsigned) low, (unsigned) high)); - for (i = low; i < high; i++) { + for (ulint i = low; i < high; i++) { + buf_page_t* bpage; if ((count + n_flushed) >= n_to_flush) { @@ -1230,19 +1419,21 @@ buf_flush_try_neighbors( are flushing has not been 
flushed yet then we'll try to flush the victim that we selected originally. */ - if (i <= offset) { - i = offset; + if (i <= page_id.page_no()) { + i = page_id.page_no(); } else { break; } } - buf_pool = buf_pool_get(space, i); + const page_id_t cur_page_id(page_id.space(), i); + + buf_pool = buf_pool_get(cur_page_id); buf_pool_mutex_enter(buf_pool); /* We only want to flush pages from this buffer pool. */ - buf_page_t* bpage = buf_page_hash_get(buf_pool, space, i); + bpage = buf_page_hash_get(buf_pool, cur_page_id); if (bpage == NULL) { @@ -1256,70 +1447,76 @@ buf_flush_try_neighbors( because the flushed blocks are soon freed */ if (flush_type != BUF_FLUSH_LRU - || i == offset + || i == page_id.page_no() || buf_page_is_old(bpage)) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); if (buf_flush_ready_for_flush(bpage, flush_type) - && (i == offset || bpage->buf_fix_count == 0) - && buf_flush_page( + && (i == page_id.page_no() + || bpage->buf_fix_count == 0)) { + + /* We also try to flush those + neighbors != offset */ + + if (buf_flush_page( buf_pool, bpage, flush_type, false)) { - ++count; + ++count; + } else { + mutex_exit(block_mutex); + buf_pool_mutex_exit(buf_pool); + } continue; + } else { + mutex_exit(block_mutex); } - - mutex_exit(block_mutex); } - buf_pool_mutex_exit(buf_pool); } - if (count > 0) { + if (count > 1) { MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, - MONITOR_FLUSH_NEIGHBOR_COUNT, - MONITOR_FLUSH_NEIGHBOR_PAGES, - (count - 1)); + MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + MONITOR_FLUSH_NEIGHBOR_COUNT, + MONITOR_FLUSH_NEIGHBOR_PAGES, + (count - 1)); } return(count); } -/********************************************************************//** -Check if the block is modified and ready for flushing. If the the block -is ready to flush then flush the page and try o flush its neighbors. 
- -@return TRUE if buf_pool mutex was released during this function. +/** Check if the block is modified and ready for flushing. +If the block is ready to flush then flush the page and try to flush +its neighbors. +@param[in] bpage buffer control block, +must be buf_page_in_file(bpage) +@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST +@param[in] n_to_flush number of pages to flush +@param[in,out] count number of pages flushed +@return TRUE if buf_pool mutex was released during this function. This does not guarantee that some pages were written as well. Number of pages written are incremented to the count. */ static -ibool +bool buf_flush_page_and_try_neighbors( -/*=============================*/ - buf_page_t* bpage, /*!< in: buffer control block, - must be - buf_page_in_file(bpage) */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ - ulint n_to_flush, /*!< in: number of pages to - flush */ - ulint* count) /*!< in/out: number of pages - flushed */ + buf_page_t* bpage, + buf_flush_t flush_type, + ulint n_to_flush, + ulint* count) { - ibool flushed; - ib_mutex_t* block_mutex; #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); -#endif /* UNIV_DEBUG */ ut_ad(buf_pool_mutex_own(buf_pool)); +#endif /* UNIV_DEBUG */ + + bool flushed; + BPageMutex* block_mutex = buf_page_get_mutex(bpage); - block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); ut_a(buf_page_in_file(bpage)); @@ -1329,26 +1526,22 @@ buf_flush_page_and_try_neighbors( buf_pool = buf_pool_from_bpage(bpage); - buf_pool_mutex_exit(buf_pool); - - /* These fields are protected by both the - buffer pool mutex and block mutex. 
*/ - ulint space = buf_page_get_space(bpage); - ulint offset = buf_page_get_page_no(bpage); + const page_id_t page_id = bpage->id; mutex_exit(block_mutex); + buf_pool_mutex_exit(buf_pool); + /* Try to flush also all the neighbors */ *count += buf_flush_try_neighbors( - space, offset, flush_type, *count, n_to_flush); + page_id, flush_type, *count, n_to_flush); buf_pool_mutex_enter(buf_pool); - flushed = TRUE; - } else { mutex_exit(block_mutex); - flushed = FALSE; + + flushed = false; } ut_ad(buf_pool_mutex_own(buf_pool)); @@ -1373,7 +1566,6 @@ buf_free_from_unzip_LRU_list_batch( ulint max) /*!< in: desired number of blocks in the free_list */ { - buf_block_t* block; ulint scanned = 0; ulint count = 0; ulint free_len = UT_LIST_GET_LEN(buf_pool->free); @@ -1381,8 +1573,10 @@ buf_free_from_unzip_LRU_list_batch( ut_ad(buf_pool_mutex_own(buf_pool)); - block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); - while (block != NULL && count < max + buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + + while (block != NULL + && count < max && free_len < srv_LRU_scan_depth && lru_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) { @@ -1421,7 +1615,7 @@ The calling thread is not allowed to own any latches on pages! It attempts to make 'max' blocks available in the free list. 
Note that it is a best effort attempt and it is not guaranteed that after a call to this function there will be 'max' blocks in the free list.*/ -__attribute__((nonnull)) + void buf_flush_LRU_list_batch( /*=====================*/ @@ -1433,18 +1627,25 @@ buf_flush_LRU_list_batch( { buf_page_t* bpage; ulint scanned = 0; + ulint evict_count = 0; + ulint count = 0; ulint free_len = UT_LIST_GET_LEN(buf_pool->free); ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU); + ulint withdraw_depth = 0; n->flushed = 0; n->evicted = 0; n->unzip_LRU_evicted = 0; - ut_ad(buf_pool_mutex_own(buf_pool)); + if (buf_pool->curr_size < buf_pool->old_size + && buf_pool->withdraw_target > 0) { + withdraw_depth = buf_pool->withdraw_target + - UT_LIST_GET_LEN(buf_pool->withdraw); + } for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); - bpage != NULL && (n->evicted + n->flushed) < max - && free_len < srv_LRU_scan_depth + bpage != NULL && count + evict_count < max + && free_len < srv_LRU_scan_depth + withdraw_depth && lru_len > BUF_LRU_MIN_LEN; ++scanned, bpage = buf_pool->lru_hp.get()) { @@ -1452,23 +1653,29 @@ buf_flush_LRU_list_batch( buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); buf_pool->lru_hp.set(prev); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); - bool evict = buf_flush_ready_for_replace(bpage); - mutex_exit(block_mutex); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); - if (evict) { + mutex_enter(block_mutex); + + if (buf_flush_ready_for_replace(bpage)) { /* block is ready for eviction i.e., it is clean and is not IO-fixed or buffer fixed. */ + mutex_exit(block_mutex); if (buf_LRU_free_page(bpage, true)) { - n->evicted++; + ++evict_count; } - } else { + } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) { /* Block is ready for flush. Dispatch an IO request. The IO helper thread will put it on free list in IO completion routine. 
*/ + mutex_exit(block_mutex); buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LRU, max, &n->flushed); + bpage, BUF_FLUSH_LRU, max, &count); + } else { + /* Can't evict or dispatch this block. Go to + previous. */ + ut_ad(buf_pool->lru_hp.is_hp(prev)); + mutex_exit(block_mutex); } ut_ad(!mutex_own(block_mutex)); @@ -1487,6 +1694,14 @@ buf_flush_LRU_list_batch( ut_ad(buf_pool_mutex_own(buf_pool)); + if (evict_count) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, + MONITOR_LRU_BATCH_EVICT_COUNT, + MONITOR_LRU_BATCH_EVICT_PAGES, + evict_count); + } + if (scanned) { MONITOR_INC_VALUE_CUMULATIVE( MONITOR_LRU_BATCH_SCANNED, @@ -1499,7 +1714,7 @@ buf_flush_LRU_list_batch( /*******************************************************************//** Flush and move pages from LRU or unzip_LRU list to the free list. Whether LRU or unzip_LRU is used depends on the state of the system.*/ -__attribute__((nonnull)) + static void buf_do_LRU_batch( @@ -1528,26 +1743,22 @@ buf_do_LRU_batch( n->evicted += n->unzip_LRU_evicted; } -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush_list. -the calling thread is not allowed to own any latches on pages! +/** This utility flushes dirty blocks from the end of the flush_list. +The calling thread is not allowed to own any latches on pages! 
+@param[in] buf_pool buffer pool instance +@param[in] min_n wished minimum mumber of blocks flushed (it is +not guaranteed that the actual number is that big, though) +@param[in] lsn_limit all blocks whose oldest_modification is smaller +than this should be flushed (if their number does not exceed min_n) @return number of blocks for which the write request was queued; ULINT_UNDEFINED if there was a flush of the same type already running */ static ulint buf_do_flush_list_batch( -/*====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber - of blocks flushed (it is not - guaranteed that the actual - number is that big, though) */ - lsn_t lsn_limit) /*!< all blocks whose - oldest_modification is smaller - than this should be flushed (if - their number does not exceed - min_n) */ + buf_pool_t* buf_pool, + ulint min_n, + lsn_t lsn_limit) { ulint count = 0; ulint scanned = 0; @@ -1595,50 +1806,65 @@ buf_do_flush_list_batch( buf_pool->flush_hp.set(NULL); buf_flush_list_mutex_exit(buf_pool); - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED, - MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, - MONITOR_FLUSH_BATCH_SCANNED_PER_CALL, - scanned); + if (scanned) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, + MONITOR_FLUSH_BATCH_SCANNED_PER_CALL, + scanned); + } + + if (count) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + count); + } ut_ad(buf_pool_mutex_own(buf_pool)); return(count); } -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list or flush_list. +/** This utility flushes dirty blocks from the end of the LRU list or +flush_list. 
NOTE 1: in the case of an LRU flush the calling thread may own latches to pages: to avoid deadlocks, this function must be written so that it cannot end up waiting for these latches! NOTE 2: in the case of a flush list flush, -the calling thread is not allowed to own any latches on pages! */ -__attribute__((nonnull)) +the calling thread is not allowed to own any latches on pages! +@param[in] buf_pool buffer pool instance +@param[in] flush_type BUF_FLUSH_LRU or BUF_FLUSH_LIST; if +BUF_FLUSH_LIST, then the caller must not own any latches on pages +@param[in] min_n wished minimum mumber of blocks flushed (it is +not guaranteed that the actual number is that big, though) +@param[in] lsn_limit in the case of BUF_FLUSH_LIST all blocks whose +oldest_modification is smaller than this should be flushed (if their number +does not exceed min_n), otherwise ignored */ void buf_flush_batch( -/*============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST; if BUF_FLUSH_LIST, - then the caller must not own any - latches on pages */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST - all blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ + buf_pool_t* buf_pool, + buf_flush_t flush_type, + ulint min_n, + lsn_t lsn_limit, flush_counters_t* n) /*!< out: flushed/evicted page counts */ { ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); -#ifdef UNIV_SYNC_DEBUG - ut_ad((flush_type != BUF_FLUSH_LIST) - || sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ + +#ifdef UNIV_DEBUG + { + dict_sync_check check(true); + + ut_ad(flush_type != BUF_FLUSH_LIST + || !sync_check_iterate(check)); + } +#endif /* UNIV_DEBUG */ buf_pool_mutex_enter(buf_pool); + 
ulint count = 0; + /* Note: The buffer pool mutex is released and reacquired within the flush functions. */ switch (flush_type) { @@ -1655,38 +1881,27 @@ buf_flush_batch( buf_pool_mutex_exit(buf_pool); -#ifdef UNIV_DEBUG - if (buf_debug_prints && n->flushed > 0) { - fprintf(stderr, flush_type == BUF_FLUSH_LRU - ? "Flushed %lu pages in LRU flush\n" - : "Flushed %lu pages in flush list flush\n", - (ulong) n->flushed); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("flush %u completed, %u pages", + unsigned(flush_type), unsigned(count))); } /******************************************************************//** -Gather the aggregated stats for both flush list and LRU list flushing */ +Gather the aggregated stats for both flush list and LRU list flushing. +@param page_count_flush number of pages flushed from the end of the flush_list +@param page_count_LRU number of pages flushed from the end of the LRU list +*/ void -buf_flush_common( -/*=============*/ - buf_flush_t flush_type, /*!< in: type of flush */ - ulint page_count) /*!< in: number of pages flushed */ +buf_flush_stats( +/*============*/ + ulint page_count_flush, + ulint page_count_LRU) { - buf_dblwr_flush_buffered_writes(); + DBUG_PRINT("ib_buf", ("flush completed, from flush_list %u pages, " + "from LRU_list %u pages", + unsigned(page_count_flush), + unsigned(page_count_LRU))); - ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && page_count > 0) { - fprintf(stderr, flush_type == BUF_FLUSH_LRU - ? 
"Flushed %lu pages in LRU flush\n" - : "Flushed %lu pages in flush list flush\n", - (ulong) page_count); - } -#endif /* UNIV_DEBUG */ - - srv_stats.buf_pool_flushed.add(page_count); + srv_stats.buf_pool_flushed.add(page_count_flush + page_count_LRU); } /******************************************************************//** @@ -1698,6 +1913,8 @@ buf_flush_start( buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ { + ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); + buf_pool_mutex_enter(buf_pool); if (buf_pool->n_flush[flush_type] > 0 @@ -1712,11 +1929,28 @@ buf_flush_start( buf_pool->init_flush[flush_type] = TRUE; + os_event_reset(buf_pool->no_flush[flush_type]); + buf_pool_mutex_exit(buf_pool); return(TRUE); } +/******************************************************************//** +Gather the aggregated stats for both flush list and LRU list flushing */ +void +buf_flush_common( +/*=============*/ + buf_flush_t flush_type, /*!< in: type of flush */ + ulint page_count) /*!< in: number of pages flushed */ +{ + buf_dblwr_flush_buffered_writes(); + + ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); + + srv_stats.buf_pool_flushed.add(page_count); +} + /******************************************************************//** End a buffer flush batch for LRU or flush list */ void @@ -1740,11 +1974,16 @@ buf_flush_end( } buf_pool_mutex_exit(buf_pool); + + if (!srv_read_only_mode) { + buf_dblwr_flush_buffered_writes(); + } else { + os_aio_simulated_wake_handler_threads(); + } } /******************************************************************//** Waits until a flush batch of the given type ends */ -UNIV_INTERN void buf_flush_wait_batch_end( /*=====================*/ @@ -1773,31 +2012,116 @@ buf_flush_wait_batch_end( } } -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush list of -all buffer pool instances. 
+/** Do flushing batch of a given type. NOTE: The calling thread is not allowed to own any latches on pages! +@param[in,out] buf_pool buffer pool instance +@param[in] type flush type +@param[in] min_n wished minimum mumber of blocks flushed +(it is not guaranteed that the actual number is that big, though) +@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose +oldest_modification is smaller than this should be flushed (if their number +does not exceed min_n), otherwise ignored +@param[out] n_processed the number of pages which were processed is +passed back to caller. Ignored if NULL +@retval true if a batch was queued successfully. +@retval false if another batch of same type was already running. */ +bool +buf_flush_do_batch( + buf_pool_t* buf_pool, + buf_flush_t type, + ulint min_n, + lsn_t lsn_limit, + flush_counters_t* n) +{ + ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST); + + if (n != NULL) { + n->flushed = 0; + } + + if (!buf_flush_start(buf_pool, type)) { + return(false); + } + + buf_flush_batch(buf_pool, type, min_n, lsn_limit, n); + + buf_flush_end(buf_pool, type); + + return(true); +} +/** +Waits until a flush batch of the given lsn ends +@param[in] new_oldest target oldest_modified_lsn to wait for */ + +void +buf_flush_wait_flushed( + lsn_t new_oldest) +{ + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool; + lsn_t oldest; + + buf_pool = buf_pool_from_array(i); + + for (;;) { + /* We don't need to wait for fsync of the flushed + blocks, because anyway we need fsync to make chekpoint. + So, we don't need to wait for the batch end here. */ + + buf_flush_list_mutex_enter(buf_pool); + + buf_page_t* bpage; + + /* We don't need to wait for system temporary pages */ + for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); + bpage != NULL + && fsp_is_system_temporary(bpage->id.space()); + bpage = UT_LIST_GET_PREV(list, bpage)) { + /* Do nothing. 
*/ + } + + if (bpage != NULL) { + ut_ad(bpage->in_flush_list); + oldest = bpage->oldest_modification; + } else { + oldest = 0; + } + + buf_flush_list_mutex_exit(buf_pool); + + if (oldest == 0 || oldest >= new_oldest) { + break; + } + + /* sleep and retry */ + os_thread_sleep(buf_flush_wait_flushed_sleep_time); + + MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); + } + } +} + +/** This utility flushes dirty blocks from the end of the flush list of all +buffer pool instances. +NOTE: The calling thread is not allowed to own any latches on pages! +@param[in] min_n wished minimum mumber of blocks flushed (it is +not guaranteed that the actual number is that big, though) +@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose +oldest_modification is smaller than this should be flushed (if their number +does not exceed min_n), otherwise ignored +@param[out] n_processed the number of pages which were processed is +passed back to caller. Ignored if NULL. @return true if a batch was queued successfully for each buffer pool instance. false if another batch of same type was already running in at least one of the buffer pool instance */ -UNIV_INTERN bool -buf_flush_list( -/*===========*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. 
Ignored if NULL */ - +buf_flush_lists( + ulint min_n, + lsn_t lsn_limit, + ulint* n_processed) { ulint i; + ulint n_flushed = 0; bool success = true; if (buf_mtflu_init_done()) { @@ -1822,9 +2146,14 @@ buf_flush_list( buf_pool_t* buf_pool; flush_counters_t n; + memset(&n, 0, sizeof(flush_counters_t)); buf_pool = buf_pool_from_array(i); - if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { + if (!buf_flush_do_batch(buf_pool, + BUF_FLUSH_LIST, + min_n, + lsn_limit, + &n)) { /* We have two choices here. If lsn_limit was specified then skipping an instance of buffer pool means we cannot guarantee that all pages @@ -1839,25 +2168,14 @@ buf_flush_list( continue; } + } - buf_flush_batch( - buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit, &n); + if (n_flushed) { + buf_flush_stats(n_flushed, 0); + } - buf_flush_end(buf_pool, BUF_FLUSH_LIST); - - buf_flush_common(BUF_FLUSH_LIST, n.flushed); - - if (n_processed) { - *n_processed += n.flushed; - } - - if (n.flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - n.flushed); - } + if (n_processed) { + *n_processed = n_flushed; } return(success); @@ -1870,9 +2188,8 @@ list and puts it on the free list. It is called from user threads when they are unable to find a replaceable page at the tail of the LRU list i.e.: when the background LRU flushing in the page_cleaner thread is not fast enough to keep pace with the workload. -@return TRUE if success. */ -UNIV_INTERN -ibool +@return true if success. 
*/ +bool buf_flush_single_page_from_LRU( /*===========================*/ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ @@ -1883,48 +2200,56 @@ buf_flush_single_page_from_LRU( buf_pool_mutex_enter(buf_pool); - for (bpage = buf_pool->single_scan_itr.start(), - scanned = 0, freed = FALSE; + for (bpage = buf_pool->single_scan_itr.start(), scanned = 0, + freed = false; bpage != NULL; ++scanned, bpage = buf_pool->single_scan_itr.get()) { ut_ad(buf_pool_mutex_own(buf_pool)); - buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); buf_pool->single_scan_itr.set(prev); + BPageMutex* block_mutex; + + block_mutex = buf_page_get_mutex(bpage); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); if (buf_flush_ready_for_replace(bpage)) { /* block is ready for eviction i.e., it is clean and is not IO-fixed or buffer fixed. */ mutex_exit(block_mutex); + if (buf_LRU_free_page(bpage, true)) { buf_pool_mutex_exit(buf_pool); - freed = TRUE; + freed = true; break; } + } else if (buf_flush_ready_for_flush( - bpage, BUF_FLUSH_SINGLE_PAGE)) { - /* Block is ready for flush. Dispatch an IO - request. We'll put it on free list in IO - completion routine. The following call, if - successful, will release the buffer pool and - block mutex. */ - freed = buf_flush_page(buf_pool, bpage, - BUF_FLUSH_SINGLE_PAGE, true); + bpage, BUF_FLUSH_SINGLE_PAGE)) { + + /* Block is ready for flush. Try and dispatch an IO + request. We'll put it on free list in IO completion + routine if it is not buffer fixed. The following call + will release the buffer pool and block mutex. + + Note: There is no guarantee that this page has actually + been freed, only that it has been flushed to disk */ + + freed = buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true); + if (freed) { - /* block and buffer pool mutex have - already been reelased. 
*/ break; } + mutex_exit(block_mutex); } else { mutex_exit(block_mutex); } + ut_ad(!mutex_own(block_mutex)); } - if (!freed) { /* Can't find a single flushable page. */ ut_ad(!bpage); @@ -1939,10 +2264,63 @@ buf_flush_single_page_from_LRU( scanned); } + + ut_ad(!buf_pool_mutex_own(buf_pool)); return(freed); } +/** +Clears up tail of the LRU list of a given buffer pool instance: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. +@param buf_pool buffer pool instance +@return total pages flushed */ +static +ulint +buf_flush_LRU_list( + buf_pool_t* buf_pool) +{ + ulint scan_depth, withdraw_depth; + flush_counters_t n; + + memset(&n, 0, sizeof(flush_counters_t)); + + if(buf_mtflu_init_done()) + { + return(buf_mtflu_flush_LRU_tail()); + } + + ut_ad(buf_pool); + /* srv_LRU_scan_depth can be arbitrarily large value. + We cap it with current LRU size. */ + buf_pool_mutex_enter(buf_pool); + scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); + if (buf_pool->curr_size < buf_pool->old_size + && buf_pool->withdraw_target > 0) { + withdraw_depth = buf_pool->withdraw_target + - UT_LIST_GET_LEN(buf_pool->withdraw); + } else { + withdraw_depth = 0; + } + buf_pool_mutex_exit(buf_pool); + if (withdraw_depth > srv_LRU_scan_depth) { + scan_depth = ut_min(withdraw_depth, scan_depth); + } else { + scan_depth = ut_min(static_cast(srv_LRU_scan_depth), + scan_depth); + } + /* Currently one of page_cleaners is the only thread + that can trigger an LRU flush at the same time. 
+ So, it is not possible that a batch triggered during + last iteration is still running, */ + buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU, scan_depth, + 0, &n); + + return(n.flushed); +} /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -1950,71 +2328,25 @@ Clears up tail of the LRU lists: The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. @return total pages flushed */ -UNIV_INTERN ulint -buf_flush_LRU_tail(void) -/*====================*/ +buf_flush_LRU_lists(void) +/*=====================*/ { - ulint total_flushed = 0; - - if(buf_mtflu_init_done()) - { - return(buf_mtflu_flush_LRU_tail()); - } - + ulint n_flushed = 0; for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - ulint scan_depth; - flush_counters_t n; - - /* srv_LRU_scan_depth can be arbitrarily large value. - We cap it with current LRU size. */ - buf_pool_mutex_enter(buf_pool); - scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); - buf_pool_mutex_exit(buf_pool); - - scan_depth = ut_min(srv_LRU_scan_depth, scan_depth); - - /* Currently page_cleaner is the only thread - that can trigger an LRU flush. 
It is possible - that a batch triggered during last iteration is - still running, */ - if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { - continue; - } - - buf_flush_batch(buf_pool, BUF_FLUSH_LRU, scan_depth, 0, &n); - - buf_flush_end(buf_pool, BUF_FLUSH_LRU); - - buf_flush_common(BUF_FLUSH_LRU, n.flushed); - - if (n.flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_PAGES, - n.flushed); - } - - if (n.evicted) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_PAGES, - n.evicted); - } - - total_flushed += (n.flushed + n.evicted); + n_flushed += buf_flush_LRU_list(buf_pool_from_array(i)); } - return(total_flushed); + if (n_flushed) { + buf_flush_stats(0, n_flushed); + } + + return(n_flushed); } /*********************************************************************//** Wait for any possible LRU flushes that are in progress to end. */ -UNIV_INTERN void buf_flush_wait_LRU_batch_end(void) /*==============================*/ @@ -2037,26 +2369,6 @@ buf_flush_wait_LRU_batch_end(void) } } -/*********************************************************************//** -Flush a batch of dirty pages from the flush list -@return number of pages flushed, 0 if no page is flushed or if another -flush_list type batch is running */ -static -ulint -page_cleaner_do_flush_batch( -/*========================*/ - ulint n_to_flush, /*!< in: number of pages that - we should attempt to flush. */ - lsn_t lsn_limit) /*!< in: LSN up to which flushing - must happen */ -{ - ulint n_flushed; - - buf_flush_list(n_to_flush, lsn_limit, &n_flushed); - - return(n_flushed); -} - /*********************************************************************//** Calculates if flushing is required based on number of dirty pages in the buffer pool. 
@@ -2066,10 +2378,11 @@ ulint af_get_pct_for_dirty() /*==================*/ { - ulint dirty_pct = (ulint) buf_get_modified_ratio_pct(); + double dirty_pct = buf_get_modified_ratio_pct(); - if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) { - return(100); + if (dirty_pct == 0.0) { + /* No pages modified */ + return(0); } ut_a(srv_max_dirty_pages_pct_lwm @@ -2078,16 +2391,16 @@ af_get_pct_for_dirty() if (srv_max_dirty_pages_pct_lwm == 0) { /* The user has not set the option to preflush dirty pages as we approach the high water mark. */ - if (dirty_pct > srv_max_buf_pool_modified_pct) { + if (dirty_pct >= srv_max_buf_pool_modified_pct) { /* We have crossed the high water mark of dirty pages In this case we start flushing at 100% of innodb_io_capacity. */ return(100); } - } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) { + } else if (dirty_pct >= srv_max_dirty_pages_pct_lwm) { /* We should start flushing pages gradually. */ - return (ulint) ((dirty_pct * 100) - / (srv_max_buf_pool_modified_pct + 1)); + return(static_cast((dirty_pct * 100) + / (srv_max_buf_pool_modified_pct + 1))); } return(0); @@ -2136,22 +2449,23 @@ af_get_pct_for_lsn( /*********************************************************************//** This function is called approximately once every second by the page_cleaner thread. Based on various factors it decides if there is a -need to do flushing. If flushing is needed it is performed and the -number of pages flushed is returned. -@return number of pages flushed */ +need to do flushing. +@return number of pages recommended to be flushed +@param lsn_limit pointer to return LSN up to which flushing must happen +@param last_pages_in the number of pages flushed by the last flush_list + flushing. 
*/ static ulint -page_cleaner_flush_pages_if_needed(void) +page_cleaner_flush_pages_recommendation( /*====================================*/ + lsn_t* lsn_limit, + ulint last_pages_in) { - static lsn_t lsn_avg_rate = 0; static lsn_t prev_lsn = 0; - static lsn_t last_lsn = 0; static ulint sum_pages = 0; - static ulint last_pages = 0; - static ulint prev_pages = 0; static ulint avg_page_rate = 0; static ulint n_iterations = 0; + static time_t prev_time; lsn_t oldest_lsn; lsn_t cur_lsn; lsn_t age; @@ -2160,7 +2474,6 @@ page_cleaner_flush_pages_if_needed(void) ulint pct_for_dirty = 0; ulint pct_for_lsn = 0; ulint pct_total = 0; - int age_factor = 0; cur_lsn = log_get_lsn_nowait(); @@ -2174,6 +2487,7 @@ page_cleaner_flush_pages_if_needed(void) if (prev_lsn == 0) { /* First time around. */ prev_lsn = cur_lsn; + prev_time = ut_time(); return(0); } @@ -2181,19 +2495,111 @@ page_cleaner_flush_pages_if_needed(void) return(0); } + sum_pages += last_pages_in; + + time_t curr_time = ut_time(); + double time_elapsed = difftime(curr_time, prev_time); + /* We update our variables every srv_flushing_avg_loops iterations to smooth out transition in workload. */ - if (++n_iterations >= srv_flushing_avg_loops) { + if (++n_iterations >= srv_flushing_avg_loops + || time_elapsed >= srv_flushing_avg_loops) { - avg_page_rate = ((sum_pages / srv_flushing_avg_loops) - + avg_page_rate) / 2; + if (time_elapsed < 1) { + time_elapsed = 1; + } + + avg_page_rate = static_cast( + ((static_cast(sum_pages) + / time_elapsed) + + avg_page_rate) / 2); /* How much LSN we have generated since last call. 
*/ - lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops; + lsn_rate = static_cast( + static_cast(cur_lsn - prev_lsn) + / time_elapsed); lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2; + + /* aggregate stats of all slots */ + mutex_enter(&page_cleaner->mutex); + + ulint flush_tm = page_cleaner->flush_time; + ulint flush_pass = page_cleaner->flush_pass; + + page_cleaner->flush_time = 0; + page_cleaner->flush_pass = 0; + + ulint lru_tm = 0; + ulint list_tm = 0; + ulint lru_pass = 0; + ulint list_pass = 0; + + for (ulint i = 0; i < page_cleaner->n_slots; i++) { + page_cleaner_slot_t* slot; + + slot = &page_cleaner->slots[i]; + + lru_tm += slot->flush_lru_time; + lru_pass += slot->flush_lru_pass; + list_tm += slot->flush_list_time; + list_pass += slot->flush_list_pass; + + slot->flush_lru_time = 0; + slot->flush_lru_pass = 0; + slot->flush_list_time = 0; + slot->flush_list_pass = 0; + } + + mutex_exit(&page_cleaner->mutex); + + /* minimum values are 1, to avoid dividing by zero. */ + if (lru_tm < 1) { + lru_tm = 1; + } + if (list_tm < 1) { + list_tm = 1; + } + if (flush_tm < 1) { + flush_tm = 1; + } + + if (lru_pass < 1) { + lru_pass = 1; + } + if (list_pass < 1) { + list_pass = 1; + } + if (flush_pass < 1) { + flush_pass = 1; + } + + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT, + list_tm / list_pass); + MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT, + lru_tm / lru_pass); + + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD, + list_tm / (srv_n_page_cleaners * flush_pass)); + MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD, + lru_tm / (srv_n_page_cleaners * flush_pass)); + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST, + flush_tm * list_tm / flush_pass + / (list_tm + lru_tm)); + MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST, + flush_tm * lru_tm / flush_pass + / (list_tm + lru_tm)); + MONITOR_SET(MONITOR_FLUSH_AVG_TIME, flush_tm / flush_pass); + + MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, + list_pass / page_cleaner->n_slots); + 
MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_PASS, + lru_pass / page_cleaner->n_slots); + MONITOR_SET(MONITOR_FLUSH_AVG_PASS, flush_pass); + prev_lsn = cur_lsn; + prev_time = curr_time; n_iterations = 0; @@ -2211,54 +2617,96 @@ page_cleaner_flush_pages_if_needed(void) pct_total = ut_max(pct_for_dirty, pct_for_lsn); + /* Estimate pages to be flushed for the lsn progress */ + ulint sum_pages_for_lsn = 0; + lsn_t target_lsn = oldest_lsn + + lsn_avg_rate * buf_flush_lsn_scan_factor; + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + ulint pages_for_lsn = 0; + + buf_flush_list_mutex_enter(buf_pool); + for (buf_page_t* b = UT_LIST_GET_LAST(buf_pool->flush_list); + b != NULL; + b = UT_LIST_GET_PREV(list, b)) { + if (b->oldest_modification > target_lsn) { + break; + } + ++pages_for_lsn; + } + buf_flush_list_mutex_exit(buf_pool); + + sum_pages_for_lsn += pages_for_lsn; + + mutex_enter(&page_cleaner->mutex); + ut_ad(page_cleaner->slots[i].state + == PAGE_CLEANER_STATE_NONE); + page_cleaner->slots[i].n_pages_requested + = pages_for_lsn / buf_flush_lsn_scan_factor + 1; + mutex_exit(&page_cleaner->mutex); + } + + sum_pages_for_lsn /= buf_flush_lsn_scan_factor; + if(sum_pages_for_lsn < 1) { + sum_pages_for_lsn = 1; + } + /* Cap the maximum IO capacity that we are going to use by - max_io_capacity. */ - n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2; + max_io_capacity. 
Limit the value to avoid too quick increase */ + ulint pages_for_lsn = + std::min(sum_pages_for_lsn, srv_max_io_capacity * 2); + + n_pages = (PCT_IO(pct_total) + avg_page_rate + pages_for_lsn) / 3; if (n_pages > srv_max_io_capacity) { n_pages = srv_max_io_capacity; } - if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) { - age_factor = static_cast(prev_pages / last_pages); + /* Normalize request for each instance */ + mutex_enter(&page_cleaner->mutex); + ut_ad(page_cleaner->n_slots_requested == 0); + ut_ad(page_cleaner->n_slots_flushing == 0); + ut_ad(page_cleaner->n_slots_finished == 0); + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + /* if REDO has enough of free space, + don't care about age distribution of pages */ + page_cleaner->slots[i].n_pages_requested = pct_for_lsn > 30 ? + page_cleaner->slots[i].n_pages_requested + * n_pages / sum_pages_for_lsn + 1 + : n_pages / srv_buf_pool_instances; } + mutex_exit(&page_cleaner->mutex); MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages); - prev_pages = n_pages; - n_pages = page_cleaner_do_flush_batch( - n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1)); - - last_lsn= cur_lsn; - last_pages= n_pages + 1; + MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_BY_AGE, sum_pages_for_lsn); MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate); MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate); MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty); MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn); - if (n_pages) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_COUNT, - MONITOR_FLUSH_ADAPTIVE_PAGES, - n_pages); - - sum_pages += n_pages; - } + *lsn_limit = LSN_MAX; return(n_pages); } /*********************************************************************//** Puts the page_cleaner thread to sleep if it has finished work in less -than a second */ +than a second +@retval 0 wake up by event set, +@retval OS_SYNC_TIME_EXCEEDED if timeout was exceeded +@param 
next_loop_time time when next loop iteration should start +@param sig_count zero or the value returned by previous call of + os_event_reset() */ static -void -page_cleaner_sleep_if_needed( -/*=========================*/ - ulint next_loop_time) /*!< in: time when next loop iteration - should start */ +ulint +pc_sleep_if_needed( +/*===============*/ + ulint next_loop_time, + int64_t sig_count) { ulint cur_time = ut_time_ms(); @@ -2267,24 +2715,419 @@ page_cleaner_sleep_if_needed( ut_min() to avoid long sleep in case of wrap around. */ ulint sleep_us; - sleep_us = ut_min(1000000, (next_loop_time - cur_time) * 1000); + sleep_us = ut_min(static_cast(1000000), + (next_loop_time - cur_time) * 1000); - ib_int64_t sig_count = os_event_reset(buf_flush_event); - - os_event_wait_time_low(buf_flush_event, sleep_us, sig_count); + return(os_event_wait_time_low(buf_flush_event, + sleep_us, sig_count)); } + + return(OS_SYNC_TIME_EXCEEDED); } +/******************************************************************//** +Initialize page_cleaner. */ +void +buf_flush_page_cleaner_init(void) +/*=============================*/ +{ + ut_ad(page_cleaner == NULL); + page_cleaner = static_cast( + ut_zalloc_nokey(sizeof(*page_cleaner))); + + mutex_create(LATCH_ID_PAGE_CLEANER, &page_cleaner->mutex); + + page_cleaner->is_requested = os_event_create("pc_is_requested"); + page_cleaner->is_finished = os_event_create("pc_is_finished"); + + page_cleaner->n_slots = static_cast(srv_buf_pool_instances); + + page_cleaner->slots = static_cast( + ut_zalloc_nokey(page_cleaner->n_slots + * sizeof(*page_cleaner->slots))); + + ut_d(page_cleaner->n_disabled_debug = 0); + + page_cleaner->is_running = true; +} + +/** +Close page_cleaner. 
*/ +static +void +buf_flush_page_cleaner_close(void) +{ + /* waiting for all worker threads exit */ + while (page_cleaner->n_workers > 0) { + os_thread_sleep(10000); + } + + mutex_destroy(&page_cleaner->mutex); + + ut_free(page_cleaner->slots); + + os_event_destroy(page_cleaner->is_finished); + os_event_destroy(page_cleaner->is_requested); + + ut_free(page_cleaner); + + page_cleaner = NULL; +} + +/** +Requests for all slots to flush all buffer pool instances. +@param min_n wished minimum mumber of blocks flushed + (it is not guaranteed that the actual number is that big) +@param lsn_limit in the case BUF_FLUSH_LIST all blocks whose + oldest_modification is smaller than this should be flushed + (if their number does not exceed min_n), otherwise ignored +*/ +static +void +pc_request( + ulint min_n, + lsn_t lsn_limit) +{ + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. 
*/ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + + mutex_enter(&page_cleaner->mutex); + + ut_ad(page_cleaner->n_slots_requested == 0); + ut_ad(page_cleaner->n_slots_flushing == 0); + ut_ad(page_cleaner->n_slots_finished == 0); + + page_cleaner->requested = (min_n > 0); + page_cleaner->lsn_limit = lsn_limit; + + for (ulint i = 0; i < page_cleaner->n_slots; i++) { + page_cleaner_slot_t* slot = &page_cleaner->slots[i]; + + ut_ad(slot->state == PAGE_CLEANER_STATE_NONE); + + if (min_n == ULINT_MAX) { + slot->n_pages_requested = ULINT_MAX; + } else if (min_n == 0) { + slot->n_pages_requested = 0; + } + + /* slot->n_pages_requested was already set by + page_cleaner_flush_pages_recommendation() */ + + slot->state = PAGE_CLEANER_STATE_REQUESTED; + } + + page_cleaner->n_slots_requested = page_cleaner->n_slots; + page_cleaner->n_slots_flushing = 0; + page_cleaner->n_slots_finished = 0; + + os_event_set(page_cleaner->is_requested); + + mutex_exit(&page_cleaner->mutex); +} + +/** +Do flush for one slot. +@return the number of the slots which has not been treated yet. 
*/ +static +ulint +pc_flush_slot(void) +{ + ulint lru_tm = 0; + ulint list_tm = 0; + int lru_pass = 0; + int list_pass = 0; + + mutex_enter(&page_cleaner->mutex); + + if (page_cleaner->n_slots_requested > 0) { + page_cleaner_slot_t* slot = NULL; + ulint i; + + for (i = 0; i < page_cleaner->n_slots; i++) { + slot = &page_cleaner->slots[i]; + + if (slot->state == PAGE_CLEANER_STATE_REQUESTED) { + break; + } + } + + /* slot should be found because + page_cleaner->n_slots_requested > 0 */ + ut_a(i < page_cleaner->n_slots); + + buf_pool_t* buf_pool = buf_pool_from_array(i); + + page_cleaner->n_slots_requested--; + page_cleaner->n_slots_flushing++; + slot->state = PAGE_CLEANER_STATE_FLUSHING; + + if (page_cleaner->n_slots_requested == 0) { + os_event_reset(page_cleaner->is_requested); + } + + if (!page_cleaner->is_running) { + slot->n_flushed_lru = 0; + slot->n_flushed_list = 0; + goto finish_mutex; + } + + mutex_exit(&page_cleaner->mutex); + + lru_tm = ut_time_ms(); + + /* Flush pages from end of LRU if required */ + slot->n_flushed_lru = buf_flush_LRU_list(buf_pool); + + lru_tm = ut_time_ms() - lru_tm; + lru_pass++; + + if (!page_cleaner->is_running) { + slot->n_flushed_list = 0; + goto finish; + } + + /* Flush pages from flush_list if required */ + if (page_cleaner->requested) { + flush_counters_t n; + memset(&n, 0, sizeof(flush_counters_t)); + list_tm = ut_time_ms(); + + slot->succeeded_list = buf_flush_do_batch( + buf_pool, BUF_FLUSH_LIST, + slot->n_pages_requested, + page_cleaner->lsn_limit, + &n); + + slot->n_flushed_list = n.flushed; + + list_tm = ut_time_ms() - list_tm; + list_pass++; + } else { + slot->n_flushed_list = 0; + slot->succeeded_list = true; + } +finish: + mutex_enter(&page_cleaner->mutex); +finish_mutex: + page_cleaner->n_slots_flushing--; + page_cleaner->n_slots_finished++; + slot->state = PAGE_CLEANER_STATE_FINISHED; + + slot->flush_lru_time += lru_tm; + slot->flush_list_time += list_tm; + slot->flush_lru_pass += lru_pass; + slot->flush_list_pass 
+= list_pass; + + if (page_cleaner->n_slots_requested == 0 + && page_cleaner->n_slots_flushing == 0) { + os_event_set(page_cleaner->is_finished); + } + } + + ulint ret = page_cleaner->n_slots_requested; + + mutex_exit(&page_cleaner->mutex); + + return(ret); +} + +/** +Wait until all flush requests are finished. +@param n_flushed_lru number of pages flushed from the end of the LRU list. +@param n_flushed_list number of pages flushed from the end of the + flush_list. +@return true if all flush_list flushing batch were success. */ +static +bool +pc_wait_finished( + ulint* n_flushed_lru, + ulint* n_flushed_list) +{ + bool all_succeeded = true; + + *n_flushed_lru = 0; + *n_flushed_list = 0; + + os_event_wait(page_cleaner->is_finished); + + mutex_enter(&page_cleaner->mutex); + + ut_ad(page_cleaner->n_slots_requested == 0); + ut_ad(page_cleaner->n_slots_flushing == 0); + ut_ad(page_cleaner->n_slots_finished == page_cleaner->n_slots); + + for (ulint i = 0; i < page_cleaner->n_slots; i++) { + page_cleaner_slot_t* slot = &page_cleaner->slots[i]; + + ut_ad(slot->state == PAGE_CLEANER_STATE_FINISHED); + + *n_flushed_lru += slot->n_flushed_lru; + *n_flushed_list += slot->n_flushed_list; + all_succeeded &= slot->succeeded_list; + + slot->state = PAGE_CLEANER_STATE_NONE; + + slot->n_pages_requested = 0; + } + + page_cleaner->n_slots_finished = 0; + + os_event_reset(page_cleaner->is_finished); + + mutex_exit(&page_cleaner->mutex); + + return(all_succeeded); +} + +#ifdef UNIV_LINUX +/** +Set priority for page_cleaner threads. +@param[in] priority priority intended to set +@return true if set as intended */ +static +bool +buf_flush_page_cleaner_set_priority( + int priority) +{ + setpriority(PRIO_PROCESS, (pid_t)syscall(SYS_gettid), + priority); + return(getpriority(PRIO_PROCESS, (pid_t)syscall(SYS_gettid)) + == priority); +} +#endif /* UNIV_LINUX */ + +#ifdef UNIV_DEBUG +/** Loop used to disable page cleaner threads. 
*/ +static +void +buf_flush_page_cleaner_disabled_loop(void) +{ + ut_ad(page_cleaner != NULL); + + if (!innodb_page_cleaner_disabled_debug) { + /* We return to avoid entering and exiting mutex. */ + return; + } + + mutex_enter(&page_cleaner->mutex); + page_cleaner->n_disabled_debug++; + mutex_exit(&page_cleaner->mutex); + + while (innodb_page_cleaner_disabled_debug + && srv_shutdown_state == SRV_SHUTDOWN_NONE + && page_cleaner->is_running) { + + os_thread_sleep(100000); /* [A] */ + } + + /* We need to wait for threads exiting here, otherwise we would + encounter problem when we quickly perform following steps: + 1) SET GLOBAL innodb_page_cleaner_disabled_debug = 1; + 2) SET GLOBAL innodb_page_cleaner_disabled_debug = 0; + 3) SET GLOBAL innodb_page_cleaner_disabled_debug = 1; + That's because after step 1 this thread could still be sleeping + inside the loop above at [A] and steps 2, 3 could happen before + this thread wakes up from [A]. In such case this thread would + not re-increment n_disabled_debug and we would be waiting for + him forever in buf_flush_page_cleaner_disabled_debug_update(...). + + Therefore we are waiting in step 2 for this thread exiting here. */ + + mutex_enter(&page_cleaner->mutex); + page_cleaner->n_disabled_debug--; + mutex_exit(&page_cleaner->mutex); +} + +/** Disables page cleaner threads (coordinator and workers). +It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0). +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ +void +buf_flush_page_cleaner_disabled_debug_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + if (page_cleaner == NULL) { + return; + } + + if (!*static_cast(save)) { + if (!innodb_page_cleaner_disabled_debug) { + return; + } + + innodb_page_cleaner_disabled_debug = false; + + /* Enable page cleaner threads. 
*/ + while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + mutex_enter(&page_cleaner->mutex); + const ulint n = page_cleaner->n_disabled_debug; + mutex_exit(&page_cleaner->mutex); + /* Check if all threads have been enabled, to avoid + problem when we decide to re-disable them soon. */ + if (n == 0) { + break; + } + } + return; + } + + if (innodb_page_cleaner_disabled_debug) { + return; + } + + innodb_page_cleaner_disabled_debug = true; + + while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + /* Workers are possibly sleeping on is_requested. + + We have to wake them, otherwise they could possibly + have never noticed, that they should be disabled, + and we would wait for them here forever. + + That's why we have sleep-loop instead of simply + waiting on some disabled_debug_event. */ + os_event_set(page_cleaner->is_requested); + + mutex_enter(&page_cleaner->mutex); + + ut_ad(page_cleaner->n_disabled_debug + <= srv_n_page_cleaners); + + if (page_cleaner->n_disabled_debug + == srv_n_page_cleaners) { + + mutex_exit(&page_cleaner->mutex); + break; + } + + mutex_exit(&page_cleaner->mutex); + + os_thread_sleep(100000); + } +} +#endif /* UNIV_DEBUG */ /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer -pools. As of now we'll have only one instance of this thread. +pools. As of now we'll have only one coordinator. 
@return a dummy parameter */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t -DECLARE_THREAD(buf_flush_page_cleaner_thread)( -/*==========================================*/ +DECLARE_THREAD(buf_flush_page_cleaner_coordinator)( +/*===============================================*/ void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ @@ -2292,35 +3135,248 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); + ulint last_pages = 0; - ut_ad(!srv_read_only_mode); - + my_thread_init(); #ifdef UNIV_PFS_THREAD - pfs_register_thread(buf_page_cleaner_thread_key); + pfs_register_thread(page_cleaner_thread_key); #endif /* UNIV_PFS_THREAD */ #ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); + ib::info() << "page_cleaner thread running, id " + << os_thread_pf(os_thread_get_curr_id()); #endif /* UNIV_DEBUG_THREAD_CREATION */ - buf_page_cleaner_is_active = TRUE; +#ifdef UNIV_LINUX + /* linux might be able to set different setting for each thread. + worth to try to set high priority for page cleaner threads */ + if (buf_flush_page_cleaner_set_priority( + buf_flush_page_cleaner_priority)) { + + ib::info() << "page_cleaner coordinator priority: " + << buf_flush_page_cleaner_priority; + } else { + ib::info() << "If the mysqld execution user is authorized," + " page cleaner thread priority can be changed." + " See the man page of setpriority()."; + } +#endif /* UNIV_LINUX */ + + buf_page_cleaner_is_active = true; + + while (!srv_read_only_mode + && srv_shutdown_state == SRV_SHUTDOWN_NONE + && recv_sys->heap != NULL) { + /* treat flushing requests during recovery. 
*/ + ulint n_flushed_lru = 0; + ulint n_flushed_list = 0; + + os_event_wait(recv_sys->flush_start); + + if (srv_shutdown_state != SRV_SHUTDOWN_NONE + || recv_sys->heap == NULL) { + break; + } + + switch (recv_sys->flush_type) { + case BUF_FLUSH_LRU: + /* Flush pages from end of LRU if required */ + pc_request(0, LSN_MAX); + while (pc_flush_slot() > 0) {} + pc_wait_finished(&n_flushed_lru, &n_flushed_list); + break; + + case BUF_FLUSH_LIST: + /* Flush all pages */ + do { + pc_request(ULINT_MAX, LSN_MAX); + while (pc_flush_slot() > 0) {} + } while (!pc_wait_finished(&n_flushed_lru, + &n_flushed_list)); + break; + + default: + ut_ad(0); + } + + os_event_reset(recv_sys->flush_start); + os_event_set(recv_sys->flush_end); + } + + os_event_wait(buf_flush_event); + + ulint ret_sleep = 0; + ulint n_evicted = 0; + ulint n_flushed_last = 0; + ulint warn_interval = 1; + ulint warn_count = 0; + int64_t sig_count = os_event_reset(buf_flush_event); while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - page_cleaner_sleep_if_needed(next_loop_time); + /* The page_cleaner skips sleep if the server is + idle and there are no pending IOs in the buffer pool + and there is work to do. 
*/ + if (srv_check_activity(last_activity) + || buf_get_n_pending_read_ios() + || n_flushed == 0) { - next_loop_time = ut_time_ms() + 1000; + ret_sleep = pc_sleep_if_needed( + next_loop_time, sig_count); - if (srv_check_activity(last_activity)) { - last_activity = srv_get_activity_count(); + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { + break; + } + } else if (ut_time_ms() > next_loop_time) { + ret_sleep = OS_SYNC_TIME_EXCEEDED; + } else { + ret_sleep = 0; + } - /* Flush pages from flush_list if required */ - n_flushed += page_cleaner_flush_pages_if_needed(); + sig_count = os_event_reset(buf_flush_event); - } else if (srv_idle_flush_pct) { - n_flushed = page_cleaner_do_flush_batch( - PCT_IO(100), - LSN_MAX); + if (ret_sleep == OS_SYNC_TIME_EXCEEDED) { + ulint curr_time = ut_time_ms(); + + if (curr_time > next_loop_time + 3000) { + if (warn_count == 0) { + ib::info() << "page_cleaner: 1000ms" + " intended loop took " + << 1000 + curr_time + - next_loop_time + << "ms. The settings might not" + " be optimal. (flushed=" + << n_flushed_last + << " and evicted=" + << n_evicted + << ", during the time.)"; + if (warn_interval > 300) { + warn_interval = 600; + } else { + warn_interval *= 2; + } + + warn_count = warn_interval; + } else { + --warn_count; + } + } else { + /* reset counter */ + warn_interval = 1; + warn_count = 0; + } + + next_loop_time = curr_time + 1000; + n_flushed_last = n_evicted = 0; + } + + if (ret_sleep != OS_SYNC_TIME_EXCEEDED + && srv_flush_sync + && buf_flush_sync_lsn > 0) { + /* woke up for flush_sync */ + mutex_enter(&page_cleaner->mutex); + lsn_t lsn_limit = buf_flush_sync_lsn; + buf_flush_sync_lsn = 0; + mutex_exit(&page_cleaner->mutex); + + /* Request flushing for threads */ + pc_request(ULINT_MAX, lsn_limit); + + ulint tm = ut_time_ms(); + + /* Coordinator also treats requests */ + while (pc_flush_slot() > 0) {} + + /* only coordinator is using these counters, + so no need to protect by lock. 
*/ + page_cleaner->flush_time += ut_time_ms() - tm; + page_cleaner->flush_pass++; + + /* Wait for all slots to be finished */ + ulint n_flushed_lru = 0; + ulint n_flushed_list = 0; + pc_wait_finished(&n_flushed_lru, &n_flushed_list); + + if (n_flushed_list > 0 || n_flushed_lru > 0) { + buf_flush_stats(n_flushed_list, n_flushed_lru); + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_COUNT, + MONITOR_FLUSH_SYNC_PAGES, + n_flushed_lru + n_flushed_list); + } + + n_flushed = n_flushed_lru + n_flushed_list; + + } else if (srv_check_activity(last_activity)) { + ulint n_to_flush; + lsn_t lsn_limit = 0; + + /* Estimate pages from flush_list to be flushed */ + if (ret_sleep == OS_SYNC_TIME_EXCEEDED) { + last_activity = srv_get_activity_count(); + n_to_flush = + page_cleaner_flush_pages_recommendation( + &lsn_limit, last_pages); + } else { + n_to_flush = 0; + } + + /* Request flushing for threads */ + pc_request(n_to_flush, lsn_limit); + + ulint tm = ut_time_ms(); + + /* Coordinator also treats requests */ + while (pc_flush_slot() > 0) { + /* No op */ + } + + /* only coordinator is using these counters, + so no need to protect by lock. 
*/ + page_cleaner->flush_time += ut_time_ms() - tm; + page_cleaner->flush_pass++ ; + + /* Wait for all slots to be finished */ + ulint n_flushed_lru = 0; + ulint n_flushed_list = 0; + + pc_wait_finished(&n_flushed_lru, &n_flushed_list); + + if (n_flushed_list > 0 || n_flushed_lru > 0) { + buf_flush_stats(n_flushed_list, n_flushed_lru); + } + + if (ret_sleep == OS_SYNC_TIME_EXCEEDED) { + last_pages = n_flushed_list; + } + + n_evicted += n_flushed_lru; + n_flushed_last += n_flushed_list; + + n_flushed = n_flushed_lru + n_flushed_list; + + if (n_flushed_lru) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, + MONITOR_LRU_BATCH_FLUSH_COUNT, + MONITOR_LRU_BATCH_FLUSH_PAGES, + n_flushed_lru); + } + + if (n_flushed_list) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_COUNT, + MONITOR_FLUSH_ADAPTIVE_PAGES, + n_flushed_list); + } + + } else if (ret_sleep == OS_SYNC_TIME_EXCEEDED) { + /* no activity, slept enough */ + buf_flush_lists(PCT_IO(100), LSN_MAX, &n_flushed); + + n_flushed_last += n_flushed; if (n_flushed) { MONITOR_INC_VALUE_CUMULATIVE( @@ -2328,18 +3384,23 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( MONITOR_FLUSH_BACKGROUND_COUNT, MONITOR_FLUSH_BACKGROUND_PAGES, n_flushed); + } + + } else { + /* no activity, but woken up by event */ + n_flushed = 0; } - /* Flush pages from end of LRU if required */ - buf_flush_LRU_tail(); + ut_d(buf_flush_page_cleaner_disabled_loop()); } ut_ad(srv_shutdown_state > 0); - - if (srv_fast_shutdown == 2) { - /* In very fast shutdown we simulate a crash of - buffer pool. We are not required to do any flushing */ + if (srv_fast_shutdown == 2 + || srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + /* In very fast shutdown or when innodb failed to start, we + simulate a crash of the buffer pool. We are not required to do + any flushing. 
*/ goto thread_exit; } @@ -2356,7 +3417,15 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. */ do { - n_flushed = page_cleaner_do_flush_batch(PCT_IO(100), LSN_MAX); + pc_request(ULINT_MAX, LSN_MAX); + + while (pc_flush_slot() > 0) {} + + ulint n_flushed_lru = 0; + ulint n_flushed_list = 0; + pc_wait_finished(&n_flushed_lru, &n_flushed_list); + + n_flushed = n_flushed_lru + n_flushed_list; /* We sleep only if there are no pages to flush */ if (n_flushed == 0) { @@ -2382,15 +3451,25 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( bool success; do { + pc_request(ULINT_MAX, LSN_MAX); + + while (pc_flush_slot() > 0) {} + + ulint n_flushed_lru = 0; + ulint n_flushed_list = 0; + success = pc_wait_finished(&n_flushed_lru, &n_flushed_list); + + n_flushed = n_flushed_lru + n_flushed_list; - success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + buf_flush_wait_LRU_batch_end(); } while (!success || n_flushed > 0); /* Some sanity checks */ ut_a(srv_get_active_thread_type() == SRV_NONE); ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE); + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool = buf_pool_from_array(i); ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0); @@ -2399,17 +3478,110 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* We have lived our life. Time to die. */ thread_exit: - buf_page_cleaner_is_active = FALSE; + /* All worker threads are waiting for the event here, + and no more access to page_cleaner structure by them. + Wakes worker threads up just to make them exit. */ + page_cleaner->is_running = false; + os_event_set(page_cleaner->is_requested); - os_event_free(buf_flush_event); + buf_flush_page_cleaner_close(); + + buf_page_cleaner_is_active = false; + + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. 
*/ - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } +/******************************************************************//** +Worker thread of page_cleaner. +@return a dummy parameter */ +extern "C" +os_thread_ret_t +DECLARE_THREAD(buf_flush_page_cleaner_worker)( +/*==========================================*/ + void* arg MY_ATTRIBUTE((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + my_thread_init(); + + mutex_enter(&page_cleaner->mutex); + page_cleaner->n_workers++; + mutex_exit(&page_cleaner->mutex); + +#ifdef UNIV_LINUX + /* linux might be able to set different setting for each thread + worth to try to set high priority for page cleaner threads */ + if (buf_flush_page_cleaner_set_priority( + buf_flush_page_cleaner_priority)) { + + ib::info() << "page_cleaner worker priority: " + << buf_flush_page_cleaner_priority; + } +#endif /* UNIV_LINUX */ + + while (true) { + os_event_wait(page_cleaner->is_requested); + + ut_d(buf_flush_page_cleaner_disabled_loop()); + + if (!page_cleaner->is_running) { + break; + } + + pc_flush_slot(); + } + + mutex_enter(&page_cleaner->mutex); + page_cleaner->n_workers--; + mutex_exit(&page_cleaner->mutex); + + my_thread_end(); + + os_thread_exit(); + + OS_THREAD_DUMMY_RETURN; +} + +/*******************************************************************//** +Synchronously flush dirty blocks from the end of the flush list of all buffer +pool instances. +NOTE: The calling thread is not allowed to own any latches on pages! */ +void +buf_flush_sync_all_buf_pools(void) +/*==============================*/ +{ + bool success; + do { + success = buf_flush_lists(ULINT_MAX, LSN_MAX, NULL); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + } while (!success); + + ut_a(success); +} + +/** Request IO burst and wake page_cleaner up. 
+@param[in] lsn_limit upper limit of LSN to be flushed */ +void +buf_flush_request_force( + lsn_t lsn_limit) +{ + /* adjust based on lsn_avg_rate not to get old */ + lsn_t lsn_target = lsn_limit + lsn_avg_rate * 3; + + mutex_enter(&page_cleaner->mutex); + if (lsn_target > buf_flush_sync_lsn) { + buf_flush_sync_lsn = lsn_target; + } + mutex_exit(&page_cleaner->mutex); + + os_event_set(buf_flush_event); +} #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /** Functor to validate the flush list. */ @@ -2422,7 +3594,7 @@ struct Check { /******************************************************************//** Validates the flush list. -@return TRUE if ok */ +@return TRUE if ok */ static ibool buf_flush_validate_low( @@ -2431,17 +3603,18 @@ buf_flush_validate_low( { buf_page_t* bpage; const ib_rbt_node_t* rnode = NULL; + Check check; ut_ad(buf_flush_list_mutex_own(buf_pool)); - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, Check()); + ut_list_validate(buf_pool->flush_list, check); bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); /* If we are in recovery mode i.e.: flush_rbt != NULL then each block in the flush_list must also be present in the flush_rbt. 
*/ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { + if (buf_pool->flush_rbt != NULL) { rnode = rbt_first(buf_pool->flush_rbt); } @@ -2462,20 +3635,20 @@ buf_flush_validate_low( || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); ut_a(om > 0); - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { - buf_page_t** prpage; + if (buf_pool->flush_rbt != NULL) { + buf_page_t** prpage; - ut_a(rnode); + ut_a(rnode != NULL); prpage = rbt_value(buf_page_t*, rnode); - ut_a(*prpage); + ut_a(*prpage != NULL); ut_a(*prpage == bpage); rnode = rbt_next(buf_pool->flush_rbt, rnode); } bpage = UT_LIST_GET_NEXT(list, bpage); - ut_a(!bpage || om >= bpage->oldest_modification); + ut_a(bpage == NULL || om >= bpage->oldest_modification); } /* By this time we must have exhausted the traversal of @@ -2487,8 +3660,7 @@ buf_flush_validate_low( /******************************************************************//** Validates the flush list. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool buf_flush_validate( /*===============*/ @@ -2508,18 +3680,16 @@ buf_flush_validate( #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG /******************************************************************//** Check if there are any dirty pages that belong to a space id in the flush list in a particular buffer pool. 
-@return number of dirty pages present in a single buffer pool */ -UNIV_INTERN +@return number of dirty pages present in a single buffer pool */ ulint buf_pool_get_dirty_pages_count( /*===========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint id) /*!< in: space id to check */ + ulint id, /*!< in: space id to check */ + FlushObserver* observer) /*!< in: flush observer to check */ { ulint count = 0; @@ -2537,7 +3707,10 @@ buf_pool_get_dirty_pages_count( ut_ad(bpage->in_flush_list); ut_ad(bpage->oldest_modification > 0); - if (buf_page_get_space(bpage) == id) { + if ((observer != NULL + && observer == bpage->flush_observer) + || (observer == NULL + && id == bpage->id.space())) { ++count; } } @@ -2550,13 +3723,12 @@ buf_pool_get_dirty_pages_count( /******************************************************************//** Check if there are any dirty pages that belong to a space id in the flush list. -@return number of dirty pages present in all the buffer pools */ -UNIV_INTERN +@return number of dirty pages present in all the buffer pools */ ulint buf_flush_get_dirty_pages_count( /*============================*/ - ulint id) /*!< in: space id to check */ - + ulint id, /*!< in: space id to check */ + FlushObserver* observer) /*!< in: flush observer to check */ { ulint count = 0; @@ -2565,9 +3737,136 @@ buf_flush_get_dirty_pages_count( buf_pool = buf_pool_from_array(i); - count += buf_pool_get_dirty_pages_count(buf_pool, id); + count += buf_pool_get_dirty_pages_count(buf_pool, id, observer); } return(count); } -#endif /* UNIV_DEBUG */ + +/** FlushObserver constructor +@param[in] space_id table space id +@param[in] trx trx instance +@param[in] stage performance schema accounting object, +used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages() +for accounting. 
*/ +FlushObserver::FlushObserver( + ulint space_id, + trx_t* trx, + ut_stage_alter_t* stage) + : + m_space_id(space_id), + m_trx(trx), + m_stage(stage), + m_interrupted(false) +{ + m_flushed = UT_NEW_NOKEY(std::vector(srv_buf_pool_instances)); + m_removed = UT_NEW_NOKEY(std::vector(srv_buf_pool_instances)); + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + m_flushed->at(i) = 0; + m_removed->at(i) = 0; + } + +#ifdef FLUSH_LIST_OBSERVER_DEBUG + ib::info() << "FlushObserver constructor: " << m_trx->id; +#endif /* FLUSH_LIST_OBSERVER_DEBUG */ +} + +/** FlushObserver deconstructor */ +FlushObserver::~FlushObserver() +{ + ut_ad(buf_flush_get_dirty_pages_count(m_space_id, this) == 0); + + UT_DELETE(m_flushed); + UT_DELETE(m_removed); + +#ifdef FLUSH_LIST_OBSERVER_DEBUG + ib::info() << "FlushObserver deconstructor: " << m_trx->id; +#endif /* FLUSH_LIST_OBSERVER_DEBUG */ +} + +/** Check whether trx is interrupted +@return true if trx is interrupted */ +bool +FlushObserver::check_interrupted() +{ + if (trx_is_interrupted(m_trx)) { + interrupted(); + + return(true); + } + + return(false); +} + +/** Notify observer of a flush +@param[in] buf_pool buffer pool instance +@param[in] bpage buffer page to flush */ +void +FlushObserver::notify_flush( + buf_pool_t* buf_pool, + buf_page_t* bpage) +{ + ut_ad(buf_pool_mutex_own(buf_pool)); + + m_flushed->at(buf_pool->instance_no)++; + + if (m_stage != NULL) { + m_stage->inc(); + } + +#ifdef FLUSH_LIST_OBSERVER_DEBUG + ib::info() << "Flush <" << bpage->id.space() + << ", " << bpage->id.page_no() << ">"; +#endif /* FLUSH_LIST_OBSERVER_DEBUG */ +} + +/** Notify observer of a remove +@param[in] buf_pool buffer pool instance +@param[in] bpage buffer page flushed */ +void +FlushObserver::notify_remove( + buf_pool_t* buf_pool, + buf_page_t* bpage) +{ + ut_ad(buf_pool_mutex_own(buf_pool)); + + m_removed->at(buf_pool->instance_no)++; + +#ifdef FLUSH_LIST_OBSERVER_DEBUG + ib::info() << "Remove <" << bpage->id.space() + << ", " << 
bpage->id.page_no() << ">"; +#endif /* FLUSH_LIST_OBSERVER_DEBUG */ +} + +/** Flush dirty pages and wait. */ +void +FlushObserver::flush() +{ + buf_remove_t buf_remove; + + if (m_interrupted) { + buf_remove = BUF_REMOVE_FLUSH_NO_WRITE; + } else { + buf_remove = BUF_REMOVE_FLUSH_WRITE; + + if (m_stage != NULL) { + ulint pages_to_flush = + buf_flush_get_dirty_pages_count( + m_space_id, this); + + m_stage->begin_phase_flush(pages_to_flush); + } + } + + /* Flush or remove dirty pages. */ + buf_LRU_flush_or_remove_pages(m_space_id, buf_remove, m_trx); + + /* Wait for all dirty pages were flushed. */ + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + while (!is_complete(i)) { + + os_thread_sleep(2000); + } + } +} diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 30b991d24cf..5d2077763a6 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -24,19 +24,16 @@ Created 11/5/1995 Heikki Tuuri *******************************************************/ #include "buf0lru.h" - -#ifndef UNIV_HOTBACKUP #ifdef UNIV_NONINL #include "buf0lru.ic" -#endif +#endif /* UNIV_NOINL */ +#ifndef UNIV_HOTBACKUP #include "ut0byte.h" -#include "ut0lst.h" #include "ut0rnd.h" -#include "sync0sync.h" #include "sync0rw.h" #include "hash0hash.h" -#include "os0sync.h" +#include "os0event.h" #include "fil0fil.h" #include "btr0btr.h" #include "buf0buddy.h" @@ -53,8 +50,6 @@ Created 11/5/1995 Heikki Tuuri #include "srv0mon.h" #include "lock0lock.h" -#include "ha_prototypes.h" - /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the whole LRU list length, except that the tolerance defined below @@ -62,7 +57,7 @@ is allowed. Note that the tolerance must be small enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not allowed to point to either end of the LRU list. 
*/ -#define BUF_LRU_OLD_TOLERANCE 20 +static const ulint BUF_LRU_OLD_TOLERANCE = 20; /** The minimum amount of non-old blocks when the LRU_old list exists (that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks). @@ -79,7 +74,11 @@ We also release buf_pool->mutex after scanning this many pages of the flush_list when dropping a table. This is to ensure that other threads are not blocked for extended period of time when using very large buffer pools. */ -#define BUF_LRU_DROP_SEARCH_SIZE 1024 +static const ulint BUF_LRU_DROP_SEARCH_SIZE = 1024; + +/** We scan these many blocks when looking for a clean page to evict +during LRU eviction. */ +static const ulint BUF_LRU_SEARCH_SCAN_THRESHOLD = 100; /** We scan these many blocks when looking for a clean page to evict during LRU eviction. */ @@ -87,7 +86,7 @@ during LRU eviction. */ /** If we switch on the InnoDB monitor because there are too few available frames in the buffer pool, we set this to TRUE */ -static ibool buf_lru_switched_on_innodb_mon = FALSE; +static bool buf_lru_switched_on_innodb_mon = false; /******************************************************************//** These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O @@ -103,11 +102,11 @@ uncompressed and compressed data), which must be clean. */ /** Number of intervals for which we keep the history of these stats. Each interval is 1 second, defined by the rate at which srv_error_monitor_thread() calls buf_LRU_stat_update(). */ -#define BUF_LRU_STAT_N_INTERVAL 50 +static const ulint BUF_LRU_STAT_N_INTERVAL = 50; /** Co-efficient with which we multiply I/O operations to equate them with page_zip_decompress() operations. */ -#define BUF_LRU_IO_TO_UNZIP_FACTOR 50 +static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50; /** Sampled values buf_LRU_stat_cur. Not protected by any mutex. Updated by buf_LRU_stat_update(). */ @@ -118,18 +117,18 @@ static ulint buf_LRU_stat_arr_ind; /** Current operation counters. Not protected by any mutex. 
Cleared by buf_LRU_stat_update(). */ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur; +buf_LRU_stat_t buf_LRU_stat_cur; /** Running sum of past values of buf_LRU_stat_cur. Updated by buf_LRU_stat_update(). Not Protected by any mutex. */ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum; +buf_LRU_stat_t buf_LRU_stat_sum; /* @} */ /** @name Heuristics for detecting index scan @{ */ /** Move blocks to "new" LRU list only if the first access was at least this many milliseconds ago. Not protected by any mutex or latch. */ -UNIV_INTERN uint buf_LRU_old_threshold_ms; +uint buf_LRU_old_threshold_ms; /* @} */ /******************************************************************//** @@ -146,7 +145,7 @@ If a compressed page is freed other compressed pages may be relocated. caller needs to free the page to the free list @retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In this case the block is already returned to the buddy allocator. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) bool buf_LRU_block_remove_hashed( /*========================*/ @@ -165,8 +164,7 @@ buf_LRU_block_free_hashed_page( be in a state where it can be freed */ /******************************************************************//** -Increases LRU size in bytes with zip_size for compressed page, -UNIV_PAGE_SIZE for uncompressed page in inline function */ +Increases LRU size in bytes with page size inline function */ static inline void incr_LRU_size_in_bytes( @@ -175,24 +173,21 @@ incr_LRU_size_in_bytes( buf_pool_t* buf_pool) /*!< in: buffer pool instance */ { ut_ad(buf_pool_mutex_own(buf_pool)); - ulint zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes += zip_size ? 
zip_size : UNIV_PAGE_SIZE; + + buf_pool->stat.LRU_bytes += bpage->size.physical(); + ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size); } /******************************************************************//** Determines if the unzip_LRU list should be used for evicting a victim instead of the general LRU list. -@return TRUE if should use unzip_LRU */ -UNIV_INTERN +@return TRUE if should use unzip_LRU */ ibool buf_LRU_evict_from_unzip_LRU( /*=========================*/ buf_pool_t* buf_pool) { - ulint io_avg; - ulint unzip_avg; - ut_ad(buf_pool_mutex_own(buf_pool)); /* If the unzip_LRU list is empty, we can only use the LRU. */ @@ -216,9 +211,10 @@ buf_LRU_evict_from_unzip_LRU( /* Calculate the average over past intervals, and add the values of the current interval. */ - io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL + ulint io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL + buf_LRU_stat_cur.io; - unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL + + ulint unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL + buf_LRU_stat_cur.unzip; /* Decide based on our formula. If the load is I/O bound @@ -228,27 +224,33 @@ buf_LRU_evict_from_unzip_LRU( return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR); } -/******************************************************************//** -Attempts to drop page hash index on a batch of pages belonging to a -particular space id. */ +/** Attempts to drop page hash index on a batch of pages belonging to a +particular space id. 
+@param[in] space_id space id +@param[in] page_size page size +@param[in] arr array of page_no +@param[in] count number of entries in array */ static void buf_LRU_drop_page_hash_batch( -/*=========================*/ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - const ulint* arr, /*!< in: array of page_no */ - ulint count) /*!< in: number of entries in array */ + ulint space_id, + const page_size_t& page_size, + const ulint* arr, + ulint count) { - ulint i; - - ut_ad(arr != NULL); ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); - for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(space_id, zip_size, - arr[i]); + for (ulint i = 0; i < count; ++i, ++arr) { + /* While our only caller + buf_LRU_drop_page_hash_for_tablespace() + is being executed for DROP TABLE or similar, + the table cannot be evicted from the buffer pool. + Note: this should not be executed for DROP TABLESPACE, + because DROP TABLESPACE would be refused if tables existed + in the tablespace, and a previous DROP TABLE would have + already removed the AHI entries. */ + btr_search_drop_page_hash_when_freed( + page_id_t(space_id, *arr), page_size); } } @@ -264,38 +266,33 @@ buf_LRU_drop_page_hash_for_tablespace( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ ulint id) /*!< in: space id */ { - buf_page_t* bpage; - ulint* page_arr; - ulint num_entries; - ulint zip_size; + bool found; + const page_size_t page_size(fil_space_get_page_size(id, &found)); - zip_size = fil_space_get_zip_size(id); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + if (!found) { /* Somehow, the tablespace does not exist. Nothing to drop. 
*/ ut_ad(0); return; } - page_arr = static_cast(ut_malloc( - sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); + ulint* page_arr = static_cast(ut_malloc_nokey( + sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); + + ulint num_entries = 0; buf_pool_mutex_enter(buf_pool); - num_entries = 0; scan_again: - bpage = UT_LIST_GET_LAST(buf_pool->LRU); + for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; + /* No op */) { - while (bpage != NULL) { - buf_page_t* prev_bpage; - ibool is_fixed; - - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage); ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE - || bpage->space != id + || bpage->id.space() != id || bpage->io_fix != BUF_IO_NONE) { /* Compressed pages are never hashed. Skip blocks of other tablespaces. @@ -306,17 +303,25 @@ next_page: } mutex_enter(&((buf_block_t*) bpage)->mutex); - is_fixed = bpage->buf_fix_count > 0 - || !((buf_block_t*) bpage)->index; - mutex_exit(&((buf_block_t*) bpage)->mutex); - if (is_fixed) { - goto next_page; + { + bool skip = bpage->buf_fix_count > 0 + || !((buf_block_t*) bpage)->index; + + mutex_exit(&((buf_block_t*) bpage)->mutex); + + if (skip) { + /* Skip this block, because there are + no adaptive hash index entries + pointing to it, or because we cannot + drop them due to the buffer-fix. */ + goto next_page; + } } /* Store the page number so that we can drop the hash index in a batch later. */ - page_arr[num_entries] = bpage->offset; + page_arr[num_entries] = bpage->id.page_no(); ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE); ++num_entries; @@ -329,7 +334,7 @@ next_page: buf_pool_mutex_exit(buf_pool); buf_LRU_drop_page_hash_batch( - id, zip_size, page_arr, num_entries); + id, page_size, page_arr, num_entries); num_entries = 0; @@ -351,8 +356,9 @@ next_page: /* If, however, bpage has been removed from LRU list to the free list then we should restart the scan. bpage->state is protected by buf_pool mutex. 
*/ - if (bpage + if (bpage != NULL && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { + goto scan_again; } } @@ -360,7 +366,7 @@ next_page: buf_pool_mutex_exit(buf_pool); /* Drop any remaining batch of search hashed pages. */ - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_size, page_arr, num_entries); ut_free(page_arr); } @@ -370,14 +376,14 @@ want to hog the CPU and resources. Release the buffer pool and block mutex and try to force a context switch. Then reacquire the same mutexes. The current page is "fixed" before the release of the mutexes and then "unfixed" again once we have reacquired the mutexes. */ -static MY_ATTRIBUTE((nonnull)) +static void buf_flush_yield( /*============*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_page_t* bpage) /*!< in/out: current page */ { - ib_mutex_t* block_mutex; + BPageMutex* block_mutex; ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); @@ -385,6 +391,7 @@ buf_flush_yield( block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); + /* "Fix" the block so that the position cannot be changed after we release the buffer pool and block mutexes. */ @@ -400,6 +407,7 @@ buf_flush_yield( buf_pool_mutex_enter(buf_pool); mutex_enter(block_mutex); + /* "Unfix" the block now that we have both the buffer pool and block mutex again. */ buf_page_unset_sticky(bpage); @@ -411,7 +419,7 @@ If we have hogged the resources for too long then release the buffer pool and flush list mutex and do a thread yield. Set the current page to "sticky" so that it is not relocated during the yield. @return true if yielded */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) bool buf_flush_try_yield( /*================*/ @@ -454,7 +462,7 @@ buf_flush_try_yield( Removes a single page from a given tablespace inside a specific buffer pool instance. @return true if page was removed. 
*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) bool buf_flush_or_remove_page( /*=====================*/ @@ -477,8 +485,14 @@ buf_flush_or_remove_page( yet; maybe the system is currently reading it in, or flushing the modifications to the file */ return(false); + } + BPageMutex* block_mutex; + bool processed = false; + + block_mutex = buf_page_get_mutex(bpage); + /* We have to release the flush_list_mutex to obey the latching order. We are however guaranteed that the page will stay in the flush_list and won't be relocated because @@ -487,9 +501,6 @@ buf_flush_or_remove_page( buf_flush_list_mutex_exit(buf_pool); - bool processed; - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); ut_ad(bpage->oldest_modification != 0); @@ -497,28 +508,30 @@ buf_flush_or_remove_page( if (!flush) { buf_flush_remove(bpage); + + mutex_exit(block_mutex); + processed = true; - } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE) - && buf_flush_page( - buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) { + } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - /* Wake possible simulated aio thread to actually - post the writes to the operating system */ - os_aio_simulated_wake_handler_threads(); - - buf_pool_mutex_enter(buf_pool); - - buf_flush_list_mutex_enter(buf_pool); - - return(true); + /* The following call will release the buffer pool + and block mutex. 
*/ + processed = buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false); + if (processed) { + /* Wake possible simulated aio thread to actually + post the writes to the operating system */ + os_aio_simulated_wake_handler_threads(); + buf_pool_mutex_enter(buf_pool); + } else { + mutex_exit(block_mutex); + } } else { - processed = false; + mutex_exit(block_mutex); } - mutex_exit(block_mutex); - buf_flush_list_mutex_enter(buf_pool); ut_ad(!mutex_own(block_mutex)); @@ -535,13 +548,14 @@ the list as they age towards the tail of the LRU. @retval DB_SUCCESS if all freed @retval DB_FAIL if not all freed @retval DB_INTERRUPTED if the transaction was interrupted */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) dberr_t buf_flush_or_remove_pages( /*======================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id, /*!< in: target space id for which to remove or flush pages */ + FlushObserver* observer, /*!< in: flush observer */ bool flush, /*!< in: flush to disk if true but don't remove else remove without flushing to disk */ @@ -568,7 +582,10 @@ rescan: prev = UT_LIST_GET_PREV(list, bpage); - if (buf_page_get_space(bpage) != id) { + /* If flush observer is NULL, flush page for space id, + or flush page for flush observer. */ + if ((observer != NULL && observer != bpage->flush_observer) + || (observer == NULL && id != bpage->id.space())) { /* Skip this block, as it does not belong to the target space. */ @@ -626,6 +643,16 @@ rescan: /* The check for trx is interrupted is expensive, we want to check every N iterations. */ if (!processed && trx && trx_is_interrupted(trx)) { + if (trx->flush_observer != NULL) { + if (flush) { + trx->flush_observer->interrupted(); + } else { + /* We should remove all pages with the + the flush observer. 
*/ + continue; + } + } + buf_flush_list_mutex_exit(buf_pool); return(DB_INTERRUPTED); } @@ -641,12 +668,13 @@ Remove or flush all the dirty pages that belong to a given tablespace inside a specific buffer pool instance. The pages will remain in the LRU list and will be evicted from the LRU list as they age and move towards the tail of the LRU list. */ -static MY_ATTRIBUTE((nonnull(1))) +static void buf_flush_dirty_pages( /*==================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id, /*!< in: space id */ + FlushObserver* observer, /*!< in: flush observer */ bool flush, /*!< in: flush to disk if true otherwise remove the pages without flushing */ const trx_t* trx) /*!< to check if the operation must @@ -657,7 +685,8 @@ buf_flush_dirty_pages( do { buf_pool_mutex_enter(buf_pool); - err = buf_flush_or_remove_pages(buf_pool, id, flush, trx); + err = buf_flush_or_remove_pages( + buf_pool, id, observer, flush, trx); buf_pool_mutex_exit(buf_pool); @@ -667,6 +696,13 @@ buf_flush_dirty_pages( os_thread_sleep(2000); } + if (err == DB_INTERRUPTED && observer != NULL) { + ut_a(flush); + + flush = false; + err = DB_FAIL; + } + /* DB_FAIL is a soft error, it means that the task wasn't completed, needs to be retried. */ @@ -675,13 +711,13 @@ buf_flush_dirty_pages( } while (err == DB_FAIL); ut_ad(err == DB_INTERRUPTED - || buf_pool_get_dirty_pages_count(buf_pool, id) == 0); + || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0); } /******************************************************************//** Remove all pages that belong to a given tablespace inside a specific buffer pool instance when we are DISCARDing the tablespace. 
*/ -static MY_ATTRIBUTE((nonnull)) +static void buf_LRU_remove_all_pages( /*=====================*/ @@ -702,18 +738,18 @@ scan_again: rw_lock_t* hash_lock; buf_page_t* prev_bpage; - ib_mutex_t* block_mutex = NULL; + BPageMutex* block_mutex; ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - /* bpage->space and bpage->io_fix are protected by + /* bpage->id.space() and bpage->io_fix are protected by buf_pool->mutex and the block_mutex. It is safe to check them while holding buf_pool->mutex only. */ - if (buf_page_get_space(bpage) != id) { + if (bpage->id.space() != id) { /* Skip this block, as it does not belong to the space that is being invalidated. */ goto next_page; @@ -725,14 +761,12 @@ scan_again: all_freed = FALSE; goto next_page; } else { - ulint fold = buf_page_address_fold( - bpage->space, bpage->offset); - - hash_lock = buf_page_hash_lock_get(buf_pool, fold); + hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); rw_lock_x_lock(hash_lock); block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); if (bpage->buf_fix_count > 0) { @@ -754,35 +788,30 @@ scan_again: ut_ad(mutex_own(block_mutex)); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Dropping space %lu page %lu\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif + DBUG_PRINT("ib_buf", ("evict page %u:%u" + " state %u", + bpage->id.space(), + bpage->id.page_no(), + bpage->state)); + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { /* Do nothing, because the adaptive hash index covers uncompressed pages only. */ } else if (((buf_block_t*) bpage)->index) { - ulint page_no; - ulint zip_size; - buf_pool_mutex_exit(buf_pool); - zip_size = buf_page_get_zip_size(bpage); - page_no = buf_page_get_page_no(bpage); - rw_lock_x_unlock(hash_lock); mutex_exit(block_mutex); /* Note that the following call will acquire - and release block->lock X-latch. 
*/ + and release block->lock X-latch. + Note that the table cannot be evicted during + the execution of ALTER TABLE...DISCARD TABLESPACE + because MySQL is keeping the table handle open. */ btr_search_drop_page_hash_when_freed( - id, zip_size, page_no); + bpage->id, bpage->size); goto scan_again; } @@ -804,11 +833,9 @@ scan_again: ut_ad(!mutex_own(block_mutex)); -#ifdef UNIV_SYNC_DEBUG /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); next_page: bpage = prev_bpage; @@ -829,7 +856,7 @@ buffer pool instance when we are deleting the data file(s) of that tablespace. The pages still remain a part of LRU and are evicted from the list as they age towards the tail of the LRU only if buf_remove is BUF_REMOVE_FLUSH_NO_WRITE. */ -static MY_ATTRIBUTE((nonnull(1))) +static void buf_LRU_remove_pages( /*=================*/ @@ -839,22 +866,27 @@ buf_LRU_remove_pages( const trx_t* trx) /*!< to check if the operation must be interrupted */ { + FlushObserver* observer = (trx == NULL) ? NULL : trx->flush_observer; + switch (buf_remove) { case BUF_REMOVE_ALL_NO_WRITE: buf_LRU_remove_all_pages(buf_pool, id); break; case BUF_REMOVE_FLUSH_NO_WRITE: - ut_a(trx == 0); - buf_flush_dirty_pages(buf_pool, id, false, NULL); + /* Pass trx as NULL to avoid interruption check. */ + buf_flush_dirty_pages(buf_pool, id, observer, false, NULL); break; case BUF_REMOVE_FLUSH_WRITE: - ut_a(trx != 0); - buf_flush_dirty_pages(buf_pool, id, true, trx); - /* Ensure that all asynchronous IO is completed. */ - os_aio_wait_until_no_pending_writes(); - fil_flush(id); + buf_flush_dirty_pages(buf_pool, id, observer, true, trx); + + if (observer == NULL) { + /* Ensure that all asynchronous IO is completed. 
*/ + os_aio_wait_until_no_pending_writes(); + fil_flush(id); + } + break; } } @@ -864,7 +896,6 @@ Flushes all dirty pages or removes all pages belonging to a given tablespace. A PROBLEM: if readahead is being started, what guarantees that it will not try to read in pages after this operation has completed? */ -UNIV_INTERN void buf_LRU_flush_or_remove_pages( /*==========================*/ @@ -909,13 +940,11 @@ buf_LRU_flush_or_remove_pages( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ -UNIV_INTERN void buf_LRU_insert_zip_clean( /*=====================*/ buf_page_t* bpage) /*!< in: pointer to the block in question */ { - buf_page_t* b; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); @@ -923,20 +952,21 @@ buf_LRU_insert_zip_clean( /* Find the first successor of bpage in the LRU list that is in the zip_clean list. */ - b = bpage; + buf_page_t* b = bpage; + do { b = UT_LIST_GET_NEXT(LRU, b); } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE); /* Insert bpage before b, i.e., after the predecessor of b. */ - if (b) { + if (b != NULL) { b = UT_LIST_GET_PREV(list, b); } - if (b) { - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage); + if (b != NULL) { + UT_LIST_INSERT_AFTER(buf_pool->zip_clean, b, bpage); } else { - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage); + UT_LIST_ADD_FIRST(buf_pool->zip_clean, bpage); } } #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -944,34 +974,34 @@ buf_LRU_insert_zip_clean( /******************************************************************//** Try to free an uncompressed page of a compressed block from the unzip LRU list. The compressed page is preserved, and it need not be clean. 
-@return TRUE if freed */ -UNIV_INLINE -ibool +@return true if freed */ +static +bool buf_LRU_free_from_unzip_LRU_list( /*=============================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only srv_LRU_scan_depth / 2 blocks. */ { - buf_block_t* block; - ibool freed; - ulint scanned; - ut_ad(buf_pool_mutex_own(buf_pool)); if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) { - return(FALSE); + return(false); } - for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU), - scanned = 0, freed = FALSE; - block != NULL && !freed + ulint scanned = 0; + bool freed = false; + + for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + block != NULL + && !freed && (scan_all || scanned < srv_LRU_scan_depth); ++scanned) { - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, - block); + buf_block_t* prev_block; + + prev_block = UT_LIST_GET_PREV(unzip_LRU, block); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(block->in_unzip_LRU_list); @@ -989,43 +1019,44 @@ buf_LRU_free_from_unzip_LRU_list( MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL, scanned); } + return(freed); } /******************************************************************//** Try to free a clean page from the common LRU list. -@return TRUE if freed */ -UNIV_INLINE -ibool +@return true if freed */ +static +bool buf_LRU_free_from_common_LRU_list( /*==============================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - srv_LRU_scan_depth / 2 blocks. 
*/ + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only + up to BUF_LRU_SEARCH_SCAN_THRESHOLD */ { - buf_page_t* bpage; - ibool freed; - ulint scanned; - ut_ad(buf_pool_mutex_own(buf_pool)); - for (bpage = buf_pool->lru_scan_itr.start(), - scanned = 0, freed = false; - bpage != NULL && !freed + ulint scanned = 0; + bool freed = false; + + for (buf_page_t* bpage = buf_pool->lru_scan_itr.start(); + bpage != NULL + && !freed && (scan_all || scanned < BUF_LRU_SEARCH_SCAN_THRESHOLD); ++scanned, bpage = buf_pool->lru_scan_itr.get()) { - buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + BPageMutex* mutex = buf_page_get_mutex(bpage); + buf_pool->lru_scan_itr.set(prev); - ib_mutex_t* mutex = buf_page_get_mutex(bpage); mutex_enter(mutex); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); - unsigned accessed = buf_page_is_accessed(bpage); + unsigned accessed = buf_page_is_accessed(bpage); if (buf_flush_ready_for_replace(bpage)) { mutex_exit(mutex); @@ -1058,47 +1089,45 @@ buf_LRU_free_from_common_LRU_list( /******************************************************************//** Try to free a replaceable block. -@return TRUE if found and freed */ -UNIV_INTERN -ibool +@return true if found and freed */ +bool buf_LRU_scan_and_free_block( /*========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - 'old' blocks. */ + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only + BUF_LRU_SEARCH_SCAN_THRESHOLD + blocks. 
*/ { ut_ad(buf_pool_mutex_own(buf_pool)); return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all) - || buf_LRU_free_from_common_LRU_list( - buf_pool, scan_all)); + || buf_LRU_free_from_common_LRU_list(buf_pool, scan_all)); } /******************************************************************//** Returns TRUE if less than 25 % of the buffer pool in any instance is available. This can be used in heuristics to prevent huge transactions eating up the whole buffer pool for their locks. -@return TRUE if less than 25 % of buffer pool left */ -UNIV_INTERN +@return TRUE if less than 25 % of buffer pool left */ ibool buf_LRU_buf_pool_running_out(void) /*==============================*/ { - ulint i; ibool ret = FALSE; - for (i = 0; i < srv_buf_pool_instances && !ret; i++) { + for (ulint i = 0; i < srv_buf_pool_instances && !ret; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); buf_pool_mutex_enter(buf_pool); - if (!recv_recovery_on + if (!recv_recovery_is_on() && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) - < buf_pool->curr_size / 4) { + < ut_min(buf_pool->curr_size, + buf_pool->old_size) / 4) { ret = TRUE; } @@ -1112,8 +1141,7 @@ buf_LRU_buf_pool_running_out(void) /******************************************************************//** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, returns NULL. 
-@return a free control block, or NULL if the buf_block->free list is empty */ -UNIV_INTERN +@return a free control block, or NULL if the buf_block->free list is empty */ buf_block_t* buf_LRU_get_free_only( /*==================*/ @@ -1123,25 +1151,42 @@ buf_LRU_get_free_only( ut_ad(buf_pool_mutex_own(buf_pool)); - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free); + block = reinterpret_cast( + UT_LIST_GET_FIRST(buf_pool->free)); - if (block) { + while (block != NULL) { ut_ad(block->page.in_free_list); ut_d(block->page.in_free_list = FALSE); ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_LRU_list); ut_a(!buf_page_in_file(&block->page)); - UT_LIST_REMOVE(list, buf_pool->free, (&block->page)); + UT_LIST_REMOVE(buf_pool->free, &block->page); - mutex_enter(&block->mutex); + if (buf_pool->curr_size >= buf_pool->old_size + || UT_LIST_GET_LEN(buf_pool->withdraw) + >= buf_pool->withdraw_target + || !buf_block_will_withdrawn(buf_pool, block)) { + /* found valid free block */ + buf_page_mutex_enter(block); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); - ut_ad(buf_pool_from_block(block) == buf_pool); + ut_ad(buf_pool_from_block(block) == buf_pool); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); + break; + } + + /* This should be withdrawn */ + UT_LIST_ADD_LAST( + buf_pool->withdraw, + &block->page); + ut_d(block->in_withdraw_list = TRUE); + + block = reinterpret_cast( + UT_LIST_GET_FIRST(buf_pool->free)); } return(block); @@ -1160,28 +1205,23 @@ buf_LRU_check_size_of_non_data_objects( { ut_ad(buf_pool_mutex_own(buf_pool)); - if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + if (!recv_recovery_is_on() + && buf_pool->curr_size == buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) { - ut_print_timestamp(stderr); - 
fprintf(stderr, - " InnoDB: ERROR: over 95 percent of the buffer pool" - " is occupied by\n" - "InnoDB: lock heaps or the adaptive hash index!" - " Check that your\n" - "InnoDB: transactions do not set too many row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: We intentionally generate a seg fault" - " to print a stack trace\n" - "InnoDB: on Linux!\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); + ib::fatal() << "Over 95 percent of the buffer pool is" + " occupied by lock heaps or the adaptive hash index!" + " Check that your transactions do not set too many" + " row locks. Your buffer pool size is " + << (buf_pool->curr_size + / (1024 * 1024 / UNIV_PAGE_SIZE)) << " MB." + " Maybe you should make the buffer pool bigger?" + " We intentionally generate a seg fault to print" + " a stack trace on Linux!"; - ut_error; - - } else if (!recv_recovery_on + } else if (!recv_recovery_is_on() + && buf_pool->curr_size == buf_pool->old_size && (UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU)) < buf_pool->curr_size / 3) { @@ -1192,27 +1232,23 @@ buf_LRU_check_size_of_non_data_objects( heaps or the adaptive hash index. This may be a memory leak! */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: WARNING: over 67 percent of" - " the buffer pool is occupied by\n" - "InnoDB: lock heaps or the adaptive" - " hash index! Check that your\n" - "InnoDB: transactions do not set too many" - " row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: Starting the InnoDB Monitor to print" - " diagnostics, including\n" - "InnoDB: lock heap and hash index sizes.\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); + ib::warn() << "Over 67 percent of the buffer pool is" + " occupied by lock heaps or the adaptive hash" + " index! 
Check that your transactions do not" + " set too many row locks. Your buffer pool" + " size is " + << (buf_pool->curr_size + / (1024 * 1024 / UNIV_PAGE_SIZE)) + << " MB. Maybe you should make the buffer pool" + " bigger?. Starting the InnoDB Monitor to print" + " diagnostics, including lock heap and hash" + " index sizes."; - buf_lru_switched_on_innodb_mon = TRUE; + buf_lru_switched_on_innodb_mon = true; srv_print_innodb_monitor = TRUE; os_event_set(srv_monitor_event); } + } else if (buf_lru_switched_on_innodb_mon) { /* Switch off the InnoDB Monitor; this is a simple way @@ -1220,7 +1256,7 @@ buf_LRU_check_size_of_non_data_objects( but may also surprise users if the user also switched on the monitor! */ - buf_lru_switched_on_innodb_mon = FALSE; + buf_lru_switched_on_innodb_mon = false; srv_print_innodb_monitor = FALSE; } } @@ -1248,19 +1284,18 @@ we put it to free list to be used. * scan LRU list even if buf_pool->try_LRU_scan is not set * iteration > 1: * same as iteration 1 but sleep 10ms -@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ -UNIV_INTERN +@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ buf_block_t* buf_LRU_get_free_block( /*===================*/ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ { buf_block_t* block = NULL; - ibool freed = FALSE; + bool freed = false; ulint n_iterations = 0; ulint flush_failures = 0; - ibool mon_value_was = FALSE; - ibool started_monitor = FALSE; + bool mon_value_was = false; + bool started_monitor = false; MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); loop: @@ -1271,7 +1306,7 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(buf_pool); - if (block) { + if (block != NULL) { buf_pool_mutex_exit(buf_pool); ut_ad(buf_pool_from_block(block) == buf_pool); @@ -1282,18 +1317,21 @@ loop: static_cast(mon_value_was); } + block->skip_flush_check = false; + block->page.flush_observer = NULL; return(block); } - freed = FALSE; + MONITOR_INC( 
MONITOR_LRU_GET_FREE_LOOPS ); + freed = false; if (buf_pool->try_LRU_scan || n_iterations > 0) { /* If no block was in the free list, search from the end of the LRU list and try to free a block there. If we are doing for the first time we'll scan only tail of the LRU list otherwise we scan the whole LRU list. */ - freed = buf_LRU_scan_and_free_block(buf_pool, - n_iterations > 0); + freed = buf_LRU_scan_and_free_block( + buf_pool, n_iterations > 0); if (!freed && n_iterations == 0) { /* Tell other threads that there is no point @@ -1312,42 +1350,33 @@ loop: if (freed) { goto loop; - } - if (n_iterations > 20) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: difficult to find free blocks in\n" - "InnoDB: the buffer pool (%lu search iterations)!\n" - "InnoDB: %lu failed attempts to flush a page!" - " Consider\n" - "InnoDB: increasing the buffer pool size.\n" - "InnoDB: It is also possible that" - " in your Unix version\n" - "InnoDB: fsync is very slow, or" - " completely frozen inside\n" - "InnoDB: the OS kernel. Then upgrading to" - " a newer version\n" - "InnoDB: of your operating system may help." - " Look at the\n" - "InnoDB: number of fsyncs in diagnostic info below.\n" - "InnoDB: Pending flushes (fsync) log: %lu;" - " buffer pool: %lu\n" - "InnoDB: %lu OS file reads, %lu OS file writes," - " %lu OS fsyncs\n" - "InnoDB: Starting InnoDB Monitor to print further\n" - "InnoDB: diagnostics to the standard output.\n", - (ulong) n_iterations, - (ulong) flush_failures, - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); + if (n_iterations > 20 + && srv_buf_pool_old_size == srv_buf_pool_size) { + + ib::warn() << "Difficult to find free blocks in the buffer pool" + " (" << n_iterations << " search iterations)! " + << flush_failures << " failed attempts to" + " flush a page! Consider increasing the buffer pool" + " size. 
It is also possible that in your Unix version" + " fsync is very slow, or completely frozen inside" + " the OS kernel. Then upgrading to a newer version" + " of your operating system may help. Look at the" + " number of fsyncs in diagnostic info below." + " Pending flushes (fsync) log: " + << fil_n_pending_log_flushes + << "; buffer pool: " + << fil_n_pending_tablespace_flushes + << ". " << os_n_file_reads << " OS file reads, " + << os_n_file_writes << " OS file writes, " + << os_n_fsyncs + << " OS fsyncs. Starting InnoDB Monitor to print" + " further diagnostics to the standard output."; mon_value_was = srv_print_innodb_monitor; - started_monitor = TRUE; - srv_print_innodb_monitor = TRUE; + started_monitor = true; + srv_print_innodb_monitor = true; os_event_set(srv_monitor_event); } @@ -1355,7 +1384,13 @@ loop: find a free block then we should sleep here to let the page_cleaner do an LRU batch for us. */ + if (!srv_read_only_mode) { + os_event_set(buf_flush_event); + } + if (n_iterations > 1) { + + MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS ); os_thread_sleep(10000); } @@ -1363,11 +1398,13 @@ loop: This call will flush one page from the LRU and put it on the free list. That means that the free block is up for grabs for all user threads. + TODO: A more elegant way would have been to return the freed up block to the caller here but the code that deals with removing the block from page_hash and LRU_list is fairly involved (particularly in case of compressed pages). We can do that in a separate patch sometime in future. 
*/ + if (!buf_flush_single_page_from_LRU(buf_pool)) { MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); ++flush_failures; @@ -1457,8 +1494,6 @@ buf_LRU_old_init( /*=============*/ buf_pool_t* buf_pool) { - buf_page_t* bpage; - ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); @@ -1466,10 +1501,13 @@ buf_LRU_old_init( the adjust function to move the LRU_old pointer to the right position */ - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL; + for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage)) { + ut_ad(bpage->in_LRU_list); ut_ad(buf_page_in_file(bpage)); + /* This loop temporarily violates the assertions of buf_page_set_old(). */ bpage->old = TRUE; @@ -1491,24 +1529,21 @@ buf_unzip_LRU_remove_block_if_needed( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_page_in_file(bpage)); ut_ad(buf_pool_mutex_own(buf_pool)); if (buf_page_belongs_to_unzip_LRU(bpage)) { - buf_block_t* block = (buf_block_t*) bpage; + buf_block_t* block = reinterpret_cast(bpage); ut_ad(block->in_unzip_LRU_list); ut_d(block->in_unzip_LRU_list = FALSE); - UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_REMOVE(buf_pool->unzip_LRU, block); } } /******************************************************************//** Adjust LRU hazard pointers if needed. 
*/ - void buf_LRU_adjust_hp( /*==============*/ @@ -1529,10 +1564,7 @@ buf_LRU_remove_block( buf_page_t* bpage) /*!< in: control block */ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ulint zip_size; - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_in_file(bpage)); @@ -1546,7 +1578,7 @@ buf_LRU_remove_block( /* If the LRU_old pointer is defined and points to just this block, move it backward one step */ - if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) { + if (bpage == buf_pool->LRU_old) { /* Below: the previous block is guaranteed to exist, because the LRU_old pointer is only allowed to differ @@ -1566,11 +1598,10 @@ buf_LRU_remove_block( } /* Remove the block from the LRU list */ - UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); + UT_LIST_REMOVE(buf_pool->LRU, bpage); ut_d(bpage->in_LRU_list = FALSE); - zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE; + buf_pool->stat.LRU_bytes -= bpage->size.physical(); buf_unzip_LRU_remove_block_if_needed(bpage); @@ -1578,8 +1609,10 @@ buf_LRU_remove_block( clear the "old" flags and return */ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + bpage != NULL; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { + /* This loop temporarily violates the assertions of buf_page_set_old(). */ bpage->old = FALSE; @@ -1605,7 +1638,6 @@ buf_LRU_remove_block( /******************************************************************//** Adds a block to the LRU list of decompressed zip pages. 
*/ -UNIV_INTERN void buf_unzip_LRU_add_block( /*====================*/ @@ -1615,8 +1647,6 @@ buf_unzip_LRU_add_block( { buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool); - ut_ad(block); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); @@ -1625,18 +1655,17 @@ buf_unzip_LRU_add_block( ut_d(block->in_unzip_LRU_list = TRUE); if (old) { - UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_ADD_LAST(buf_pool->unzip_LRU, block); } else { - UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, block); } } /******************************************************************//** -Adds a block to the LRU list end. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INLINE +Adds a block to the LRU list end. Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ +static void buf_LRU_add_block_to_end_low( /*=========================*/ @@ -1644,14 +1673,12 @@ buf_LRU_add_block_to_end_low( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_in_file(bpage)); ut_ad(!bpage->in_LRU_list); - UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage); + UT_LIST_ADD_LAST(buf_pool->LRU, bpage); ut_d(bpage->in_LRU_list = TRUE); incr_LRU_size_in_bytes(bpage, buf_pool); @@ -1684,10 +1711,9 @@ buf_LRU_add_block_to_end_low( } /******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ +Adds a block to the LRU list. 
Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ UNIV_INLINE void buf_LRU_add_block_low( @@ -1707,7 +1733,7 @@ buf_LRU_add_block_low( if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage); + UT_LIST_ADD_FIRST(buf_pool->LRU, bpage); bpage->freed_page_clock = buf_pool->freed_page_clock; } else { @@ -1720,8 +1746,9 @@ buf_LRU_add_block_low( ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old) || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); #endif /* UNIV_LRU_DEBUG */ - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, - bpage); + UT_LIST_INSERT_AFTER(buf_pool->LRU, buf_pool->LRU_old, + bpage); + buf_pool->LRU_old_len++; } @@ -1756,11 +1783,9 @@ buf_LRU_add_block_low( } /******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INTERN +Adds a block to the LRU list. Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ void buf_LRU_add_block( /*==============*/ @@ -1776,7 +1801,6 @@ buf_LRU_add_block( /******************************************************************//** Moves a block to the start of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_young( /*=====================*/ @@ -1796,7 +1820,6 @@ buf_LRU_make_block_young( /******************************************************************//** Moves a block to the end of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_old( /*===================*/ @@ -1817,7 +1840,6 @@ accessible via bpage. 
The caller must hold buf_pool->mutex and must not hold any buf_page_get_mutex() when calling this function. @return true if freed, false otherwise. */ -UNIV_INTERN bool buf_LRU_free_page( /*===============*/ @@ -1827,11 +1849,10 @@ buf_LRU_free_page( { buf_page_t* b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); @@ -1842,12 +1863,12 @@ buf_LRU_free_page( if (!buf_page_can_relocate(bpage)) { - /* Do not free buffer fixed or I/O-fixed blocks. */ + /* Do not free buffer fixed and I/O-fixed blocks. */ goto func_exit; } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); + ut_a(ibuf_count_get(bpage->id) == 0); #endif /* UNIV_IBUF_COUNT_DEBUG */ if (zip || !bpage->zip.data) { @@ -1878,28 +1899,19 @@ func_exit: ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Putting space %lu page %lu to free list\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("free page %u:%u", + bpage->id.space(), bpage->id.page_no())); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); ut_ad(buf_page_can_relocate(bpage)); if (!buf_LRU_block_remove_hashed(bpage, zip)) { return(true); } -#ifdef UNIV_SYNC_DEBUG /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + 
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL then it was a compressed page with an uncompressed frame and @@ -1908,20 +1920,22 @@ func_exit: into the LRU and page_hash (and possibly flush_list). if b == NULL then it was a regular page that has been freed */ - if (b) { + if (b != NULL) { buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b); rw_lock_x_lock(hash_lock); + mutex_enter(block_mutex); - ut_a(!buf_page_hash_get_low( - buf_pool, b->space, b->offset, fold)); + ut_a(!buf_page_hash_get_low(buf_pool, b->id)); b->state = b->oldest_modification ? BUF_BLOCK_ZIP_DIRTY : BUF_BLOCK_ZIP_PAGE; - UNIV_MEM_DESC(b->zip.data, - page_zip_get_size(&b->zip)); + + ut_ad(b->size.is_compressed()); + + UNIV_MEM_DESC(b->zip.data, b->size.physical()); /* The fields in_page_hash and in_LRU_list of the to-be-freed block descriptor should have @@ -1930,6 +1944,7 @@ func_exit: invokes buf_LRU_remove_block(). */ ut_ad(!bpage->in_page_hash); ut_ad(!bpage->in_LRU_list); + /* bpage->state was BUF_BLOCK_FILE_PAGE because b != NULL. The type cast below is thus valid. */ ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list); @@ -1940,25 +1955,24 @@ func_exit: ut_ad(b->in_page_hash); ut_ad(b->in_LRU_list); - HASH_INSERT(buf_page_t, hash, - buf_pool->page_hash, fold, b); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + b->id.fold(), b); /* Insert b where bpage was in the LRU list. 
*/ - if (UNIV_LIKELY(prev_b != NULL)) { + if (prev_b != NULL) { ulint lru_len; ut_ad(prev_b->in_LRU_list); ut_ad(buf_page_in_file(prev_b)); - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, - prev_b, b); + + UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, b); incr_LRU_size_in_bytes(b, buf_pool); if (buf_page_is_old(b)) { buf_pool->LRU_old_len++; - if (UNIV_UNLIKELY - (buf_pool->LRU_old - == UT_LIST_GET_NEXT(LRU, b))) { + if (buf_pool->LRU_old + == UT_LIST_GET_NEXT(LRU, b)) { buf_pool->LRU_old = b; } @@ -1997,33 +2011,27 @@ func_exit: } bpage->zip.data = NULL; + page_zip_set_size(&bpage->zip, 0); + + bpage->size.copy_from(page_size_t(bpage->size.logical(), + bpage->size.logical(), + false)); + mutex_exit(block_mutex); /* Prevent buf_page_get_gen() from decompressing the block while we release buf_pool->mutex and block_mutex. */ block_mutex = buf_page_get_mutex(b); + mutex_enter(block_mutex); + buf_page_set_sticky(b); + mutex_exit(block_mutex); rw_lock_x_unlock(hash_lock); - - } else { - - /* There can be multiple threads doing an LRU scan to - free a block. The page_cleaner thread can be doing an - LRU batch whereas user threads can potentially be doing - multiple single page flushes. As we release - buf_pool->mutex below we need to make sure that no one - else considers this block as a victim for page - replacement. This block is already out of page_hash - and we are about to remove it from the LRU list and put - it on the free list. */ - mutex_enter(block_mutex); - buf_page_set_sticky(bpage); - mutex_exit(block_mutex); } buf_pool_mutex_exit(buf_pool); @@ -2040,8 +2048,8 @@ func_exit: UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE); - if (b) { - ib_uint32_t checksum; + if (b != NULL) { + /* Compute and stamp the compressed page checksum while not holding any mutex. The block is already half-freed @@ -2049,12 +2057,13 @@ func_exit: buf_pool->page_hash, thus inaccessible by any other thread. 
*/ - checksum = static_cast( - page_zip_calc_checksum( - b->zip.data, - page_zip_get_size(&b->zip), - static_cast( - srv_checksum_algorithm))); + ut_ad(b->size.is_compressed()); + + const uint32_t checksum = page_zip_calc_checksum( + b->zip.data, + b->size.physical(), + static_cast( + srv_checksum_algorithm)); mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM, checksum); @@ -2062,17 +2071,21 @@ func_exit: buf_pool_mutex_enter(buf_pool); - mutex_enter(block_mutex); - buf_page_unset_sticky(b != NULL ? b : bpage); - mutex_exit(block_mutex); + if (b != NULL) { + mutex_enter(block_mutex); + + buf_page_unset_sticky(b); + + mutex_exit(block_mutex); + } buf_LRU_block_free_hashed_page((buf_block_t*) bpage); + return(true); } /******************************************************************//** Puts a block back to the free list. */ -UNIV_INTERN void buf_LRU_block_free_non_file_page( /*=============================*/ @@ -2081,17 +2094,17 @@ buf_LRU_block_free_non_file_page( void* data; buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(block); ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(mutex_own(&block->mutex)); + ut_ad(buf_page_mutex_own(block)); switch (buf_block_get_state(block)) { case BUF_BLOCK_MEMORY: case BUF_BLOCK_READY_FOR_USE: break; default: - fprintf(stderr, "InnoDB: Error: Block %p incorrect state %s in buf_LRU_block_free_non_file_page()\n", - block, buf_get_state_name(block)); + ib::error() << "Block:" << block + << " incorrect state:" << buf_get_state_name(block) + << " in buf_LRU_block_free_non_file_page"; return; /* Continue */ } @@ -2112,24 +2125,41 @@ buf_LRU_block_free_non_file_page( /* Wipe page_no and space_id */ memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4); memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4); -#endif +#endif /* UNIV_DEBUG */ data = block->page.zip.data; - if (data) { + if (data != NULL) { block->page.zip.data = NULL; - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); 
buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, data, page_zip_get_size(&block->page.zip)); + ut_ad(block->page.size.is_compressed()); + + buf_buddy_free(buf_pool, data, block->page.size.physical()); buf_pool_mutex_exit_allow(buf_pool); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); + page_zip_set_size(&block->page.zip, 0); + + block->page.size.copy_from( + page_size_t(block->page.size.logical(), + block->page.size.logical(), + false)); } - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page)); - ut_d(block->page.in_free_list = TRUE); + if (buf_pool->curr_size < buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target + && buf_block_will_withdrawn(buf_pool, block)) { + /* This should be withdrawn */ + UT_LIST_ADD_LAST( + buf_pool->withdraw, + &block->page); + ut_d(block->in_withdraw_list = TRUE); + } else { + UT_LIST_ADD_FIRST(buf_pool->free, &block->page); + ut_d(block->page.in_free_list = TRUE); + } UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); } @@ -2158,20 +2188,16 @@ buf_LRU_block_remove_hashed( bool zip) /*!< in: true if should remove also the compressed page of an uncompressed page */ { - ulint fold; const buf_page_t* hashed_bpage; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); rw_lock_t* hash_lock; - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); - fold = buf_page_address_fold(bpage->space, bpage->offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); ut_a(bpage->buf_fix_count == 0); @@ -2188,13 +2214,11 @@ buf_LRU_block_remove_hashed( buf_block_modify_clock_inc((buf_block_t*) bpage); if (bpage->zip.data) { const page_t* page = ((buf_block_t*) bpage)->frame; - const ulint 
zip_size - = page_zip_get_size(&bpage->zip); ut_a(!zip || bpage->oldest_modification == 0); + ut_ad(bpage->size.is_compressed()); - switch (UNIV_EXPECT(fil_page_get_type(page), - FIL_PAGE_INDEX)) { + switch (fil_page_get_type(page)) { case FIL_PAGE_TYPE_ALLOCATED: case FIL_PAGE_INODE: case FIL_PAGE_IBUF_BITMAP: @@ -2207,13 +2231,14 @@ buf_LRU_block_remove_hashed( to the compressed page, which will be preserved. */ memcpy(bpage->zip.data, page, - zip_size); + bpage->size.physical()); } break; case FIL_PAGE_TYPE_ZBLOB: case FIL_PAGE_TYPE_ZBLOB2: break; case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: #ifdef UNIV_ZIP_DEBUG ut_a(page_zip_validate( &bpage->zip, page, @@ -2221,14 +2246,16 @@ buf_LRU_block_remove_hashed( #endif /* UNIV_ZIP_DEBUG */ break; default: - ut_print_timestamp(stderr); - fputs(" InnoDB: ERROR: The compressed page" - " to be evicted seems corrupt:", stderr); - ut_print_buf(stderr, page, zip_size); - fputs("\nInnoDB: Possibly older version" - " of the page:", stderr); + ib::error() << "The compressed page to be" + " evicted seems corrupt:"; + ut_print_buf(stderr, page, + bpage->size.logical()); + + ib::error() << "Possibly older version of" + " the page:"; + ut_print_buf(stderr, bpage->zip.data, - zip_size); + bpage->size.physical()); putc('\n', stderr); ut_error; } @@ -2238,8 +2265,10 @@ buf_LRU_block_remove_hashed( /* fall through */ case BUF_BLOCK_ZIP_PAGE: ut_a(bpage->oldest_modification == 0); - UNIV_MEM_ASSERT_W(bpage->zip.data, - page_zip_get_size(&bpage->zip)); + if (bpage->size.is_compressed()) { + UNIV_MEM_ASSERT_W(bpage->zip.data, + bpage->size.physical()); + } break; case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_DIRTY: @@ -2251,41 +2280,35 @@ buf_LRU_block_remove_hashed( break; } - hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space, - bpage->offset, fold); + hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->id); + if (bpage != hashed_bpage) { + ib::error() << "Page " << bpage->id + << " not found in the hash table"; - if 
(UNIV_UNLIKELY(bpage != hashed_bpage)) { - fprintf(stderr, - "InnoDB: Error: page %lu %lu not found" - " in the hash table\n", - (ulong) bpage->space, - (ulong) bpage->offset); #ifdef UNIV_DEBUG - fprintf(stderr, - "InnoDB: in_page_hash %lu in_zip_hash %lu\n" - " in_free_list %lu in_flush_list %lu in_LRU_list %lu\n" - " zip.data %p zip_size %lu page_state %d\n", - bpage->in_page_hash, bpage->in_zip_hash, - bpage->in_free_list, bpage->in_flush_list, - bpage->in_LRU_list, bpage->zip.data, - buf_page_get_zip_size(bpage), - buf_page_get_state(bpage)); + + + ib::error() + << "in_page_hash:" << bpage->in_page_hash + << " in_zip_hash:" << bpage->in_zip_hash + // << " in_free_list:"<< bpage->in_fee_list + << " in_flush_list:" << bpage->in_flush_list + << " in_LRU_list:" << bpage->in_LRU_list + << " zip.data:" << bpage->zip.data + << " zip_size:" << bpage->size.logical() + << " page_state:" << buf_page_get_state(bpage); #else - fprintf(stderr, - "InnoDB: zip.data %p zip_size %lu page_state %d\n", - bpage->zip.data, - buf_page_get_zip_size(bpage), - buf_page_get_state(bpage)); + ib::error() + << " zip.data:" << bpage->zip.data + << " zip_size:" << bpage->size.logical() + << " page_state:" << buf_page_get_state(bpage); #endif if (hashed_bpage) { - fprintf(stderr, - "InnoDB: In hash table we find block" - " %p of %lu %lu which is not %p\n", - (const void*) hashed_bpage, - (ulong) hashed_bpage->space, - (ulong) hashed_bpage->offset, - (const void*) bpage); + + ib::error() << "In hash table we find block " + << hashed_bpage << " of " << hashed_bpage->id + << " which is not " << bpage; } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -2303,26 +2326,28 @@ buf_LRU_block_remove_hashed( ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_page_hash); ut_d(bpage->in_page_hash = FALSE); - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); + + HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, bpage->id.fold(), + bpage); + switch (buf_page_get_state(bpage)) { case 
BUF_BLOCK_ZIP_PAGE: ut_ad(!bpage->in_free_list); ut_ad(!bpage->in_flush_list); ut_ad(!bpage->in_LRU_list); ut_a(bpage->zip.data); - ut_a(buf_page_get_zip_size(bpage)); + ut_a(bpage->size.is_compressed()); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage); + UT_LIST_REMOVE(buf_pool->zip_clean, bpage); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ mutex_exit(&buf_pool->zip_mutex); rw_lock_x_unlock(hash_lock); buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, bpage->zip.data, - page_zip_get_size(&bpage->zip)); + buf_buddy_free(buf_pool, bpage->zip.data, + bpage->size.physical()); buf_pool_mutex_exit_allow(buf_pool); buf_page_free_descriptor(bpage); @@ -2337,11 +2362,6 @@ buf_LRU_block_remove_hashed( UNIV_PAGE_SIZE); buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); - if (buf_pool->flush_rbt == NULL) { - bpage->space = ULINT32_UNDEFINED; - bpage->offset = ULINT32_UNDEFINED; - } - /* Question: If we release bpage and hash mutex here then what protects us against: 1) Some other thread buffer fixing this page @@ -2374,12 +2394,16 @@ buf_LRU_block_remove_hashed( ut_ad(!bpage->in_LRU_list); buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, data, - page_zip_get_size(&bpage->zip)); + buf_buddy_free(buf_pool, data, bpage->size.physical()); buf_pool_mutex_exit_allow(buf_pool); + page_zip_set_size(&bpage->zip, 0); + + bpage->size.copy_from( + page_size_t(bpage->size.logical(), + bpage->size.logical(), + false)); } return(true); @@ -2406,21 +2430,23 @@ buf_LRU_block_free_hashed_page( buf_block_t* block) /*!< in: block, must contain a file page and be in a state where it can be freed */ { -#ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_block(block); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); + + if (buf_pool->flush_rbt == NULL) { + block->page.id.reset(ULINT32_UNDEFINED, ULINT32_UNDEFINED); + } + buf_block_set_state(block, 
BUF_BLOCK_MEMORY); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } /******************************************************************//** Remove one page from LRU list and put it to free list */ -UNIV_INTERN void buf_LRU_free_one_page( /*==================*/ @@ -2429,10 +2455,9 @@ buf_LRU_free_one_page( may or may not be a hash index to the page */ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); @@ -2444,16 +2469,15 @@ buf_LRU_free_one_page( } /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); + ut_ad(!mutex_own(block_mutex)); } /**********************************************************************//** Updates buf_pool->LRU_old_ratio for one buffer pool instance. -@return updated old_pct */ +@return updated old_pct */ static uint buf_LRU_old_ratio_update_instance( @@ -2481,7 +2505,7 @@ buf_LRU_old_ratio_update_instance( buf_pool->LRU_old_ratio = ratio; if (UT_LIST_GET_LEN(buf_pool->LRU) - >= BUF_LRU_OLD_MIN_LEN) { + >= BUF_LRU_OLD_MIN_LEN) { buf_LRU_old_adjust_len(buf_pool); } @@ -2498,9 +2522,8 @@ buf_LRU_old_ratio_update_instance( /**********************************************************************//** Updates buf_pool->LRU_old_ratio. 
-@return updated old_pct */ -UNIV_INTERN -ulint +@return updated old_pct */ +uint buf_LRU_old_ratio_update( /*=====================*/ uint old_pct,/*!< in: Reserve this percentage of @@ -2509,10 +2532,9 @@ buf_LRU_old_ratio_update( FALSE=just assign buf_pool->LRU_old_ratio during the initialization of InnoDB */ { - ulint i; - ulint new_ratio = 0; + uint new_ratio = 0; - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); @@ -2527,24 +2549,22 @@ buf_LRU_old_ratio_update( /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ -UNIV_INTERN void buf_LRU_stat_update(void) /*=====================*/ { - ulint i; buf_LRU_stat_t* item; buf_pool_t* buf_pool; - ibool evict_started = FALSE; + bool evict_started = FALSE; buf_LRU_stat_t cur_stat; /* If we haven't started eviction yet then don't update stats. 
*/ - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); if (buf_pool->freed_page_clock != 0) { - evict_started = TRUE; + evict_started = true; break; } } @@ -2586,33 +2606,32 @@ buf_LRU_validate_instance( /*======================*/ buf_pool_t* buf_pool) { - buf_page_t* bpage; - buf_block_t* block; ulint old_len; ulint new_len; - ut_ad(buf_pool); buf_pool_mutex_enter(buf_pool); if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { ut_a(buf_pool->LRU_old); old_len = buf_pool->LRU_old_len; + new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, UT_LIST_GET_LEN(buf_pool->LRU) - (BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN)); + ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); } - UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()); + CheckInLRUList::validate(buf_pool); old_len = 0; - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { @@ -2650,21 +2669,19 @@ buf_LRU_validate_instance( ut_a(buf_pool->LRU_old_len == old_len); - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList()); + CheckInFreeList::validate(buf_pool); - for (bpage = UT_LIST_GET_FIRST(buf_pool->free); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free); bpage != NULL; bpage = UT_LIST_GET_NEXT(list, bpage)) { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED); } - UT_LIST_VALIDATE( - unzip_LRU, buf_block_t, buf_pool->unzip_LRU, - CheckUnzipLRUAndLRUList()); + CheckUnzipLRUAndLRUList::validate(buf_pool); - for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); - block; + for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); + block != NULL; block = UT_LIST_GET_NEXT(unzip_LRU, block)) { ut_ad(block->in_unzip_LRU_list); @@ -2677,15 +2694,12 @@ 
buf_LRU_validate_instance( /**********************************************************************//** Validates the LRU list. -@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_LRU_validate(void) /*==================*/ { - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); @@ -2699,25 +2713,21 @@ buf_LRU_validate(void) #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Prints the LRU list for one buffer pool instance. */ -UNIV_INTERN void buf_LRU_print_instance( /*===================*/ buf_pool_t* buf_pool) { - const buf_page_t* bpage; - - ut_ad(buf_pool); buf_pool_mutex_enter(buf_pool); - bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - - while (bpage != NULL) { + for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + bpage != NULL; + bpage = UT_LIST_GET_NEXT(LRU, bpage)) { mutex_enter(buf_page_get_mutex(bpage)); - fprintf(stderr, "BLOCK space %lu page %lu ", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); + + fprintf(stderr, "BLOCK space %u page %u ", + bpage->id.space(), bpage->id.page_no()); if (buf_page_is_old(bpage)) { fputs("old ", stderr); @@ -2742,17 +2752,17 @@ buf_LRU_print_instance( case BUF_BLOCK_FILE_PAGE: frame = buf_block_get_frame((buf_block_t*) bpage); fprintf(stderr, "\ntype %lu" - " index id %llu\n", + " index id " IB_ID_FMT "\n", (ulong) fil_page_get_type(frame), - (ullint) btr_page_get_index_id(frame)); + btr_page_get_index_id(frame)); break; case BUF_BLOCK_ZIP_PAGE: frame = bpage->zip.data; fprintf(stderr, "\ntype %lu size %lu" - " index id %llu\n", + " index id " IB_ID_FMT "\n", (ulong) fil_page_get_type(frame), - (ulong) buf_page_get_zip_size(bpage), - (ullint) btr_page_get_index_id(frame)); + (ulong) bpage->size.physical(), + btr_page_get_index_id(frame)); break; default: @@ 
-2762,7 +2772,6 @@ buf_LRU_print_instance( } mutex_exit(buf_page_get_mutex(bpage)); - bpage = UT_LIST_GET_NEXT(LRU, bpage); } buf_pool_mutex_exit(buf_pool); @@ -2770,15 +2779,13 @@ buf_LRU_print_instance( /**********************************************************************//** Prints the LRU list. */ -UNIV_INTERN void buf_LRU_print(void) /*===============*/ { - ulint i; - buf_pool_t* buf_pool; + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; - for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); buf_LRU_print_instance(buf_pool); } diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index e990ba785e7..117de5cc948 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -43,7 +43,6 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com #include "ibuf0ibuf.h" #include "log0log.h" #include "os0file.h" -#include "os0sync.h" #include "trx0sys.h" #include "srv0mon.h" #include "mysql/plugin.h" @@ -122,7 +121,6 @@ typedef struct wrk_itm typedef struct thread_data { os_thread_id_t wthread_id; /*!< Identifier */ - os_thread_t wthread; /*!< Thread id */ wthr_status_t wt_status; /*!< Worker thread status */ } thread_data_t; @@ -130,7 +128,7 @@ typedef struct thread_data typedef struct thread_sync { /* Global variables used by all threads */ - os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below + ib_mutex_t thread_global_mtx; /*!< Mutex used protecting below variables */ ulint n_threads; /*!< Number of threads */ ib_wqueue_t *wq; /*!< Work Queue */ @@ -149,7 +147,7 @@ typedef struct thread_sync static int mtflush_work_initialized = -1; static thread_sync_t* mtflush_ctx=NULL; -static os_fast_mutex_t mtflush_mtx; +static ib_mutex_t mtflush_mtx; /******************************************************************//** Set multi-threaded flush work initialized. 
*/ @@ -211,7 +209,7 @@ buf_mtflu_flush_pool_instance( buf_pool_mutex_enter(work_item->wr.buf_pool); work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); buf_pool_mutex_exit(work_item->wr.buf_pool); - work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); + work_item->wr.min = ut_min((ulint)srv_LRU_scan_depth,(ulint)work_item->wr.min); } buf_flush_batch(work_item->wr.buf_pool, @@ -324,7 +322,7 @@ DECLARE_THREAD(mtflush_io_thread)( ulint i; /* Find correct slot for this thread */ - os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); + mutex_enter(&(mtflush_io->thread_global_mtx)); for(i=0; i < mtflush_io->n_threads; i ++) { if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { break; @@ -333,7 +331,7 @@ DECLARE_THREAD(mtflush_io_thread)( ut_a(i <= mtflush_io->n_threads); this_thread_data = &mtflush_io->thread_data[i]; - os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); + mutex_exit(&(mtflush_io->thread_global_mtx)); while (TRUE) { @@ -352,7 +350,7 @@ DECLARE_THREAD(mtflush_io_thread)( } } - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } @@ -389,7 +387,7 @@ buf_mtflu_io_thread_exit(void) been processed. Thus, we can get this mutex if and only if work queue is empty. 
*/ - os_fast_mutex_lock(&mtflush_mtx); + mutex_enter(&mtflush_mtx); /* Make sure the work queue is empty */ ut_a(ib_wqueue_is_empty(mtflush_io->wq)); @@ -408,7 +406,7 @@ buf_mtflu_io_thread_exit(void) } /* Requests sent */ - os_fast_mutex_unlock(&mtflush_mtx); + mutex_exit(&mtflush_mtx); /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { @@ -440,7 +438,7 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_nowait(mtflush_io->wq); } - os_fast_mutex_lock(&mtflush_mtx); + mutex_enter(&mtflush_mtx); ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); @@ -460,9 +458,9 @@ buf_mtflu_io_thread_exit(void) mem_heap_free(mtflush_io->wheap); mem_heap_free(mtflush_io->rheap); - os_fast_mutex_unlock(&mtflush_mtx); - os_fast_mutex_free(&mtflush_mtx); - os_fast_mutex_free(&mtflush_io->thread_global_mtx); + mutex_exit(&mtflush_mtx); + mutex_free(&mtflush_mtx); + mutex_free(&mtflush_io->thread_global_mtx); } /******************************************************************//** @@ -505,8 +503,8 @@ buf_mtflu_handler_init( mtflush_ctx->wheap = mtflush_heap; mtflush_ctx->rheap = mtflush_heap2; - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + mutex_create(LATCH_ID_MTFLUSH_THREAD_MUTEX, &mtflush_ctx->thread_global_mtx); + mutex_create(LATCH_ID_MTFLUSH_MUTEX, &mtflush_mtx); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { @@ -514,7 +512,7 @@ buf_mtflu_handler_init( mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; - mtflush_ctx->thread_data[i].wthread = os_thread_create( + os_thread_create( mtflush_io_thread, ((void *) mtflush_ctx), &new_thread_id); @@ -647,11 +645,11 @@ buf_mtflu_flush_list( } /* This lock is to safequard against re-entry if any. 
*/ - os_fast_mutex_lock(&mtflush_mtx); + mutex_enter(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt, BUF_FLUSH_LIST, min_n, lsn_limit); - os_fast_mutex_unlock(&mtflush_mtx); + mutex_exit(&mtflush_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { if (n_processed) { @@ -704,10 +702,10 @@ buf_mtflu_flush_LRU_tail(void) } /* This lock is to safeguard against re-entry if any */ - os_fast_mutex_lock(&mtflush_mtx); + mutex_enter(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); - os_fast_mutex_unlock(&mtflush_mtx); + mutex_exit(&mtflush_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { total_flushed += cnt[i].flushed+cnt[i].evicted; diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 285fc465160..e96c61d01f9 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2015. MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under @@ -24,11 +24,12 @@ The database buffer read Created 11/5/1995 Heikki Tuuri *******************************************************/ -#include "buf0rea.h" +#include "ha_prototypes.h" +#include +#include "buf0rea.h" #include "fil0fil.h" #include "mtr0mtr.h" - #include "buf0buf.h" #include "buf0flu.h" #include "buf0lru.h" @@ -39,8 +40,6 @@ Created 11/5/1995 Heikki Tuuri #include "os0file.h" #include "srv0start.h" #include "srv0srv.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" /** There must be at least this many pages in buf_pool in the area to start a random read-ahead */ @@ -91,62 +90,50 @@ buf_read_page_handle_error( buf_pool_mutex_exit(buf_pool); } -/********************************************************************//** -Low-level function which reads a page asynchronously from a file to the +/** Low-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there, in which case does nothing. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by an i/o-handler thread. + +@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED or + DB_TABLESPACE_TRUNCATED if we are trying + to read from a non-existent tablespace, a + tablespace which is just now being dropped, + or a tablespace which is truncated +@param[in] sync true if synchronous aio is desired +@param[in] type IO type, SIMULATED, IGNORE_MISSING +@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ..., +@param[in] page_id page id +@param[in] unzip true=request uncompressed page @return 1 if a read request was queued, 0 if the page already resided in buf_pool, or if the page is in the doublewrite buffer blocks in which case it is never read into the pool, or if the tablespace does -not exist or is being dropped -@return 1 if read request is issued. 
0 if it is not */ +not exist or is being dropped */ static ulint buf_read_page_low( -/*==============*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are - trying to read from a non-existent tablespace, or a - tablespace which is just now being dropped */ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ..., - ORed to OS_AIO_SIMULATED_WAKE_LATER (see below - at read-ahead functions) */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - ibool unzip, /*!< in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version, /*!< in: if the space memory object has - this timestamp different from what we are giving here, - treat the tablespace as dropped; this is a timestamp we - use to stop dangling page reads from a tablespace - which we have DISCARDed + IMPORTed back */ - ulint offset, /*!< in: page number */ + dberr_t* err, + bool sync, + ulint type, + ulint mode, + const page_id_t& page_id, + const page_size_t& page_size, + bool unzip, buf_page_t** rbpage) /*!< out: page */ { buf_page_t* bpage; - ulint wake_later; - ibool ignore_nonexistent_pages; *err = DB_SUCCESS; - wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; - mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; - - ignore_nonexistent_pages = mode & BUF_READ_IGNORE_NONEXISTENT_PAGES; - mode &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; - - if (space == TRX_SYS_SPACE && buf_dblwr_page_inside(offset)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: trying to read" - " doublewrite buffer page %lu\n", - (ulong) offset); + if (page_id.space() == TRX_SYS_SPACE + && buf_dblwr_page_inside(page_id.page_no())) { + ib::error() << "Trying to read doublewrite buffer page " + << page_id; return(0); } - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { + if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) { /* Trx sys header is so low in the 
latching order that we play safe and do not leave the i/o-completion to an asynchronous @@ -161,60 +148,81 @@ buf_read_page_low( or is being dropped; if we succeed in initing the page in the buffer pool for read, then DISCARD cannot proceed until the read has completed */ - bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip, - tablespace_version, offset); + bpage = buf_page_init_for_read(err, mode, page_id, page_size, unzip); + if (bpage == NULL) { return(0); } -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Posting read request for page %lu, sync %s\n", - (ulong) offset, sync ? "true" : "false"); - } -#endif + DBUG_PRINT("ib_buf", ("read page %u:%u size=%u unzip=%u,%s", + (unsigned) page_id.space(), + (unsigned) page_id.page_no(), + (unsigned) page_size.physical(), + (unsigned) unzip, + sync ? "sync" : "async")); ut_ad(buf_page_in_file(bpage)); - byte* frame = zip_size ? bpage->zip.data : ((buf_block_t*) bpage)->frame; - if (sync) { thd_wait_begin(NULL, THD_WAIT_DISKIO); } - if (zip_size) { - *err = fil_io(OS_FILE_READ | wake_later - | ignore_nonexistent_pages, - sync, space, zip_size, offset, 0, zip_size, - frame, bpage, &bpage->write_size); + void* dst; + + if (page_size.is_compressed()) { + dst = bpage->zip.data; } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - *err = fil_io(OS_FILE_READ | wake_later - | ignore_nonexistent_pages, - sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - frame, bpage, - &bpage->write_size); + dst = ((buf_block_t*) bpage)->frame; } + IORequest request(type | IORequest::READ); + + ut_ad(dst != NULL); + ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); + + *err = fil_io( + request, sync, page_id, page_size, 0, page_size.physical(), + dst, bpage, NULL); + + ut_ad(dst != NULL); + ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); + ut_ad(bpage->id.space() == page_id.space()); + if (sync) { thd_wait_end(NULL); } if (*err != DB_SUCCESS) { - if 
(ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) { + if (*err == DB_TABLESPACE_TRUNCATED) { + /* Remove the page which is outside the + truncated tablespace bounds when recovering + from a crash happened during a truncation */ + buf_read_page_handle_error(bpage); + if (recv_recovery_on) { + mutex_enter(&recv_sys->mutex); + ut_ad(recv_sys->n_addrs > 0); + recv_sys->n_addrs--; + mutex_exit(&recv_sys->mutex); + } + return(0); + } else if (IORequest::ignore_missing(type) + || *err == DB_TABLESPACE_DELETED) { buf_read_page_handle_error(bpage); return(0); } - /* else */ + ut_error; } if (sync) { + ut_ad(dst != NULL); + ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); /* The i/o is already completed when we arrive from fil_read */ + if (!buf_page_io_complete(bpage)) { if (rbpage) { *rbpage = bpage; @@ -230,8 +238,7 @@ buf_read_page_low( return(1); } -/********************************************************************//** -Applies a random read-ahead in buf_pool if there are at least a threshold +/** Applies a random read-ahead in buf_pool if there are at least a threshold value of accessed pages from the random read-ahead area. Does not read any page, not even the one at the position (space, offset), if the read-ahead mechanism is not activated. NOTE 1: the calling thread may own latches on @@ -240,24 +247,20 @@ end up waiting for these latches! NOTE 2: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation which could result in a deadlock if the OS does not support asynchronous i/o. +@param[in] page_id page id of a page which the current thread +wants to access +@param[in] page_size page size +@param[in] inside_ibuf TRUE if we are inside ibuf routine @return number of page read requests issued; NOTE that if we read ibuf pages, it may happen that the page at the given page number does not -get read even if we return a positive value! 
-@return number of page read requests issued */ -UNIV_INTERN +get read even if we return a positive value! */ ulint buf_read_ahead_random( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, - or 0 */ - ulint offset, /*!< in: page number of a page which - the current thread wants to access */ - ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf - routine */ + const page_id_t& page_id, + const page_size_t& page_size, + ibool inside_ibuf) { - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ib_int64_t tablespace_version; + buf_pool_t* buf_pool = buf_pool_get(page_id); ulint recent_blocks = 0; ulint ibuf_mode; ulint count; @@ -277,8 +280,7 @@ buf_read_ahead_random( return(0); } - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { + if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) { /* If it is an ibuf bitmap page or trx sys hdr, we do no read-ahead, as that could break the ibuf page access @@ -287,19 +289,22 @@ buf_read_ahead_random( return(0); } - /* Remember the tablespace version before we ask te tablespace size + low = (page_id.page_no() / buf_read_ahead_random_area) + * buf_read_ahead_random_area; + + high = (page_id.page_no() / buf_read_ahead_random_area + 1) + * buf_read_ahead_random_area; + + /* Remember the tablespace version before we ask the tablespace size below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we do not try to read outside the bounds of the tablespace! 
*/ - - tablespace_version = fil_space_get_version(space); - - low = (offset / buf_read_ahead_random_area) - * buf_read_ahead_random_area; - high = (offset / buf_read_ahead_random_area + 1) - * buf_read_ahead_random_area; - if (high > fil_space_get_size(space)) { - - high = fil_space_get_size(space); + if (fil_space_t* space = fil_space_acquire(page_id.space())) { + if (high > space->size) { + high = space->size; + } + fil_space_release(space); + } else { + return(0); } buf_pool_mutex_enter(buf_pool); @@ -315,10 +320,10 @@ buf_read_ahead_random( that is, reside near the start of the LRU list. */ for (i = low; i < high; i++) { - const buf_page_t* bpage = - buf_page_hash_get(buf_pool, space, i); + const buf_page_t* bpage = buf_page_hash_get( + buf_pool, page_id_t(page_id.space(), i)); - if (bpage + if (bpage != NULL && buf_page_is_accessed(bpage) && buf_page_peek_if_young(bpage)) { @@ -352,21 +357,22 @@ read_ahead: /* It is only sensible to do read-ahead in the non-sync aio mode: hence FALSE as the first parameter */ - if (!ibuf_bitmap_page(zip_size, i)) { + const page_id_t cur_page_id(page_id.space(), i); + + if (!ibuf_bitmap_page(cur_page_id, page_size)) { + buf_page_t* rpage = NULL; count += buf_read_page_low( &err, false, - ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, FALSE, - tablespace_version, i, NULL); + IORequest::DO_NOT_WAKE, + ibuf_mode, + cur_page_id, page_size, false, &rpage); + if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: in random" - " readahead trying to access\n" - "InnoDB: tablespace %lu page %lu,\n" - "InnoDB: but the tablespace does not" - " exist or is just being dropped.\n", - (ulong) space, (ulong) i); + ib::warn() << "Random readahead trying to" + " access page " << cur_page_id + << " in nonexisting or" + " being-dropped tablespace"; + break; } } } @@ -377,14 +383,12 @@ read_ahead: os_aio_simulated_wake_handler_threads(); -#ifdef UNIV_DEBUG - if (buf_debug_prints && 
(count > 0)) { - fprintf(stderr, - "Random read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, - (ulong) count); + if (count) { + DBUG_PRINT("ib_buf", ("random read-ahead %u pages, %u:%u", + (unsigned) count, + (unsigned) page_id.space(), + (unsigned) page_id.page_no())); } -#endif /* UNIV_DEBUG */ /* Read ahead is considered one I/O operation for the purpose of LRU policy decision. */ @@ -395,42 +399,37 @@ read_ahead: return(count); } -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the +/** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by the i/o-handler thread. +@param[in] page_id page id +@param[in] page_size page size @return TRUE if page has been read in, FALSE in case of failure */ -UNIV_INTERN ibool buf_read_page( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size, buf_page_t** bpage) /*!< out: page */ { - ib_int64_t tablespace_version; ulint count; dberr_t err; - tablespace_version = fil_space_get_version(space); + /* We do synchronous IO because our AIO completion code + is sub-optimal. See buf_page_io_complete(), we have to + acquire the buffer pool mutex before acquiring the block + mutex, required for updating the page state. The acquire + of the buffer pool mutex becomes an expensive bottleneck. 
*/ - /* We do the i/o in the synchronous aio mode to save thread - switches: hence TRUE */ + count = buf_read_page_low( + &err, true, + 0, BUF_READ_ANY_PAGE, page_id, page_size, false, bpage); - count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space, - zip_size, FALSE, - tablespace_version, offset, bpage); srv_stats.buf_pool_reads.add(count); + if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: trying to access" - " tablespace %lu page no. %lu,\n" - "InnoDB: but the tablespace does not exist" - " or is just being dropped.\n", - (ulong) space, (ulong) offset); + ib::error() << "trying to read page " << page_id + << " in nonexisting or being-dropped tablespace"; } /* Increment number of I/O operations used for LRU policy. */ @@ -439,37 +438,30 @@ buf_read_page( return(count > 0); } -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the +/** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by the i/o-handler thread. 
+@param[in] page_id page id +@param[in] page_size page size +@param[in] sync true if synchronous aio is desired @return TRUE if page has been read in, FALSE in case of failure */ -UNIV_INTERN ibool -buf_read_page_async( -/*================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ +buf_read_page_background( + const page_id_t& page_id, + const page_size_t& page_size, + bool sync) { - ulint zip_size; - ib_int64_t tablespace_version; ulint count; dberr_t err; + buf_page_t* rbpage = NULL; - zip_size = fil_space_get_zip_size(space); + count = buf_read_page_low( + &err, sync, + IORequest::DO_NOT_WAKE | IORequest::IGNORE_MISSING, + BUF_READ_ANY_PAGE, + page_id, page_size, false, &rbpage); - if (zip_size == ULINT_UNDEFINED) { - return(FALSE); - } - - tablespace_version = fil_space_get_version(space); - - count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER - | BUF_READ_IGNORE_NONEXISTENT_PAGES, - space, zip_size, FALSE, - tablespace_version, offset, NULL); srv_stats.buf_pool_reads.add(count); /* We do not increment number of I/O operations used for LRU policy @@ -482,8 +474,7 @@ buf_read_page_async( return(count > 0); } -/********************************************************************//** -Applies linear read-ahead if in the buf_pool the page is a border page of +/** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. Does not read any page if the read-ahead mechanism is not activated. Note that the algorithm looks at the 'natural' adjacent successor and @@ -505,28 +496,25 @@ latches! NOTE 3: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation which could result in a deadlock if the OS does not support asynchronous io. 
-@return number of page read requests issued */ -UNIV_INTERN +@param[in] page_id page id; see NOTE 3 above +@param[in] page_size page size +@param[in] inside_ibuf TRUE if we are inside ibuf routine +@return number of page read requests issued */ ulint buf_read_ahead_linear( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf routine */ + const page_id_t& page_id, + const page_size_t& page_size, + ibool inside_ibuf) { - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ib_int64_t tablespace_version; + buf_pool_t* buf_pool = buf_pool_get(page_id); buf_page_t* bpage; buf_frame_t* frame; buf_page_t* pred_bpage = NULL; ulint pred_offset; ulint succ_offset; - ulint count; int asc_or_desc; ulint new_offset; ulint fail_count; - ulint ibuf_mode; ulint low, high; dberr_t err; ulint i; @@ -539,24 +527,23 @@ buf_read_ahead_linear( return(0); } - if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { + if (srv_startup_is_before_trx_rollback_phase) { /* No read-ahead to avoid thread deadlocks */ return(0); } - low = (offset / buf_read_ahead_linear_area) + low = (page_id.page_no() / buf_read_ahead_linear_area) * buf_read_ahead_linear_area; - high = (offset / buf_read_ahead_linear_area + 1) + high = (page_id.page_no() / buf_read_ahead_linear_area + 1) * buf_read_ahead_linear_area; - if ((offset != low) && (offset != high - 1)) { + if ((page_id.page_no() != low) && (page_id.page_no() != high - 1)) { /* This is not a border page of the area: return */ return(0); } - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { + if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) { /* If it is an ibuf bitmap page or trx sys hdr, we do no read-ahead, as that could break the ibuf page access @@ -568,18 +555,22 @@ buf_read_ahead_linear( /* Remember the 
tablespace version before we ask te tablespace size below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we do not try to read outside the bounds of the tablespace! */ + ulint space_size; - tablespace_version = fil_space_get_version(space); - - buf_pool_mutex_enter(buf_pool); - - if (high > fil_space_get_size(space)) { - buf_pool_mutex_exit(buf_pool); - /* The area is not whole, return */ + if (fil_space_t* space = fil_space_acquire(page_id.space())) { + space_size = space->size; + fil_space_release(space); + if (high > space_size) { + /* The area is not whole */ + return(0); + } + } else { return(0); } + buf_pool_mutex_enter(buf_pool); + if (buf_pool->n_pend_reads > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { buf_pool_mutex_exit(buf_pool); @@ -593,19 +584,20 @@ buf_read_ahead_linear( asc_or_desc = 1; - if (offset == low) { + if (page_id.page_no() == low) { asc_or_desc = -1; } /* How many out of order accessed pages can we ignore when working out the access pattern for linear readahead */ - threshold = ut_min((64 - srv_read_ahead_threshold), + threshold = ut_min(static_cast(64 - srv_read_ahead_threshold), BUF_READ_AHEAD_AREA(buf_pool)); fail_count = 0; for (i = low; i < high; i++) { - bpage = buf_page_hash_get(buf_pool, space, i); + bpage = buf_page_hash_get(buf_pool, + page_id_t(page_id.space(), i)); if (bpage == NULL || !buf_page_is_accessed(bpage)) { /* Not accessed */ @@ -643,7 +635,7 @@ buf_read_ahead_linear( /* If we got this far, we know that enough pages in the area have been accessed in the right order: linear read-ahead can be sensible */ - bpage = buf_page_hash_get(buf_pool, space, offset); + bpage = buf_page_hash_get(buf_pool, page_id); if (bpage == NULL) { buf_pool_mutex_exit(buf_pool); @@ -674,12 +666,14 @@ buf_read_ahead_linear( buf_pool_mutex_exit(buf_pool); - if ((offset == low) && (succ_offset == offset + 1)) { + if ((page_id.page_no() == low) + && (succ_offset == page_id.page_no() + 1)) { /* This is ok, we can continue */ 
new_offset = pred_offset; - } else if ((offset == high - 1) && (pred_offset == offset - 1)) { + } else if ((page_id.page_no() == high - 1) + && (pred_offset == page_id.page_no() - 1)) { /* This is ok, we can continue */ new_offset = succ_offset; @@ -700,19 +694,19 @@ buf_read_ahead_linear( return(0); } - if (high > fil_space_get_size(space)) { + if (high > space_size) { /* The area is not whole, return */ return(0); } + ulint count = 0; + /* If we got this far, read-ahead can be sensible: do it */ - ibuf_mode = inside_ibuf - ? BUF_READ_IBUF_PAGES_ONLY | OS_AIO_SIMULATED_WAKE_LATER - : BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER; + ulint ibuf_mode; - count = 0; + ibuf_mode = inside_ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE; /* Since Windows XP seems to schedule the i/o handler thread very eagerly, and consequently it does not wait for the @@ -724,20 +718,22 @@ buf_read_ahead_linear( /* It is only sensible to do read-ahead in the non-sync aio mode: hence FALSE as the first parameter */ - if (!ibuf_bitmap_page(zip_size, i)) { + const page_id_t cur_page_id(page_id.space(), i); + + if (!ibuf_bitmap_page(cur_page_id, page_size)) { + buf_page_t* rpage = NULL; + count += buf_read_page_low( &err, false, - ibuf_mode, - space, zip_size, FALSE, tablespace_version, i, NULL); + IORequest::DO_NOT_WAKE, + ibuf_mode, cur_page_id, page_size, false, &rpage); + if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: in" - " linear readahead trying to access\n" - "InnoDB: tablespace %lu page %lu,\n" - "InnoDB: but the tablespace does not" - " exist or is just being dropped.\n", - (ulong) space, (ulong) i); + ib::warn() << "linear readahead trying to" + " access page " + << page_id_t(page_id.space(), i) + << " in nonexisting or being-dropped" + " tablespace"; } } } @@ -748,13 +744,13 @@ buf_read_ahead_linear( os_aio_simulated_wake_handler_threads(); -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - 
fprintf(stderr, - "LINEAR read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, (ulong) count); + if (count) { + DBUG_PRINT("ib_buf", ("linear read-ahead %lu pages, " + "%lu:%lu", + count, + (ulint)page_id.space(), + (ulint)page_id.page_no())); } -#endif /* UNIV_DEBUG */ /* Read ahead is considered one I/O operation for the purpose of LRU policy decision. */ @@ -768,7 +764,6 @@ buf_read_ahead_linear( Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like a read-ahead function. */ -UNIV_INTERN void buf_read_ibuf_merge_pages( /*======================*/ @@ -778,13 +773,6 @@ buf_read_ibuf_merge_pages( to get read in, before this function returns */ const ulint* space_ids, /*!< in: array of space ids */ - const ib_int64_t* space_versions,/*!< in: the spaces must have - this version number - (timestamp), otherwise we - discard the read; we use this - to cancel reads if DISCARD + - IMPORT may have changed the - tablespace size */ const ulint* page_nos, /*!< in: array of page numbers to read, with the highest page number the last in the @@ -792,100 +780,94 @@ buf_read_ibuf_merge_pages( ulint n_stored) /*!< in: number of elements in the arrays */ { - ulint i; - #ifdef UNIV_IBUF_DEBUG ut_a(n_stored < UNIV_PAGE_SIZE); #endif - for (i = 0; i < n_stored; i++) { - dberr_t err; - buf_pool_t* buf_pool; - ulint zip_size = fil_space_get_zip_size(space_ids[i]); + for (ulint i = 0; i < n_stored; i++) { + const page_id_t page_id(space_ids[i], page_nos[i]); - buf_pool = buf_pool_get(space_ids[i], page_nos[i]); + buf_pool_t* buf_pool = buf_pool_get(page_id); + buf_page_t* rpage = NULL; + + bool found; + const page_size_t page_size(fil_space_get_page_size( + space_ids[i], &found)); + + if (!found) { + /* The tablespace was not found, remove the + entries for that page */ + ibuf_merge_or_delete_for_page(NULL, page_id, + NULL, FALSE); + continue; + } while 
(buf_pool->n_pend_reads > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { os_thread_sleep(500000); } - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + dberr_t err; - goto tablespace_deleted; - } + buf_read_page_low(&err, + sync && (i + 1 == n_stored), + 0, + BUF_READ_ANY_PAGE, page_id, page_size, + true, &rpage); - buf_read_page_low(&err, sync && (i + 1 == n_stored), - BUF_READ_ANY_PAGE, space_ids[i], - zip_size, TRUE, space_versions[i], - page_nos[i], NULL); - - if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { -tablespace_deleted: + if (err == DB_TABLESPACE_DELETED) { /* We have deleted or are deleting the single-table tablespace: remove the entries for that page */ - - ibuf_merge_or_delete_for_page(NULL, space_ids[i], - page_nos[i], - zip_size, FALSE); + ibuf_merge_or_delete_for_page(NULL, page_id, + &page_size, FALSE); } } os_aio_simulated_wake_handler_threads(); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Ibuf merge read-ahead space %lu pages %lu\n", - (ulong) space_ids[0], (ulong) n_stored); + if (n_stored) { + DBUG_PRINT("ib_buf", + ("ibuf merge read-ahead %u pages, space %u", + unsigned(n_stored), unsigned(space_ids[0]))); } -#endif /* UNIV_DEBUG */ } -/********************************************************************//** -Issues read requests for pages which recovery wants to read in. */ -UNIV_INTERN +/** Issues read requests for pages which recovery wants to read in. 
+@param[in] sync true if the caller wants this function to wait +for the highest address page to get read in, before this function returns +@param[in] space_id tablespace id +@param[in] page_nos array of page numbers to read, with the +highest page number the last in the array +@param[in] n_stored number of page numbers in the array */ void buf_read_recv_pages( -/*================*/ - ibool sync, /*!< in: TRUE if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in - bytes, or 0 */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored) /*!< in: number of page numbers - in the array */ + bool sync, + ulint space_id, + const ulint* page_nos, + ulint n_stored) { - ib_int64_t tablespace_version; - ulint count; - dberr_t err; - ulint i; - - zip_size = fil_space_get_zip_size(space); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ + ulint count; + dberr_t err; + ulint i; + fil_space_t* space = fil_space_get(space_id); + if (space == NULL) { + /* The tablespace is missing: do nothing */ return; } - tablespace_version = fil_space_get_version(space); + fil_space_open_if_needed(space); + + const page_size_t page_size(space->flags); for (i = 0; i < n_stored; i++) { - buf_pool_t* buf_pool; + buf_pool_t* buf_pool; + const page_id_t cur_page_id(space_id, page_nos[i]); + buf_page_t* rpage = NULL; count = 0; - os_aio_print_debug = FALSE; - buf_pool = buf_pool_get(space, page_nos[i]); + buf_pool = buf_pool_get(cur_page_id); while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) { os_aio_simulated_wake_handler_threads(); @@ -893,42 +875,34 @@ buf_read_recv_pages( count++; - if (count > 1000) { - fprintf(stderr, - "InnoDB: Error: InnoDB has waited 
for" - " 10 seconds for pending\n" - "InnoDB: reads to the buffer pool to" - " be finished.\n" - "InnoDB: Number of pending reads %lu," - " pending pread calls %lu\n", - (ulong) buf_pool->n_pend_reads, - (ulong) os_file_n_pending_preads); + if (!(count % 1000)) { - os_aio_print_debug = TRUE; + ib::error() + << "Waited for " << count / 100 + << " seconds for " + << buf_pool->n_pend_reads + << " pending reads"; } } - os_aio_print_debug = FALSE; - if ((i + 1 == n_stored) && sync) { - buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space, - zip_size, TRUE, tablespace_version, - page_nos[i], NULL); + buf_read_page_low( + &err, true, + 0, + BUF_READ_ANY_PAGE, + cur_page_id, page_size, true, &rpage); } else { - buf_read_page_low(&err, false, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, TRUE, - tablespace_version, page_nos[i], NULL); + buf_read_page_low( + &err, false, + IORequest::DO_NOT_WAKE, + BUF_READ_ANY_PAGE, + cur_page_id, page_size, true, &rpage); } } os_aio_simulated_wake_handler_threads(); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Recovery applies read-ahead pages %lu\n", - (ulong) n_stored); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)", + unsigned(n_stored))); } + diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc index 593af089b00..4b788c8952c 100644 --- a/storage/innobase/data/data0data.cc +++ b/storage/innobase/data/data0data.cc @@ -23,6 +23,8 @@ SQL data field and tuple Created 5/30/1994 Heikki Tuuri *************************************************************************/ +#include "ha_prototypes.h" + #include "data0data.h" #ifdef UNIV_NONINL @@ -36,36 +38,36 @@ Created 5/30/1994 Heikki Tuuri #include "page0zip.h" #include "dict0dict.h" #include "btr0cur.h" +#include "row0upd.h" -#include #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG /** Dummy variable to catch access to uninitialized fields. 
In the debug version, dtuple_create() will make all fields of dtuple_t point to data_error. */ -UNIV_INTERN byte data_error; +byte data_error; # ifndef UNIV_DEBUG_VALGRIND /** this is used to fool the compiler in dtuple_validate */ -UNIV_INTERN ulint data_dummy; +ulint data_dummy; # endif /* !UNIV_DEBUG_VALGRIND */ #endif /* UNIV_DEBUG */ #ifndef UNIV_HOTBACKUP -/************************************************************//** -Compare two data tuples, respecting the collation of character fields. -@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively, -than tuple2 */ -UNIV_INTERN +/** Compare two data tuples. +@param[in] tuple1 first data tuple +@param[in] tuple2 second data tuple +@return positive, 0, negative if tuple1 is greater, equal, less, than tuple2, +respectively */ int dtuple_coll_cmp( -/*============*/ - const dtuple_t* tuple1, /*!< in: tuple 1 */ - const dtuple_t* tuple2) /*!< in: tuple 2 */ + const dtuple_t* tuple1, + const dtuple_t* tuple2) { ulint n_fields; ulint i; + int cmp; ut_ad(tuple1 != NULL); ut_ad(tuple2 != NULL); @@ -76,30 +78,20 @@ dtuple_coll_cmp( n_fields = dtuple_get_n_fields(tuple1); - if (n_fields != dtuple_get_n_fields(tuple2)) { + cmp = (int) n_fields - (int) dtuple_get_n_fields(tuple2); - return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1); - } - - for (i = 0; i < n_fields; i++) { - int cmp; + for (i = 0; cmp == 0 && i < n_fields; i++) { const dfield_t* field1 = dtuple_get_nth_field(tuple1, i); const dfield_t* field2 = dtuple_get_nth_field(tuple2, i); - cmp = cmp_dfield_dfield(field1, field2); - - if (cmp) { - return(cmp); - } } - return(0); + return(cmp); } /*********************************************************************//** Sets number of fields used in a tuple. Normally this is set in dtuple_create, but if you want later to set it smaller, you can use this. 
*/ -UNIV_INTERN void dtuple_set_n_fields( /*================*/ @@ -114,20 +106,20 @@ dtuple_set_n_fields( /**********************************************************//** Checks that a data field is typed. -@return TRUE if ok */ +@return TRUE if ok */ static ibool dfield_check_typed_no_assert( /*=========================*/ const dfield_t* field) /*!< in: data field */ { - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { + if (dfield_get_type(field)->mtype > DATA_MTYPE_CURRENT_MAX + || dfield_get_type(field)->mtype < DATA_MTYPE_CURRENT_MIN) { + + ib::error() << "Data field type " + << dfield_get_type(field)->mtype + << ", len " << dfield_get_len(field); - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); return(FALSE); } @@ -136,8 +128,7 @@ dfield_check_typed_no_assert( /**********************************************************//** Checks that a data tuple is typed. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_check_typed_no_assert( /*=========================*/ @@ -147,9 +138,8 @@ dtuple_check_typed_no_assert( ulint i; if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) { - fprintf(stderr, - "InnoDB: Error: index entry has %lu fields\n", - (ulong) dtuple_get_n_fields(tuple)); + ib::error() << "Index entry has " + << dtuple_get_n_fields(tuple) << " fields"; dump: fputs("InnoDB: Tuple contents: ", stderr); dtuple_print(stderr, tuple); @@ -174,22 +164,18 @@ dump: #ifdef UNIV_DEBUG /**********************************************************//** Checks that a data field is typed. Asserts an error if not. 
-@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dfield_check_typed( /*===============*/ const dfield_t* field) /*!< in: data field */ { - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { + if (dfield_get_type(field)->mtype > DATA_MTYPE_CURRENT_MAX + || dfield_get_type(field)->mtype < DATA_MTYPE_CURRENT_MIN) { - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); - - ut_error; + ib::fatal() << "Data field type " + << dfield_get_type(field)->mtype + << ", len " << dfield_get_len(field); } return(TRUE); @@ -197,8 +183,7 @@ dfield_check_typed( /**********************************************************//** Checks that a data tuple is typed. Asserts an error if not. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_check_typed( /*===============*/ @@ -220,8 +205,7 @@ dtuple_check_typed( /**********************************************************//** Validates the consistency of a tuple which must be complete, i.e, all fields must have been set. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_validate( /*============*/ @@ -274,7 +258,6 @@ dtuple_validate( #ifndef UNIV_HOTBACKUP /*************************************************************//** Pretty prints a dfield value according to its data type. */ -UNIV_INTERN void dfield_print( /*=========*/ @@ -317,7 +300,6 @@ dfield_print( /*************************************************************//** Pretty prints a dfield value according to its data type. Also the hex string is printed if a string contains non-printable characters. 
*/ -UNIV_INTERN void dfield_print_also_hex( /*==================*/ @@ -391,16 +373,16 @@ dfield_print_also_hex( case 6: id = mach_read_from_6(data); - fprintf(stderr, "%llu", (ullint) id); + fprintf(stderr, IB_ID_FMT, id); break; case 7: id = mach_read_from_7(data); - fprintf(stderr, "%llu", (ullint) id); + fprintf(stderr, IB_ID_FMT, id); break; case 8: id = mach_read_from_8(data); - fprintf(stderr, "%llu", (ullint) id); + fprintf(stderr, IB_ID_FMT, id); break; default: goto print_hex; @@ -428,9 +410,7 @@ dfield_print_also_hex( break; default: - id = mach_ull_read_compressed(data); - - fprintf(stderr, "mix_id " TRX_ID_FMT, id); + goto print_hex; } break; @@ -467,7 +447,7 @@ print_hex: fputs(" Hex: ",stderr); for (i = 0; i < len; i++) { - fprintf(stderr, "%02lx", (ulint) *data++); + fprintf(stderr, "%02lx", static_cast(*data++)); } if (dfield_is_ext(dfield)) { @@ -487,7 +467,7 @@ dfield_print_raw( { ulint len = dfield_get_len(dfield); if (!dfield_is_null(dfield)) { - ulint print_len = ut_min(len, 1000); + ulint print_len = ut_min(len, static_cast(1000)); ut_print_buf(f, dfield_get_data(dfield), print_len); if (len != print_len) { fprintf(f, "(total %lu bytes%s)", @@ -501,7 +481,6 @@ dfield_print_raw( /**********************************************************//** The following function prints the contents of a tuple. */ -UNIV_INTERN void dtuple_print( /*=========*/ @@ -527,6 +506,62 @@ dtuple_print( ut_ad(dtuple_validate(tuple)); } +/** Print the contents of a tuple. 
+@param[out] o output stream +@param[in] field array of data fields +@param[in] n number of data fields */ +void +dfield_print( + std::ostream& o, + const dfield_t* field, + ulint n) +{ + for (ulint i = 0; i < n; i++, field++) { + const void* data = dfield_get_data(field); + const ulint len = dfield_get_len(field); + + if (i) { + o << ','; + } + + if (dfield_is_null(field)) { + o << "NULL"; + } else if (dfield_is_ext(field)) { + ulint local_len = len - BTR_EXTERN_FIELD_REF_SIZE; + ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); + + o << '[' + << local_len + << '+' << BTR_EXTERN_FIELD_REF_SIZE << ']'; + ut_print_buf(o, data, local_len); + ut_print_buf_hex(o, static_cast(data) + + local_len, + BTR_EXTERN_FIELD_REF_SIZE); + } else { + o << '[' << len << ']'; + ut_print_buf(o, data, len); + } + } +} + +/** Print the contents of a tuple. +@param[out] o output stream +@param[in] tuple data tuple */ +void +dtuple_print( + std::ostream& o, + const dtuple_t* tuple) +{ + const ulint n = dtuple_get_n_fields(tuple); + + o << "TUPLE (info_bits=" << dtuple_get_info_bits(tuple) + << ", " << n << " fields): {"; + + dfield_print(o, tuple->fields, n); + + o << "}"; +} + /**************************************************************//** Moves parts of long fields in entry to the big record vector so that the size of tuple drops below the maximum record size allowed in the @@ -535,11 +570,11 @@ to determine uniquely the insertion place of the tuple in the index. 
@return own: created big record vector, NULL if we are not able to shorten the entry enough, i.e., if there are too many fixed-length or short fields in entry or the index is clustered */ -UNIV_INTERN big_rec_t* dtuple_convert_big_rec( /*===================*/ dict_index_t* index, /*!< in: index */ + upd_t* upd, /*!< in/out: update vector */ dtuple_t* entry, /*!< in/out: index entry */ ulint* n_ext) /*!< in/out: number of externally stored columns */ @@ -571,9 +606,7 @@ dtuple_convert_big_rec( size = rec_get_converted_size(index, entry, *n_ext); if (UNIV_UNLIKELY(size > 1000000000)) { - fprintf(stderr, - "InnoDB: Warning: tuple size very big: %lu\n", - (ulong) size); + ib::warn() << "Tuple size is very big: " << size; fputs("InnoDB: Tuple contents: ", stderr); dtuple_print(stderr, entry); putc('\n', stderr); @@ -582,15 +615,7 @@ dtuple_convert_big_rec( heap = mem_heap_create(size + dtuple_get_n_fields(entry) * sizeof(big_rec_field_t) + 1000); - vector = static_cast( - mem_heap_alloc(heap, sizeof(big_rec_t))); - - vector->heap = heap; - - vector->fields = static_cast( - mem_heap_alloc( - heap, - dtuple_get_n_fields(entry) * sizeof(big_rec_field_t))); + vector = big_rec_t::alloc(heap, dtuple_get_n_fields(entry)); /* Decide which fields to shorten: the algorithm is to look for a variable-length field that yields the biggest savings when @@ -602,12 +627,12 @@ dtuple_convert_big_rec( *n_ext), dict_table_is_comp(index->table), dict_index_get_n_fields(index), - dict_table_zip_size(index->table))) { + dict_table_page_size(index->table))) { + ulint i; ulint longest = 0; ulint longest_i = ULINT_MAX; byte* data; - big_rec_field_t* b; for (i = dict_index_get_n_unique_in_tree(index); i < dtuple_get_n_fields(entry); i++) { @@ -624,7 +649,7 @@ dtuple_convert_big_rec( || dfield_is_ext(dfield) || dfield_get_len(dfield) <= local_len || dfield_get_len(dfield) - <= BTR_EXTERN_FIELD_REF_SIZE * 2) { + <= BTR_EXTERN_LOCAL_STORED_MAX_SIZE) { goto skip_field; } @@ -645,8 +670,7 @@ 
dtuple_convert_big_rec( there we always store locally columns whose length is up to local_len == 788 bytes. @see rec_init_offsets_comp_ordinary */ - if (ifield->col->mtype != DATA_BLOB - && ifield->col->len < 256) { + if (!DATA_BIG_COL(ifield->col)) { goto skip_field; } @@ -675,10 +699,12 @@ skip_field: ifield = dict_index_get_nth_field(index, longest_i); local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE; - b = &vector->fields[n_fields]; - b->field_no = longest_i; - b->len = dfield_get_len(dfield) - local_prefix_len; - b->data = (char*) dfield_get_data(dfield) + local_prefix_len; + vector->append( + big_rec_field_t( + longest_i, + dfield_get_len(dfield) - local_prefix_len, + static_cast(dfield_get_data(dfield)) + + local_prefix_len)); /* Allocate the locally stored part of the column. */ data = static_cast(mem_heap_alloc(heap, local_len)); @@ -702,9 +728,30 @@ skip_field: n_fields++; (*n_ext)++; ut_ad(n_fields < dtuple_get_n_fields(entry)); + + if (upd && !upd->is_modified(longest_i)) { + + DEBUG_SYNC_C("ib_mv_nonupdated_column_offpage"); + + upd_field_t upd_field; + upd_field.field_no = longest_i; + upd_field.orig_len = 0; + upd_field.exp = NULL; + upd_field.old_v_val = NULL; + dfield_copy(&upd_field.new_val, + dfield->clone(upd->heap)); + upd->append(upd_field); + ut_ad(upd->is_modified(longest_i)); + + ut_ad(upd_field.new_val.len + >= BTR_EXTERN_FIELD_REF_SIZE); + ut_ad(upd_field.new_val.len == local_len); + ut_ad(upd_field.new_val.len == dfield_get_len(dfield)); + } } - vector->n_fields = n_fields; + ut_ad(n_fields == vector->n_fields); + return(vector); } @@ -712,7 +759,6 @@ skip_field: Puts back to entry the data stored in vector. Note that to ensure the fields in entry can accommodate the data, vector must have been created from entry with dtuple_convert_big_rec. 
*/ -UNIV_INTERN void dtuple_convert_back_big_rec( /*========================*/ @@ -748,4 +794,58 @@ dtuple_convert_back_big_rec( mem_heap_free(vector->heap); } + +/** Allocate a big_rec_t object in the given memory heap, and for storing +n_fld number of fields. +@param[in] heap memory heap in which this object is allocated +@param[in] n_fld maximum number of fields that can be stored in + this object + +@return the allocated object */ +big_rec_t* +big_rec_t::alloc( + mem_heap_t* heap, + ulint n_fld) +{ + big_rec_t* rec = static_cast( + mem_heap_alloc(heap, sizeof(big_rec_t))); + + new(rec) big_rec_t(n_fld); + + rec->heap = heap; + rec->fields = static_cast( + mem_heap_alloc(heap, + n_fld * sizeof(big_rec_field_t))); + + rec->n_fields = 0; + return(rec); +} + +/** Create a deep copy of this object +@param[in] heap the memory heap in which the clone will be + created. + +@return the cloned object. */ +dfield_t* +dfield_t::clone( + mem_heap_t* heap) +{ + const ulint size = len == UNIV_SQL_NULL ? 0 : len; + dfield_t* obj = static_cast( + mem_heap_alloc(heap, sizeof(dfield_t) + size)); + + obj->ext = ext; + obj->len = len; + obj->type = type; + obj->spatial_status = spatial_status; + + if (len != UNIV_SQL_NULL) { + obj->data = obj + 1; + memcpy(obj->data, data, len); + } else { + obj->data = 0; + } + + return(obj); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/data/data0type.cc b/storage/innobase/data/data0type.cc index 0b9e08544a5..8fb3761531c 100644 --- a/storage/innobase/data/data0type.cc +++ b/storage/innobase/data/data0type.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,6 +23,8 @@ Data types Created 1/16/1996 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + #include "data0type.h" #ifdef UNIV_NONINL @@ -30,21 +32,18 @@ Created 1/16/1996 Heikki Tuuri #endif #ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" - /* At the database startup we store the default-charset collation number of this MySQL installation to this global variable. If we have < 4.1.2 format column definitions, or records in the insert buffer, we use this charset-collation code for them. */ -UNIV_INTERN ulint data_mysql_default_charset_coll; +ulint data_mysql_default_charset_coll; /*********************************************************************//** Determine how many bytes the first n characters of the given string occupy. If the string is shorter than n characters, returns the number of bytes the characters in the string occupy. -@return length of the prefix, in bytes */ -UNIV_INTERN +@return length of the prefix, in bytes */ ulint dtype_get_at_most_n_mbchars( /*========================*/ @@ -84,8 +83,7 @@ dtype_get_at_most_n_mbchars( /*********************************************************************//** Checks if a data main type is a string type. Also a BLOB is considered a string type. -@return TRUE if string type */ -UNIV_INTERN +@return TRUE if string type */ ibool dtype_is_string_type( /*=================*/ @@ -105,8 +103,7 @@ dtype_is_string_type( Checks if a type is a binary string type. Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For those DATA_BLOB columns this function currently returns FALSE. 
-@return TRUE if binary string type */ -UNIV_INTERN +@return TRUE if binary string type */ ibool dtype_is_binary_string_type( /*========================*/ @@ -128,8 +125,7 @@ Checks if a type is a non-binary string type. That is, dtype_is_string_type is TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For those DATA_BLOB columns this function currently returns TRUE. -@return TRUE if non-binary string type */ -UNIV_INTERN +@return TRUE if non-binary string type */ ibool dtype_is_non_binary_string_type( /*============================*/ @@ -149,7 +145,6 @@ dtype_is_non_binary_string_type( Forms a precise type from the < 4.1.2 format precise type plus the charset-collation code. @return precise type, including the charset-collation code */ -UNIV_INTERN ulint dtype_form_prtype( /*==============*/ @@ -165,8 +160,7 @@ dtype_form_prtype( /*********************************************************************//** Validates a data type structure. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtype_validate( /*===========*/ @@ -174,7 +168,7 @@ dtype_validate( { ut_a(type); ut_a(type->mtype >= DATA_VARCHAR); - ut_a(type->mtype <= DATA_MYSQL); + ut_a(type->mtype <= DATA_MTYPE_MAX); if (type->mtype == DATA_SYS) { ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS); @@ -190,7 +184,6 @@ dtype_validate( #ifndef UNIV_HOTBACKUP /*********************************************************************//** Prints a data type structure. 
*/ -UNIV_INTERN void dtype_print( /*========*/ @@ -226,6 +219,17 @@ dtype_print( fputs("DATA_BLOB", stderr); break; + case DATA_POINT: + fputs("DATA_POINT", stderr); + break; + + case DATA_VAR_POINT: + fputs("DATA_VAR_POINT", stderr); + + case DATA_GEOMETRY: + fputs("DATA_GEOMETRY", stderr); + break; + case DATA_INT: fputs("DATA_INT", stderr); break; diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc index 80724372f27..5c4e2049723 100644 --- a/storage/innobase/dict/dict0boot.cc +++ b/storage/innobase/dict/dict0boot.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,8 @@ Data dictionary creation and booting Created 4/18/1996 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + #include "dict0boot.h" #ifdef UNIV_NONINL @@ -42,8 +44,7 @@ Created 4/18/1996 Heikki Tuuri /**********************************************************************//** Gets a pointer to the dictionary header and x-latches its page. 
-@return pointer to the dictionary header, page x-latched */ -UNIV_INTERN +@return pointer to the dictionary header, page x-latched */ dict_hdr_t* dict_hdr_get( /*=========*/ @@ -52,8 +53,8 @@ dict_hdr_get( buf_block_t* block; dict_hdr_t* header; - block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO, - RW_X_LATCH, mtr); + block = buf_page_get(page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO), + univ_page_size, RW_X_LATCH, mtr); header = DICT_HDR + buf_block_get_frame(block); buf_block_dbg_add_level(block, SYNC_DICT_HEADER); @@ -63,23 +64,57 @@ dict_hdr_get( /**********************************************************************//** Returns a new table, index, or space id. */ -UNIV_INTERN void dict_hdr_get_new_id( /*================*/ - table_id_t* table_id, /*!< out: table id - (not assigned if NULL) */ - index_id_t* index_id, /*!< out: index id - (not assigned if NULL) */ - ulint* space_id) /*!< out: space id - (not assigned if NULL) */ + table_id_t* table_id, /*!< out: table id + (not assigned if NULL) */ + index_id_t* index_id, /*!< out: index id + (not assigned if NULL) */ + ulint* space_id, /*!< out: space id + (not assigned if NULL) */ + const dict_table_t* table, /*!< in: table */ + bool disable_redo) /*!< in: if true and table + object is NULL + then disable-redo */ { dict_hdr_t* dict_hdr; ib_id_t id; mtr_t mtr; mtr_start(&mtr); + if (table) { + dict_disable_redo_if_temporary(table, &mtr); + } else if (disable_redo) { + /* In non-read-only mode we need to ensure that space-id header + page is written to disk else if page is removed from buffer + cache and re-loaded it would assign temporary tablespace id + to another tablespace. + This is not a case with read-only mode as there is no new object + that is created except temporary tablespace. */ + mtr_set_log_mode(&mtr, + (srv_read_only_mode ? 
MTR_LOG_NONE : MTR_LOG_NO_REDO)); + } + /* Server started and let's say space-id = x + - table created with file-per-table + - space-id = x + 1 + - crash + Case 1: If it was redo logged then we know that it will be + restored to x + 1 + Case 2: if not redo-logged + Header will have the old space-id = x + This is OK because on restart there is no object with + space id = x + 1 + Case 3: + space-id = x (on start) + space-id = x+1 (temp-table allocation) - no redo logging + space-id = x+2 (non-temp-table allocation), this get's + redo logged. + If there is a crash there will be only 2 entries + x (original) and x+2 (new) and disk hdr will be updated + to reflect x + 2 entry. + We cannot allocate the same space id to different objects. */ dict_hdr = dict_hdr_get(&mtr); if (table_id) { @@ -111,7 +146,6 @@ dict_hdr_get_new_id( /**********************************************************************//** Writes the current value of the row id counter to the dictionary header file page. */ -UNIV_INTERN void dict_hdr_flush_row_id(void) /*=======================*/ @@ -120,7 +154,7 @@ dict_hdr_flush_row_id(void) row_id_t id; mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); id = dict_sys->row_id; @@ -136,7 +170,7 @@ dict_hdr_flush_row_id(void) /*****************************************************************//** Creates the file page for the dictionary header. This function is called only at the database creation. 
-@return TRUE if succeed */ +@return TRUE if succeed */ static ibool dict_hdr_create( @@ -154,7 +188,7 @@ dict_hdr_create( block = fseg_create(DICT_HDR_SPACE, 0, DICT_HDR + DICT_HDR_FSEG_HEADER, mtr); - ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block)); + ut_a(DICT_HDR_PAGE_NO == block->page.id.page_no()); dict_header = dict_hdr_get(mtr); @@ -180,9 +214,9 @@ dict_hdr_create( system tables */ /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_TABLES_ID, - dict_ind_redundant, mtr); + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE, + univ_page_size, DICT_TABLES_ID, + dict_ind_redundant, NULL, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -191,9 +225,9 @@ dict_hdr_create( mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no, MLOG_4BYTES, mtr); /*--------------------------*/ - root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0, - DICT_TABLE_IDS_ID, - dict_ind_redundant, mtr); + root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, + univ_page_size, DICT_TABLE_IDS_ID, + dict_ind_redundant, NULL, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -202,9 +236,9 @@ dict_hdr_create( mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no, MLOG_4BYTES, mtr); /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_COLUMNS_ID, - dict_ind_redundant, mtr); + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE, + univ_page_size, DICT_COLUMNS_ID, + dict_ind_redundant, NULL, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -213,9 +247,9 @@ dict_hdr_create( mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no, MLOG_4BYTES, mtr); /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_INDEXES_ID, - dict_ind_redundant, mtr); + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE, + 
univ_page_size, DICT_INDEXES_ID, + dict_ind_redundant, NULL, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -224,9 +258,9 @@ dict_hdr_create( mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no, MLOG_4BYTES, mtr); /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_FIELDS_ID, - dict_ind_redundant, mtr); + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, DICT_HDR_SPACE, + univ_page_size, DICT_FIELDS_ID, + dict_ind_redundant, NULL, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -243,7 +277,6 @@ dict_hdr_create( Initializes the data dictionary memory structures when the database is started. This function is also called when the data dictionary is created. @return DB_SUCCESS or error code. */ -UNIV_INTERN dberr_t dict_boot(void) /*===========*/ @@ -263,8 +296,8 @@ dict_boot(void) ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2); ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7); ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9); - ut_ad(DICT_NUM_COLS__SYS_INDEXES == 7); - ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 9); + ut_ad(DICT_NUM_COLS__SYS_INDEXES == 8); + ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 10); ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3); ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5); ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4); @@ -280,7 +313,7 @@ dict_boot(void) heap = mem_heap_create(450); - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); @@ -302,10 +335,11 @@ dict_boot(void) /* Insert into the dictionary cache the descriptions of the basic system tables */ /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0); + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0, 0); - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, + 
MAX_FULL_NAME_LEN); + dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8); /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */ dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4); /* The low order bit of TYPE is always set to 1. If the format @@ -354,9 +388,10 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0); + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, + 7, 0, 0, 0); - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8); dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4); @@ -386,15 +421,17 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0); + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, + DICT_NUM_COLS__SYS_INDEXES, 0, 0, 0); - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8); + dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8); dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "MERGE_THRESHOLD", DATA_INT, 0, 4); table->id = DICT_INDEXES_ID; @@ -418,9 +455,9 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0); + table = 
dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0, 0); - dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 8); dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0); @@ -459,17 +496,15 @@ dict_boot(void) if (srv_read_only_mode && !ibuf_is_empty()) { if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Change buffer must be empty when --innodb-read-only " - "is set! " - "You can try to recover the database with innodb_force_recovery=5"); + ib::error() << "Change buffer must be empty when" + " --innodb-read-only is set!" + "You can try to recover the database with innodb_force_recovery=5"; err = DB_ERROR; } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Change buffer not empty when --innodb-read-only " - "is set! but srv_force_recovery = %lu, ignoring.", - srv_force_recovery); + ib::warn() << "Change buffer not empty when --innodb-read-only " + "is set! but srv_force_recovery = " << srv_force_recovery + << " , ignoring."; } } @@ -481,10 +516,10 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); } - - mutex_exit(&(dict_sys->mutex)); } + mutex_exit(&dict_sys->mutex); + return(err); } @@ -502,7 +537,6 @@ dict_insert_initial_data(void) /*****************************************************************//** Creates and initializes the data dictionary at the server bootstrap. @return DB_SUCCESS or error code. 
*/ -UNIV_INTERN dberr_t dict_create(void) /*=============*/ diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index d423f16f61c..31952424119 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -23,6 +23,8 @@ Database object creation Created 1/8/1996 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + #include "dict0crea.h" #ifdef UNIV_NONINL @@ -44,12 +46,14 @@ Created 1/8/1996 Heikki Tuuri #include "ut0vec.h" #include "dict0priv.h" #include "fts0priv.h" -#include "ha_prototypes.h" +#include "fsp0space.h" +#include "fsp0sysspace.h" +#include "srv0start.h" /*****************************************************************//** Based on a table object, this function builds the entry to be inserted in the SYS_TABLES system table. -@return the tuple which should be inserted */ +@return the tuple which should be inserted */ static dtuple_t* dict_create_sys_tables_tuple( @@ -78,7 +82,8 @@ dict_create_sys_tables_tuple( dfield = dtuple_get_nth_field( entry, DICT_COL__SYS_TABLES__NAME); - dfield_set_data(dfield, table->name, ut_strlen(table->name)); + dfield_set_data(dfield, + table->name.m_name, strlen(table->name.m_name)); /* 1: DB_TRX_ID added later */ /* 2: DB_ROLL_PTR added later */ @@ -96,7 +101,11 @@ dict_create_sys_tables_tuple( entry, DICT_COL__SYS_TABLES__N_COLS); ptr = static_cast(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, table->n_def + + /* If there is any virtual column, encode it in N_COLS */ + mach_write_to_4(ptr, dict_table_encode_n_col( + static_cast(table->n_def), + static_cast(table->n_v_def)) | ((table->flags & DICT_TF_COMPACT) << 31)); dfield_set_data(dfield, ptr, 4); @@ -128,7 +137,7 @@ dict_create_sys_tables_tuple( ptr = static_cast(mem_heap_alloc(heap, 4)); /* Be sure all non-used bits are zero. 
*/ - ut_a(!(table->flags2 & ~DICT_TF2_BIT_MASK)); + ut_a(!(table->flags2 & DICT_TF2_UNUSED_BIT_MASK)); mach_write_to_4(ptr, table->flags2); dfield_set_data(dfield, ptr, 4); @@ -154,7 +163,7 @@ dict_create_sys_tables_tuple( /*****************************************************************//** Based on a table object, this function builds the entry to be inserted in the SYS_COLUMNS system table. -@return the tuple which should be inserted */ +@return the tuple which should be inserted */ static dtuple_t* dict_create_sys_columns_tuple( @@ -171,11 +180,23 @@ dict_create_sys_columns_tuple( dfield_t* dfield; byte* ptr; const char* col_name; + ulint num_base = 0; + ulint v_col_no = ULINT_UNDEFINED; ut_ad(table); ut_ad(heap); - column = dict_table_get_nth_col(table, i); + /* Any column beyond table->n_def would be virtual columns */ + if (i >= table->n_def) { + dict_v_col_t* v_col = dict_table_get_nth_v_col( + table, i - table->n_def); + column = &v_col->m_col; + num_base = v_col->num_base; + v_col_no = column->ind; + } else { + column = dict_table_get_nth_col(table, i); + ut_ad(!dict_col_is_virtual(column)); + } sys_columns = dict_sys->sys_columns; @@ -195,7 +216,15 @@ dict_create_sys_columns_tuple( dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS); ptr = static_cast(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, i); + + if (v_col_no != ULINT_UNDEFINED) { + /* encode virtual column's position in MySQL table and InnoDB + table in "POS" */ + mach_write_to_4(ptr, dict_create_v_col_pos( + i - table->n_def, v_col_no)); + } else { + mach_write_to_4(ptr, i); + } dfield_set_data(dfield, ptr, 4); @@ -204,7 +233,12 @@ dict_create_sys_columns_tuple( /* 4: NAME ---------------------------*/ dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME); - col_name = dict_table_get_col_name(table, i); + if (i >= table->n_def) { + col_name = dict_table_get_v_col_name(table, i - table->n_def); + } else { + col_name = dict_table_get_col_name(table, i); + } + 
dfield_set_data(dfield, col_name, ut_strlen(col_name)); /* 5: MTYPE --------------------------*/ @@ -235,7 +269,7 @@ dict_create_sys_columns_tuple( dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC); ptr = static_cast(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, 0/* unused */); + mach_write_to_4(ptr, num_base); dfield_set_data(dfield, ptr, 4); /*---------------------------------*/ @@ -243,9 +277,77 @@ dict_create_sys_columns_tuple( return(entry); } +/** Based on a table object, this function builds the entry to be inserted +in the SYS_VIRTUAL system table. Each row maps a virtual column to one of +its base column. +@param[in] table table +@param[in] v_col_n virtual column number +@param[in] b_col_n base column sequence num +@param[in] heap memory heap +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_virtual_tuple( + const dict_table_t* table, + ulint v_col_n, + ulint b_col_n, + mem_heap_t* heap) +{ + dict_table_t* sys_virtual; + dtuple_t* entry; + const dict_col_t* base_column; + dfield_t* dfield; + byte* ptr; + + ut_ad(table); + ut_ad(heap); + + ut_ad(v_col_n < table->n_v_def); + dict_v_col_t* v_col = dict_table_get_nth_v_col(table, v_col_n); + base_column = v_col->base_col[b_col_n]; + + sys_virtual = dict_sys->sys_virtual; + + entry = dtuple_create(heap, DICT_NUM_COLS__SYS_VIRTUAL + + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_virtual); + + /* 0: TABLE_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__TABLE_ID); + + ptr = static_cast(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, table->id); + + dfield_set_data(dfield, ptr, 8); + + /* 1: POS ---------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__POS); + + ptr = static_cast(mem_heap_alloc(heap, 4)); + ulint v_col_no = dict_create_v_col_pos(v_col_n, v_col->m_col.ind); + mach_write_to_4(ptr, v_col_no); + + dfield_set_data(dfield, ptr, 4); + + /* 2: BASE_POS 
----------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__BASE_POS); + + ptr = static_cast(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, base_column->ind); + + dfield_set_data(dfield, ptr, 4); + + /* 3: DB_TRX_ID added later */ + /* 4: DB_ROLL_PTR added later */ + + /*---------------------------------*/ + return(entry); +} + /***************************************************************//** Builds a table definition to insert. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t dict_build_table_def_step( @@ -255,86 +357,231 @@ dict_build_table_def_step( { dict_table_t* table; dtuple_t* row; - dberr_t error; - const char* path; - mtr_t mtr; - ulint space = 0; - bool use_tablespace; - - ut_ad(mutex_own(&(dict_sys->mutex))); + dberr_t err = DB_SUCCESS; table = node->table; - use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE); - dict_hdr_get_new_id(&table->id, NULL, NULL); + trx_t* trx = thr_get_trx(thr); + dict_table_assign_new_id(table, trx); - thr_get_trx(thr)->table_id = table->id; + err = dict_build_tablespace_for_table(table, node); - /* Always set this bit for all new created tables */ + if (err != DB_SUCCESS) { + return(err); + } + + row = dict_create_sys_tables_tuple(table, node->heap); + + ins_node_set_new_row(node->tab_def, row); + + return(err); +} + +/** Build a tablespace to store various objects. +@param[in,out] tablespace Tablespace object describing what to build. +@return DB_SUCCESS or error code. */ +dberr_t +dict_build_tablespace( + Tablespace* tablespace) +{ + dberr_t err = DB_SUCCESS; + mtr_t mtr; + ulint space = 0; + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(tablespace); + + DBUG_EXECUTE_IF("out_of_tablespace_disk", + return(DB_OUT_OF_FILE_SPACE);); + /* Get a new space id. 
*/ + dict_hdr_get_new_id(NULL, NULL, &space, NULL, false); + if (space == ULINT_UNDEFINED) { + return(DB_ERROR); + } + tablespace->set_space_id(space); + + Datafile* datafile = tablespace->first_datafile(); + + /* We create a new generic empty tablespace. + We initially let it be 4 pages: + - page 0 is the fsp header and an extent descriptor page, + - page 1 is an ibuf bitmap page, + - page 2 is the first inode page, + - page 3 will contain the root of the clustered index of the + first table we create here. */ + + err = fil_ibd_create( + space, + tablespace->name(), + datafile->filepath(), + tablespace->flags(), + FIL_IBD_FILE_INITIAL_SIZE, + tablespace->encryption_mode(), + tablespace->key_id()); + + if (err != DB_SUCCESS) { + return(err); + } + + /* Update SYS_TABLESPACES and SYS_DATAFILES */ + err = dict_replace_tablespace_and_filepath( + tablespace->space_id(), tablespace->name(), + datafile->filepath(), tablespace->flags()); + if (err != DB_SUCCESS) { + os_file_delete(innodb_data_file_key, datafile->filepath()); + return(err); + } + + mtr_start(&mtr); + mtr.set_named_space(space); + + /* Once we allow temporary general tablespaces, we must do this; + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); */ + ut_a(!FSP_FLAGS_GET_TEMPORARY(tablespace->flags())); + + fsp_header_init(space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); + + mtr_commit(&mtr); + + return(err); +} + +/** Builds a tablespace to contain a table, using file-per-table=1. +@param[in,out] table Table to build in its own tablespace. 
+@param[in] node Table create node +@return DB_SUCCESS or error code */ +dberr_t +dict_build_tablespace_for_table( + dict_table_t* table, + tab_node_t* node) +{ + dberr_t err = DB_SUCCESS; + mtr_t mtr; + ulint space = 0; + bool needs_file_per_table; + char* filepath; + + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); + + needs_file_per_table + = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE); + + /* Always set this bit for all new created tables */ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);); - if (use_tablespace) { - /* This table will not use the system tablespace. - Get a new space id. */ - dict_hdr_get_new_id(NULL, NULL, &space); + if (needs_file_per_table) { + /* This table will need a new tablespace. */ + + ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX); + ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0 + || dict_table_get_format(table) >= UNIV_FORMAT_B); + + /* Get a new tablespace ID */ + dict_hdr_get_new_id(NULL, NULL, &space, table, false); DBUG_EXECUTE_IF( "ib_create_table_fail_out_of_space_ids", space = ULINT_UNDEFINED; ); - if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + if (space == ULINT_UNDEFINED) { return(DB_ERROR); } + table->space = static_cast(space); + + /* Determine the tablespace flags. */ + bool is_temp = dict_table_is_temporary(table); + bool is_encrypted = dict_table_is_encrypted(table); + bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags); + ulint fsp_flags = dict_tf_to_fsp_flags(table->flags, + is_temp, + is_encrypted); + + /* Determine the full filepath */ + if (is_temp) { + /* Temporary table filepath contains a full path + and a filename without the extension. 
*/ + ut_ad(table->dir_path_of_temp_table); + filepath = fil_make_filepath( + table->dir_path_of_temp_table, + NULL, IBD, false); + + } else if (has_data_dir) { + ut_ad(table->data_dir_path); + filepath = fil_make_filepath( + table->data_dir_path, + table->name.m_name, IBD, true); + + } else { + /* Make the tablespace file in the default dir + using the table name */ + filepath = fil_make_filepath( + NULL, table->name.m_name, IBD, false); + } /* We create a new single-table tablespace for the table. We initially let it be 4 pages: - page 0 is the fsp header and an extent descriptor page, - page 1 is an ibuf bitmap page, - page 2 is the first inode page, - - page 3 will contain the root of the clustered index of the - table we create here. */ + - page 3 will contain the root of the clustered index of + the table we create here. */ - path = table->data_dir_path ? table->data_dir_path - : table->dir_path_of_temp_table; - - ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX); - ut_ad(!dict_table_zip_size(table) - || dict_table_get_format(table) >= UNIV_FORMAT_B); - - error = fil_create_new_single_table_tablespace( - space, table->name, path, - dict_tf_to_fsp_flags(table->flags), - table->flags2, + err = fil_ibd_create( + space, table->name.m_name, filepath, fsp_flags, FIL_IBD_FILE_INITIAL_SIZE, - node->mode, node->key_id); + node ? node->mode : FIL_SPACE_ENCRYPTION_DEFAULT, + node ? 
node->key_id : FIL_DEFAULT_ENCRYPTION_KEY); - table->space = (unsigned int) space; + ut_free(filepath); - if (error != DB_SUCCESS) { + if (err != DB_SUCCESS) { - return(error); + return(err); } mtr_start(&mtr); + mtr.set_named_space(table->space); + dict_disable_redo_if_temporary(table, &mtr); - fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); + bool ret = fsp_header_init(table->space, + FIL_IBD_FILE_INITIAL_SIZE, + &mtr); mtr_commit(&mtr); + if (!ret) { + return(DB_ERROR); + } } else { - /* Create in the system tablespace: disallow Barracuda - features by keeping only the first bit which says whether - the row format is redundant or compact */ - table->flags &= DICT_TF_COMPACT; + /* We do not need to build a tablespace for this table. It + is already built. Just find the correct tablespace ID. */ + + if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + ut_ad(table->tablespace != NULL); + + ut_ad(table->space == fil_space_get_id_by_name( + table->tablespace())); + } else if (dict_table_is_temporary(table)) { + /* Use the shared temporary tablespace. + Note: The temp tablespace supports all non-Compressed + row formats whereas the system tablespace only + supports Redundant and Compact */ + ut_ad(dict_tf_get_rec_format(table->flags) + != REC_FORMAT_COMPRESSED); + table->space = static_cast( + srv_tmp_space.space_id()); + } else { + /* Create in the system tablespace. */ + ut_ad(table->space == srv_sys_space.space_id()); + } + + DBUG_EXECUTE_IF("ib_ddl_crash_during_tablespace_alloc", + DBUG_SUICIDE();); } - row = dict_create_sys_tables_tuple(table, node->heap); - - ins_node_set_new_row(node->tab_def, row); - return(DB_SUCCESS); } @@ -353,10 +600,25 @@ dict_build_col_def_step( ins_node_set_new_row(node->col_def, row); } +/** Builds a SYS_VIRTUAL row definition to insert. 
+@param[in] node table create node */ +static +void +dict_build_v_col_def_step( + tab_node_t* node) +{ + dtuple_t* row; + + row = dict_create_sys_virtual_tuple(node->table, node->col_no, + node->base_col_no, + node->heap); + ins_node_set_new_row(node->v_col_def, row); +} + /*****************************************************************//** Based on an index object, this function builds the entry to be inserted in the SYS_INDEXES system table. -@return the tuple which should be inserted */ +@return the tuple which should be inserted */ static dtuple_t* dict_create_sys_indexes_tuple( @@ -372,7 +634,7 @@ dict_create_sys_indexes_tuple( dfield_t* dfield; byte* ptr; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(index); ut_ad(heap); @@ -380,7 +642,8 @@ dict_create_sys_indexes_tuple( table = dict_table_get_low(index->table_name); - entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); + entry = dtuple_create( + heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS); dict_table_copy_types(entry, sys_indexes); @@ -408,7 +671,16 @@ dict_create_sys_indexes_tuple( dfield = dtuple_get_nth_field( entry, DICT_COL__SYS_INDEXES__NAME); - dfield_set_data(dfield, index->name, ut_strlen(index->name)); + if (!index->is_committed()) { + ulint len = strlen(index->name) + 1; + char* name = static_cast( + mem_heap_alloc(heap, len)); + *name = *TEMP_INDEX_PREFIX_STR; + memcpy(name + 1, index->name, len - 1); + dfield_set_data(dfield, name, len); + } else { + dfield_set_data(dfield, index->name, strlen(index->name)); + } /* 5: N_FIELDS ----------------------*/ dfield = dtuple_get_nth_field( @@ -448,6 +720,16 @@ dict_create_sys_indexes_tuple( dfield_set_data(dfield, ptr, 4); + /* 9: MERGE_THRESHOLD ----------------*/ + + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD); + + ptr = static_cast(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, DICT_INDEX_MERGE_THRESHOLD_DEFAULT); + + dfield_set_data(dfield, ptr, 4); + 
/*--------------------------------*/ return(entry); @@ -456,7 +738,7 @@ dict_create_sys_indexes_tuple( /*****************************************************************//** Based on an index object, this function builds the entry to be inserted in the SYS_FIELDS system table. -@return the tuple which should be inserted */ +@return the tuple which should be inserted */ static dtuple_t* dict_create_sys_fields_tuple( @@ -538,7 +820,7 @@ dict_create_sys_fields_tuple( /*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. -@return the tuple for search */ +@return the tuple for search */ static dtuple_t* dict_create_search_tuple( @@ -573,7 +855,7 @@ dict_create_search_tuple( /***************************************************************//** Builds an index definition row to insert. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t dict_build_index_def_step( @@ -586,7 +868,7 @@ dict_build_index_def_step( dtuple_t* row; trx_t* trx; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); trx = thr_get_trx(thr); @@ -608,7 +890,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - dict_hdr_get_new_id(NULL, &index->id, NULL); + dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -628,6 +910,48 @@ dict_build_index_def_step( return(DB_SUCCESS); } +/***************************************************************//** +Builds an index definition without updating SYSTEM TABLES. 
+@return DB_SUCCESS or error code */ +void +dict_build_index_def( +/*=================*/ + const dict_table_t* table, /*!< in: table */ + dict_index_t* index, /*!< in/out: index */ + trx_t* trx) /*!< in/out: InnoDB transaction handle */ +{ + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); + + if (trx->table_id == 0) { + /* Record only the first table id. */ + trx->table_id = table->id; + } + + ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) + || dict_index_is_clust(index)); + + if (!dict_table_is_intrinsic(table)) { + dict_hdr_get_new_id(NULL, &index->id, NULL, table, false); + } else { + /* Index are re-loaded in process of creation using id. + If same-id is used for all indexes only first index will always + be retrieved when expected is iterative return of all indexes*/ + if (UT_LIST_GET_LEN(table->indexes) > 0) { + index->id = UT_LIST_GET_LAST(table->indexes)->id + 1; + } else { + index->id = 1; + } + } + + /* Inherit the space id from the table; we store all indexes of a + table in the same tablespace */ + + index->space = table->space; + + /* Note that the index was created by this transaction. */ + index->trx_id = trx->id; +} + /***************************************************************//** Builds a field definition row to insert. */ static @@ -648,20 +972,20 @@ dict_build_field_def_step( /***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t dict_create_index_tree_step( /*========================*/ ind_node_t* node) /*!< in: index create node */ { + mtr_t mtr; + btr_pcur_t pcur; dict_index_t* index; dict_table_t* sys_indexes; dtuple_t* search_tuple; - btr_pcur_t pcur; - mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); index = node->index; @@ -678,6 +1002,13 @@ dict_create_index_tree_step( mtr_start(&mtr); + const bool missing = index->table->ibd_file_missing + || dict_table_is_discarded(index->table); + + if (!missing) { + mtr.set_named_space(index->space); + } + search_tuple = dict_create_search_tuple(node->ind_row, node->heap); btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes), @@ -688,16 +1019,14 @@ dict_create_index_tree_step( dberr_t err = DB_SUCCESS; - ulint zip_size = dict_table_zip_size(index->table); - - if (node->index->table->ibd_file_missing - || dict_table_is_discarded(node->index->table)) { + if (missing) { node->page_no = FIL_NULL; } else { node->page_no = btr_create( - index->type, index->space, zip_size, - index->id, index, &mtr); + index->type, index->space, + dict_table_page_size(index->table), + index->id, index, NULL, &mtr); if (node->page_no == FIL_NULL) { err = DB_OUT_OF_FILE_SPACE; @@ -719,169 +1048,207 @@ dict_create_index_tree_step( return(err); } -/*******************************************************************//** -Drops the index tree associated with a row in SYS_INDEXES table. */ -UNIV_INTERN -void -dict_drop_index_tree( -/*=================*/ - rec_t* rec, /*!< in/out: record in the clustered index - of SYS_INDEXES table */ - mtr_t* mtr) /*!< in: mtr having the latch on the record page */ +/***************************************************************//** +Creates an index tree for the index if it is not a member of a cluster. +Don't update SYSTEM TABLES. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +dberr_t +dict_create_index_tree_in_mem( +/*==========================*/ + dict_index_t* index, /*!< in/out: index */ + const trx_t* trx) /*!< in: InnoDB transaction handle */ +{ + mtr_t mtr; + ulint page_no = FIL_NULL; + + ut_ad(mutex_own(&dict_sys->mutex) + || dict_table_is_intrinsic(index->table)); + + if (index->type == DICT_FTS) { + /* FTS index does not need an index tree */ + return(DB_SUCCESS); + } + + mtr_start(&mtr); + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + + dberr_t err = DB_SUCCESS; + + /* Currently this function is being used by temp-tables only. + Import/Discard of temp-table is blocked and so this assert. */ + ut_ad(index->table->ibd_file_missing == 0 + && !dict_table_is_discarded(index->table)); + + page_no = btr_create( + index->type, index->space, + dict_table_page_size(index->table), + index->id, index, NULL, &mtr); + + index->page = page_no; + index->trx_id = trx->id; + + if (page_no == FIL_NULL) { + err = DB_OUT_OF_FILE_SPACE; + } + + mtr_commit(&mtr); + + return(err); +} + +/** Drop the index tree associated with a row in SYS_INDEXES table. 
+@param[in,out] rec SYS_INDEXES record +@param[in,out] pcur persistent cursor on rec +@param[in,out] mtr mini-transaction +@return whether freeing the B-tree was attempted */ +bool +dict_drop_index_tree( + rec_t* rec, + btr_pcur_t* pcur, + mtr_t* mtr) { - ulint root_page_no; - ulint space; - ulint zip_size; const byte* ptr; ulint len; + ulint space; + ulint root_page_no; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + + ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); ut_ad(len == 4); + btr_pcur_store_position(pcur, mtr); + root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); if (root_page_no == FIL_NULL) { /* The tree has already been freed */ - return; + return(false); } + mlog_write_ulint(const_cast(ptr), FIL_NULL, MLOG_4BYTES, mtr); + ptr = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__SPACE, &len); ut_ad(len == 4); space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - zip_size = fil_space_get_zip_size(space); - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + ptr = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__ID, &len); + + ut_ad(len == 8); + + bool found; + const page_size_t page_size(fil_space_get_page_size(space, + &found)); + + if (!found) { /* It is a single table tablespace and the .ibd file is missing: do nothing */ - return; + return(false); } - /* We free all the pages but the root page first; this operation - may span several mini-transactions */ + /* If tablespace is scheduled for truncate, do not try to drop + the indexes in that tablespace. There is a truncate fixup action + which will take care of it. 
*/ + if (srv_is_tablespace_truncated(space)) { + return(false); + } - btr_free_but_not_root(space, zip_size, root_page_no); + btr_free_if_exists(page_id_t(space, root_page_no), page_size, + mach_read_from_8(ptr), mtr); - /* Then we free the root page in the same mini-transaction where - we write FIL_NULL to the appropriate field in the SYS_INDEXES - record: this mini-transaction marks the B-tree totally freed */ - - /* printf("Dropping index tree in space %lu root page %lu\n", space, - root_page_no); */ - btr_free_root(space, zip_size, root_page_no, mtr); - - page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - FIL_NULL, mtr); + return(true); } /*******************************************************************//** -Truncates the index tree associated with a row in SYS_INDEXES table. +Drops the index tree but don't update SYS_INDEXES table. */ +void +dict_drop_index_tree_in_mem( +/*========================*/ + const dict_index_t* index, /*!< in: index */ + ulint page_no) /*!< in: index page-no */ +{ + ut_ad(mutex_own(&dict_sys->mutex) + || dict_table_is_intrinsic(index->table)); + ut_ad(dict_table_is_temporary(index->table)); + + ulint root_page_no = page_no; + ulint space = index->space; + bool found; + const page_size_t page_size(fil_space_get_page_size(space, + &found)); + + /* If tree has already been freed or it is a single table + tablespace and the .ibd file is missing do nothing, + else free the all the pages */ + if (root_page_no != FIL_NULL && found) { + btr_free(page_id_t(space, root_page_no), page_size); + } +} + +/*******************************************************************//** +Recreate the index tree associated with a row in SYS_INDEXES table. 
@return new root page number, or FIL_NULL on failure */ -UNIV_INTERN ulint -dict_truncate_index_tree( +dict_recreate_index_tree( /*=====================*/ - dict_table_t* table, /*!< in: the table the index belongs to */ - ulint space, /*!< in: 0=truncate, - nonzero=create the index tree in the - given tablespace */ + const dict_table_t* + table, /*!< in/out: the table the index belongs to */ btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to record in the clustered index of SYS_INDEXES table. The cursor may be repositioned in this call. */ - mtr_t* mtr) /*!< in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ + mtr_t* mtr) /*!< in/out: mtr having the latch + on the record page. */ { - ulint root_page_no; - ibool drop = !space; - ulint zip_size; - ulint type; - index_id_t index_id; - rec_t* rec; - const byte* ptr; - ulint len; - dict_index_t* index; - bool has_been_dropped = false; - - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - rec = btr_pcur_get_rec(pcur); - ptr = rec_get_nth_field_old( + + ulint len; + rec_t* rec = btr_pcur_get_rec(pcur); + + const byte* ptr = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); ut_ad(len == 4); - root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (drop && root_page_no == FIL_NULL) { - has_been_dropped = true; - drop = FALSE; - } - - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__SPACE, &len); + ulint root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); + ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE, &len); ut_ad(len == 4); - if (drop) { - space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - } + ut_a(table->space == mtr_read_ulint(ptr, MLOG_4BYTES, mtr)); - zip_size = fil_space_get_zip_size(space); + ulint space = table->space; + bool found; + const page_size_t page_size(fil_space_get_page_size(space, + &found)); - if 
(UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ + if (!found) { + /* It is a single table tablespae and the .ibd file is + missing: do nothing. */ + + ib::warn() + << "Trying to TRUNCATE a missing .ibd file of table " + << table->name << "!"; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Trying to TRUNCATE" - " a missing .ibd file of table %s!\n", table->name); return(FIL_NULL); } - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TYPE, &len); + ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__TYPE, &len); ut_ad(len == 4); - type = mach_read_from_4(ptr); + ulint type = mach_read_from_4(ptr); ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len); ut_ad(len == 8); - index_id = mach_read_from_8(ptr); - - if (!drop) { - - goto create; - } - - /* We free all the pages but the root page first; this operation - may span several mini-transactions */ - - btr_free_but_not_root(space, zip_size, root_page_no); - - /* Then we free the root page in the same mini-transaction where - we create the b-tree and write its new root page number to the - appropriate field in the SYS_INDEXES record: this mini-transaction - marks the B-tree totally truncated */ - - btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr); - - btr_free_root(space, zip_size, root_page_no, mtr); -create: - /* We will temporarily write FIL_NULL to the PAGE_NO field - in SYS_INDEXES, so that the database will not get into an - inconsistent state in case it crashes between the mtr_commit() - below and the following mtr_commit() call. 
*/ - page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - FIL_NULL, mtr); + index_id_t index_id = mach_read_from_8(ptr); /* We will need to commit the mini-transaction in order to avoid deadlocks in the btr_create() call, because otherwise we would @@ -890,53 +1257,109 @@ create: mtr_commit(mtr); mtr_start(mtr); + mtr->set_named_space(space); btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); /* Find the index corresponding to this SYS_INDEXES record. */ - for (index = UT_LIST_GET_FIRST(table->indexes); - index; + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; index = UT_LIST_GET_NEXT(indexes, index)) { if (index->id == index_id) { if (index->type & DICT_FTS) { return(FIL_NULL); } else { - if (has_been_dropped) { - fprintf(stderr, " InnoDB: Trying to" - " TRUNCATE a missing index of" - " table %s!\n", - index->table->name); - } - - root_page_no = btr_create(type, space, zip_size, - index_id, index, mtr); + root_page_no = btr_create( + type, space, page_size, index_id, + index, NULL, mtr); index->page = (unsigned int) root_page_no; return(root_page_no); } } } - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Index %llu of table %s is missing\n" - "InnoDB: from the data dictionary during TRUNCATE!\n", - (ullint) index_id, - table->name); + ib::error() << "Failed to create index with index id " << index_id + << " of table " << table->name; return(FIL_NULL); } +/*******************************************************************//** +Truncates the index tree but don't update SYSTEM TABLES. 
+@return DB_SUCCESS or error */ +dberr_t +dict_truncate_index_tree_in_mem( +/*============================*/ + dict_index_t* index) /*!< in/out: index */ +{ + mtr_t mtr; + bool truncate; + ulint space = index->space; + + ut_ad(mutex_own(&dict_sys->mutex) + || dict_table_is_intrinsic(index->table)); + ut_ad(dict_table_is_temporary(index->table)); + + ulint type = index->type; + ulint root_page_no = index->page; + + if (root_page_no == FIL_NULL) { + + /* The tree has been freed. */ + ib::warn() << "Trying to TRUNCATE a missing index of table " + << index->table->name << "!"; + + truncate = false; + } else { + truncate = true; + } + + bool found; + const page_size_t page_size(fil_space_get_page_size(space, + &found)); + + if (!found) { + + /* It is a single table tablespace and the .ibd file is + missing: do nothing */ + + ib::warn() + << "Trying to TRUNCATE a missing .ibd file of table " + << index->table->name << "!"; + } + + /* If table to truncate resides in its on own tablespace that will + be re-created on truncate then we can ignore freeing of existing + tablespace objects. */ + + if (truncate) { + btr_free(page_id_t(space, root_page_no), page_size); + } + + mtr_start(&mtr); + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + + root_page_no = btr_create( + type, space, page_size, index->id, index, NULL, &mtr); + + DBUG_EXECUTE_IF("ib_err_trunc_temp_recreate_index", + root_page_no = FIL_NULL;); + + index->page = root_page_no; + + mtr_commit(&mtr); + + return(index->page == FIL_NULL ? DB_ERROR : DB_SUCCESS); +} + /*********************************************************************//** Creates a table create graph. 
-@return own: table create node */ -UNIV_INTERN +@return own: table create node */ tab_node_t* tab_create_graph_create( /*====================*/ dict_table_t* table, /*!< in: table to create, built as a memory data structure */ mem_heap_t* heap, /*!< in: heap where created */ - bool commit, /*!< in: true if the commit node should be - added to the query graph */ fil_encryption_t mode, /*!< in: encryption mode */ ulint key_id) /*!< in: encryption key_id */ { @@ -962,28 +1385,24 @@ tab_create_graph_create( heap); node->col_def->common.parent = node; - if (commit) { - node->commit_node = trx_commit_node_create(heap); - node->commit_node->common.parent = node; - } else { - node->commit_node = 0; - } + node->v_col_def = ins_node_create(INS_DIRECT, dict_sys->sys_virtual, + heap); + node->v_col_def->common.parent = node; return(node); } -/*********************************************************************//** -Creates an index create graph. -@return own: index create node */ -UNIV_INTERN +/** Creates an index create graph. 
+@param[in] index index to create, built as a memory data structure +@param[in,out] heap heap where created +@param[in] add_v new virtual columns added in the same clause with + add index +@return own: index create node */ ind_node_t* ind_create_graph_create( -/*====================*/ - dict_index_t* index, /*!< in: index to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit) /*!< in: true if the commit node should be - added to the query graph */ + dict_index_t* index, + mem_heap_t* heap, + const dict_add_v_col_t* add_v) { ind_node_t* node; @@ -994,6 +1413,8 @@ ind_create_graph_create( node->index = index; + node->add_v = add_v; + node->state = INDEX_BUILD_INDEX_DEF; node->page_no = FIL_NULL; node->heap = mem_heap_create(256); @@ -1006,20 +1427,12 @@ ind_create_graph_create( dict_sys->sys_fields, heap); node->field_def->common.parent = node; - if (commit) { - node->commit_node = trx_commit_node_create(heap); - node->commit_node->common.parent = node; - } else { - node->commit_node = 0; - } - return(node); } /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* dict_create_table_step( /*===================*/ @@ -1030,7 +1443,7 @@ dict_create_table_step( trx_t* trx; ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); trx = thr_get_trx(thr); @@ -1062,7 +1475,8 @@ dict_create_table_step( if (node->state == TABLE_BUILD_COL_DEF) { - if (node->col_no < (node->table)->n_def) { + if (node->col_no < (static_cast(node->table->n_def) + + static_cast(node->table->n_v_def))) { dict_build_col_def_step(node); @@ -1072,24 +1486,57 @@ dict_create_table_step( return(thr); } else { - node->state = TABLE_COMMIT_WORK; + /* Move on to SYS_VIRTUAL table */ + node->col_no = 0; + node->base_col_no = 0; + node->state = TABLE_BUILD_V_COL_DEF; } } - if (node->state == TABLE_COMMIT_WORK) { + if (node->state == TABLE_BUILD_V_COL_DEF) { - /* Table was correctly defined: do NOT commit the transaction - (CREATE TABLE does NOT do an implicit commit of the current - transaction) */ + if (node->col_no < static_cast(node->table->n_v_def)) { + dict_v_col_t* v_col = dict_table_get_nth_v_col( + node->table, node->col_no); - node->state = TABLE_ADD_TO_CACHE; + /* If no base column */ + while (v_col->num_base == 0) { + node->col_no++; + if (node->col_no == static_cast( + (node->table)->n_v_def)) { + node->state = TABLE_ADD_TO_CACHE; + break; + } - /* thr->run_node = node->commit_node; + v_col = dict_table_get_nth_v_col( + node->table, node->col_no); + node->base_col_no = 0; + } - return(thr); */ + if (node->state != TABLE_ADD_TO_CACHE) { + ut_ad(node->col_no == v_col->v_pos); + dict_build_v_col_def_step(node); + + if (node->base_col_no < v_col->num_base - 1) { + /* move on to next base column */ + node->base_col_no++; + } else { + /* move on to next virtual column */ + node->col_no++; + node->base_col_no = 0; + } + + thr->run_node = node->v_col_def; + + return(thr); + } + } else { + node->state = 
TABLE_ADD_TO_CACHE; + } } if (node->state == TABLE_ADD_TO_CACHE) { + DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE();); dict_table_add_to_cache(node->table, TRUE, node->heap); @@ -1119,8 +1566,7 @@ function_exit: /***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* dict_create_index_step( /*===================*/ @@ -1131,7 +1577,7 @@ dict_create_index_step( trx_t* trx; ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); trx = thr_get_trx(thr); @@ -1180,11 +1626,9 @@ dict_create_index_step( index_id_t index_id = node->index->id; - err = dict_index_add_to_cache( - node->table, node->index, FIL_NULL, - trx_is_strict(trx) - || dict_table_get_format(node->table) - >= UNIV_FORMAT_B); + err = dict_index_add_to_cache_w_vcol( + node->table, node->index, node->add_v, FIL_NULL, + trx_is_strict(trx)); node->index = dict_index_get_if_in_cache_low(index_id); ut_a((node->index == NULL) == (err != DB_SUCCESS)); @@ -1244,20 +1688,6 @@ dict_create_index_step( dict_index_add_to_cache(). */ ut_ad(node->index->trx_id == trx->id); ut_ad(node->index->table->def_trx_id == trx->id); - node->state = INDEX_COMMIT_WORK; - } - - if (node->state == INDEX_COMMIT_WORK) { - - /* Index was correctly defined: do NOT commit the transaction - (CREATE INDEX does NOT currently do an implicit commit of - the current transaction) */ - - node->state = INDEX_CREATE_INDEX_TREE; - - /* thr->run_node = node->commit_node; - - return(thr); */ } function_exit: @@ -1316,7 +1746,7 @@ dict_check_if_system_table_exists( /* This table has already been created, and it is OK. Ensure that it can't be evicted from the table LRU cache. 
*/ - dict_table_move_from_lru_to_non_lru(sys_table); + dict_table_prevent_eviction(sys_table); } mutex_exit(&dict_sys->mutex); @@ -1328,8 +1758,7 @@ dict_check_if_system_table_exists( Creates the foreign key constraints system tables inside InnoDB at server bootstrap or server start if they are not found or are not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_create_or_check_foreign_constraint_tables(void) /*================================================*/ @@ -1366,22 +1795,19 @@ dict_create_or_check_foreign_constraint_tables(void) /* Check which incomplete table definition to drop. */ if (sys_foreign_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_FOREIGN table."); + ib::warn() << "Dropping incompletely created" + " SYS_FOREIGN table."; row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); } if (sys_foreign_cols_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_FOREIGN_COLS table."); + ib::warn() << "Dropping incompletely created" + " SYS_FOREIGN_COLS table."; row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); } - ib_logf(IB_LOG_LEVEL_WARN, - "Creating foreign key constraint system tables."); + ib::warn() << "Creating foreign key constraint system tables."; /* NOTE: in dict_load_foreigns we use the fact that there are 2 secondary indexes on SYS_FOREIGN, and they @@ -1422,11 +1848,10 @@ dict_create_or_check_foreign_constraint_tables(void) FALSE, trx); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS " - "has failed with error %lu. Tablespace is full. " - "Dropping incompletely created tables.", - (ulong) err); + + ib::error() << "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS" + " failed: " << ut_strerr(err) << ". Tablespace is" + " full. 
Dropping incompletely created tables."; ut_ad(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); @@ -1448,8 +1873,7 @@ dict_create_or_check_foreign_constraint_tables(void) srv_file_per_table = srv_file_per_table_backup; if (err == DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_INFO, - "Foreign key constraint system tables created"); + ib::info() << "Foreign key constraint system tables created"; } /* Note: The master thread has not been started at this point. */ @@ -1465,9 +1889,118 @@ dict_create_or_check_foreign_constraint_tables(void) return(err); } +/** Creates the virtual column system table (SYS_VIRTUAL) inside InnoDB +at server bootstrap or server start if the table is not found or is +not of the right form. +@return DB_SUCCESS or error code */ +dberr_t +dict_create_or_check_sys_virtual() +{ + trx_t* trx; + my_bool srv_file_per_table_backup; + dberr_t err; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + /* Note: The master thread has not been started at this point. */ + err = dict_check_if_system_table_exists( + "SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1); + + if (err == DB_SUCCESS) { + mutex_enter(&dict_sys->mutex); + dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL"); + mutex_exit(&dict_sys->mutex); + return(DB_SUCCESS); + } + + if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO + || srv_read_only_mode) { + ib::error() << "Cannot create sys_virtual system tables;" + " running in read-only mode."; + return(DB_ERROR); + } + + trx = trx_allocate_for_mysql(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + trx->op_info = "creating sys_virtual tables"; + + row_mysql_lock_data_dictionary(trx); + + /* Check which incomplete table definition to drop. 
*/ + + if (err == DB_CORRUPTION) { + ib::warn() << "Dropping incompletely created" + " SYS_VIRTUAL table."; + row_drop_table_for_mysql("SYS_VIRTUAL", trx, false, TRUE); + } + + ib::info() << "Creating sys_virtual system tables."; + + srv_file_per_table_backup = srv_file_per_table; + + /* We always want SYSTEM tables to be created inside the system + tablespace. */ + + srv_file_per_table = 0; + + err = que_eval_sql( + NULL, + "PROCEDURE CREATE_SYS_VIRTUAL_TABLES_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE\n" + "SYS_VIRTUAL(TABLE_ID BIGINT, POS INT," + " BASE_POS INT);\n" + "CREATE UNIQUE CLUSTERED INDEX BASE_IDX" + " ON SYS_VIRTUAL(TABLE_ID, POS, BASE_POS);\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + + ib::error() << "Creation of SYS_VIRTUAL" + " failed: " << ut_strerr(err) << ". Tablespace is" + " full or too many transactions." + " Dropping incompletely created tables."; + + ut_ad(err == DB_OUT_OF_FILE_SPACE + || err == DB_TOO_MANY_CONCURRENT_TRXS); + + row_drop_table_for_mysql("SYS_VIRTUAL", trx, false, TRUE); + + if (err == DB_OUT_OF_FILE_SPACE) { + err = DB_MUST_GET_MORE_FILE_SPACE; + } + } + + trx_commit_for_mysql(trx); + + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); + + srv_file_per_table = srv_file_per_table_backup; + + if (err == DB_SUCCESS) { + ib::info() << "sys_virtual table created"; + } + + /* Note: The master thread has not been started at this point. */ + /* Confirm and move to the non-LRU part of the table LRU list. */ + dberr_t sys_virtual_err = dict_check_if_system_table_exists( + "SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1); + ut_a(sys_virtual_err == DB_SUCCESS); + mutex_enter(&dict_sys->mutex); + dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL"); + mutex_exit(&dict_sys->mutex); + + return(err); +} + /****************************************************************//** Evaluate the given foreign key SQL statement. 
-@return error code or DB_SUCCESS */ +@return error code or DB_SUCCESS */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t dict_foreign_eval_sql( @@ -1489,9 +2022,9 @@ dict_foreign_eval_sql( ut_print_timestamp(ef); fputs(" Error in foreign key constraint creation for table ", ef); - ut_print_name(ef, trx, TRUE, name); + ut_print_name(ef, trx, name); fputs(".\nA foreign key constraint of name ", ef); - ut_print_name(ef, trx, TRUE, id); + ut_print_name(ef, trx, id); fputs("\nalready exists." " (Note that internally InnoDB adds 'databasename'\n" "in front of the user-defined constraint name.)\n" @@ -1510,15 +2043,14 @@ dict_foreign_eval_sql( } if (error != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Foreign key constraint creation failed:\n" - "InnoDB: internal error number %lu\n", (ulong) error); + ib::error() << "Foreign key constraint creation failed: " + << ut_strerr(error); mutex_enter(&dict_foreign_err_mutex); ut_print_timestamp(ef); fputs(" Internal error in foreign key constraint creation" " for table ", ef); - ut_print_name(ef, trx, TRUE, name); + ut_print_name(ef, trx, name); fputs(".\n" "See the MySQL .err log in the datadir" " for more information.\n", ef); @@ -1533,7 +2065,7 @@ dict_foreign_eval_sql( /********************************************************************//** Add a single foreign key field definition to the data dictionary tables in the database. 
-@return error code or DB_SUCCESS */ +@return error code or DB_SUCCESS */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t dict_create_add_foreign_field_to_dictionary( @@ -1543,6 +2075,8 @@ dict_create_add_foreign_field_to_dictionary( const dict_foreign_t* foreign, /*!< in: foreign */ trx_t* trx) /*!< in/out: transaction */ { + DBUG_ENTER("dict_create_add_foreign_field_to_dictionary"); + pars_info_t* info = pars_info_create(); pars_info_add_str_literal(info, "id", foreign->id); @@ -1555,7 +2089,7 @@ dict_create_add_foreign_field_to_dictionary( pars_info_add_str_literal(info, "ref_col_name", foreign->referenced_col_names[field_nr]); - return(dict_foreign_eval_sql( + DBUG_RETURN(dict_foreign_eval_sql( info, "PROCEDURE P () IS\n" "BEGIN\n" @@ -1583,7 +2117,7 @@ dict_foreign_def_get( tbname = dict_remove_db_name(foreign->id); bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN, - tbname, strlen(tbname), trx->mysql_thd, FALSE); + tbname, strlen(tbname), trx->mysql_thd); tablebuf[bufend - tablebuf] = '\0'; sprintf(fk_def, @@ -1594,7 +2128,7 @@ dict_foreign_def_get( innobase_convert_name(buf, MAX_TABLE_NAME_LEN, foreign->foreign_col_names[i], strlen(foreign->foreign_col_names[i]), - trx->mysql_thd, FALSE); + trx->mysql_thd); strcat(fk_def, buf); if (i < foreign->n_fields-1) { strcat(fk_def, (char *)","); @@ -1606,7 +2140,7 @@ dict_foreign_def_get( bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN, foreign->referenced_table_name, strlen(foreign->referenced_table_name), - trx->mysql_thd, TRUE); + trx->mysql_thd); tablebuf[bufend - tablebuf] = '\0'; strcat(fk_def, tablebuf); @@ -1617,7 +2151,7 @@ dict_foreign_def_get( bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN, foreign->referenced_col_names[i], strlen(foreign->referenced_col_names[i]), - trx->mysql_thd, FALSE); + trx->mysql_thd); buf[bufend - buf] = '\0'; strcat(fk_def, buf); if (i < foreign->n_fields-1) { @@ -1649,14 +2183,14 @@ dict_foreign_def_get_fields( bufend = 
innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN, foreign->foreign_col_names[col_no], strlen(foreign->foreign_col_names[col_no]), - trx->mysql_thd, FALSE); + trx->mysql_thd); fieldbuf[bufend - fieldbuf] = '\0'; bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN, foreign->referenced_col_names[col_no], strlen(foreign->referenced_col_names[col_no]), - trx->mysql_thd, FALSE); + trx->mysql_thd); fieldbuf2[bufend - fieldbuf2] = '\0'; *field = fieldbuf; @@ -1665,8 +2199,7 @@ dict_foreign_def_get_fields( /********************************************************************//** Add a foreign key definition to the data dictionary tables. -@return error code or DB_SUCCESS */ -UNIV_INTERN +@return error code or DB_SUCCESS */ dberr_t dict_create_add_foreign_to_dictionary( /*==================================*/ @@ -1676,6 +2209,9 @@ dict_create_add_foreign_to_dictionary( trx_t* trx) /*!< in/out: dictionary transaction */ { dberr_t error; + + DBUG_ENTER("dict_create_add_foreign_to_dictionary"); + pars_info_t* info = pars_info_create(); pars_info_add_str_literal(info, "id", foreign->id); @@ -1688,6 +2224,11 @@ dict_create_add_foreign_to_dictionary( pars_info_add_int4_literal(info, "n_cols", foreign->n_fields + (foreign->type << 24)); + DBUG_PRINT("dict_create_add_foreign_to_dictionary", + ("'%s', '%s', '%s', %d", foreign->id, name, + foreign->referenced_table_name, + foreign->n_fields + (foreign->type << 24))); + error = dict_foreign_eval_sql(info, "PROCEDURE P () IS\n" "BEGIN\n" @@ -1704,11 +2245,11 @@ dict_create_add_foreign_to_dictionary( char* fk_def; innobase_convert_name(tablename, MAX_TABLE_NAME_LEN, - table->name, strlen(table->name), - trx->mysql_thd, TRUE); + table->name.m_name, strlen(table->name.m_name), + trx->mysql_thd); innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE); + foreign->id, strlen(foreign->id), trx->mysql_thd); fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx); @@ -1721,7 
+2262,7 @@ dict_create_add_foreign_to_dictionary( tablename, buf, fk_def); } - return(error); + DBUG_RETURN(error); } for (ulint i = 0; i < foreign->n_fields; i++) { @@ -1736,10 +2277,10 @@ dict_create_add_foreign_to_dictionary( char* fk_def; innobase_convert_name(tablename, MAX_TABLE_NAME_LEN, - table->name, strlen(table->name), - trx->mysql_thd, TRUE); + table->name.m_name, strlen(table->name.m_name), + trx->mysql_thd); innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE); + foreign->id, strlen(foreign->id), trx->mysql_thd); fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx); dict_foreign_def_get_fields((dict_foreign_t*)foreign, trx, &field, &field2, i); @@ -1750,11 +2291,202 @@ dict_create_add_foreign_to_dictionary( " Error in foreign key definition: %s.", tablename, buf, i+1, fk_def); - return(error); + DBUG_RETURN(error); } } - return(error); + DBUG_RETURN(error); +} + +/** Check whether a column is in an index by the column name +@param[in] col_name column name for the column to be checked +@param[in] index the index to be searched +@return true if this column is in the index, otherwise, false */ +static +bool +dict_index_has_col_by_name( +/*=======================*/ + const char* col_name, + const dict_index_t* index) +{ + for (ulint i = 0; i < index->n_fields; i++) { + dict_field_t* field = dict_index_get_nth_field(index, i); + + if (strcmp(field->name, col_name) == 0) { + return(true); + } + } + return(false); +} + +/** Check whether the foreign constraint could be on a column that is +part of a virtual index (index contains virtual column) in the table +@param[in] fk_col_name FK column name to be checked +@param[in] table the table +@return true if this column is indexed with other virtual columns */ +bool +dict_foreign_has_col_in_v_index( + const char* fk_col_name, + const dict_table_t* table) +{ + /* virtual column can't be Primary Key, so start with secondary index */ + for (dict_index_t* 
index = dict_table_get_next_index( + dict_table_get_first_index(table)); + index; + index = dict_table_get_next_index(index)) { + + if (dict_index_has_virtual(index)) { + if (dict_index_has_col_by_name(fk_col_name, index)) { + return(true); + } + } + } + + return(false); +} + + +/** Check whether the foreign constraint could be on a column that is +a base column of some indexed virtual columns. +@param[in] col_name column name for the column to be checked +@param[in] table the table +@return true if this column is a base column, otherwise, false */ +bool +dict_foreign_has_col_as_base_col( + const char* col_name, + const dict_table_t* table) +{ + /* Loop through each virtual column and check if its base column has + the same name as the column name being checked */ + for (ulint i = 0; i < table->n_v_cols; i++) { + dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i); + + /* Only check if the virtual column is indexed */ + if (!v_col->m_col.ord_part) { + continue; + } + + for (ulint j = 0; j < v_col->num_base; j++) { + if (strcmp(col_name, dict_table_get_col_name( + table, + v_col->base_col[j]->ind)) == 0) { + return(true); + } + } + } + + return(false); +} + +/** Check if a foreign constraint is on the given column name. +@param[in] col_name column name to be searched for fk constraint +@param[in] table table to which foreign key constraint belongs +@return true if fk constraint is present on the table, false otherwise. 
*/ +static +bool +dict_foreign_base_for_stored( + const char* col_name, + const dict_table_t* table) +{ + /* Loop through each stored column and check if its base column has + the same name as the column name being checked */ + dict_s_col_list::const_iterator it; + for (it = table->s_cols->begin(); + it != table->s_cols->end(); ++it) { + dict_s_col_t s_col = *it; + + for (ulint j = 0; j < s_col.num_base; j++) { + if (strcmp(col_name, dict_table_get_col_name( + table, + s_col.base_col[j]->ind)) == 0) { + return(true); + } + } + } + + return(false); +} + +/** Check if a foreign constraint is on columns served as base columns +of any stored column. This is to prevent creating SET NULL or CASCADE +constraint on such columns +@param[in] local_fk_set set of foreign key objects, to be added to +the dictionary tables +@param[in] table table to which the foreign key objects in +local_fk_set belong to +@return true if yes, otherwise, false */ +bool +dict_foreigns_has_s_base_col( + const dict_foreign_set& local_fk_set, + const dict_table_t* table) +{ + dict_foreign_t* foreign; + + if (table->s_cols == NULL) { + return (false); + } + + for (dict_foreign_set::const_iterator it = local_fk_set.begin(); + it != local_fk_set.end(); ++it) { + + foreign = *it; + ulint type = foreign->type; + + type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION + | DICT_FOREIGN_ON_UPDATE_NO_ACTION); + + if (type == 0) { + continue; + } + + for (ulint i = 0; i < foreign->n_fields; i++) { + /* Check if the constraint is on a column that + is a base column of any stored column */ + if (dict_foreign_base_for_stored( + foreign->foreign_col_names[i], table)) { + return(true); + } + } + } + + return(false); +} + +/** Check if a column is in foreign constraint with CASCADE properties or +SET NULL +@param[in] table table +@param[in] fk_col_name name for the column to be checked +@return true if the column is in foreign constraint, otherwise, false */ +bool +dict_foreigns_has_this_col( + const dict_table_t* table, + 
const char* col_name) +{ + dict_foreign_t* foreign; + const dict_foreign_set* local_fk_set = &table->foreign_set; + + for (dict_foreign_set::const_iterator it = local_fk_set->begin(); + it != local_fk_set->end(); + ++it) { + foreign = *it; + ut_ad(foreign->id != NULL); + ulint type = foreign->type; + + type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION + | DICT_FOREIGN_ON_UPDATE_NO_ACTION); + + if (type == 0) { + continue; + } + + for (ulint i = 0; i < foreign->n_fields; i++) { + if (strcmp(foreign->foreign_col_names[i], + col_name) == 0) { + return(true); + } + } + } + return(false); } /** Adds the given set of foreign key objects to the dictionary tables @@ -1767,7 +2499,6 @@ the dictionary tables local_fk_set belong to @param[in,out] trx transaction @return error code or DB_SUCCESS */ -UNIV_INTERN dberr_t dict_create_add_foreigns_to_dictionary( /*===================================*/ @@ -1778,12 +2509,17 @@ dict_create_add_foreigns_to_dictionary( dict_foreign_t* foreign; dberr_t error; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex) + || dict_table_is_intrinsic(table)); + + if (dict_table_is_intrinsic(table)) { + goto exit_loop; + } if (NULL == dict_table_get_low("SYS_FOREIGN")) { - fprintf(stderr, - "InnoDB: table SYS_FOREIGN not found" - " in internal data dictionary\n"); + + ib::error() << "Table SYS_FOREIGN not found" + " in internal data dictionary"; return(DB_ERROR); } @@ -1795,8 +2531,8 @@ dict_create_add_foreigns_to_dictionary( foreign = *it; ut_ad(foreign->id != NULL); - error = dict_create_add_foreign_to_dictionary((dict_table_t*)table, table->name, - foreign, trx); + error = dict_create_add_foreign_to_dictionary( + (dict_table_t*)table, table->name.m_name, foreign, trx); if (error != DB_SUCCESS) { @@ -1804,9 +2540,13 @@ dict_create_add_foreigns_to_dictionary( } } +exit_loop: trx->op_info = "committing foreign key definitions"; - trx_commit(trx); + if (trx_is_started(trx)) { + + trx_commit(trx); + } trx->op_info = ""; @@ -1817,8 
+2557,7 @@ dict_create_add_foreigns_to_dictionary( Creates the tablespaces and datafiles system tables inside InnoDB at server bootstrap or server start if they are not found or are not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_create_or_check_sys_tablespace(void) /*=====================================*/ @@ -1854,22 +2593,19 @@ dict_create_or_check_sys_tablespace(void) /* Check which incomplete table definition to drop. */ if (sys_tablespaces_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_TABLESPACES table."); + ib::warn() << "Dropping incompletely created" + " SYS_TABLESPACES table."; row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); } if (sys_datafiles_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_DATAFILES table."); + ib::warn() << "Dropping incompletely created" + " SYS_DATAFILES table."; row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); } - ib_logf(IB_LOG_LEVEL_INFO, - "Creating tablespace and datafile system tables."); + ib::info() << "Creating tablespace and datafile system tables."; /* We always want SYSTEM tables to be created inside the system tablespace. */ @@ -1892,11 +2628,10 @@ dict_create_or_check_sys_tablespace(void) FALSE, trx); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creation of SYS_TABLESPACES and SYS_DATAFILES " - "has failed with error %lu. Tablespace is full. " - "Dropping incompletely created tables.", - (ulong) err); + + ib::error() << "Creation of SYS_TABLESPACES and SYS_DATAFILES" + " has failed with error " << ut_strerr(err) + << ". 
Dropping incompletely created tables."; ut_a(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); @@ -1918,8 +2653,7 @@ dict_create_or_check_sys_tablespace(void) srv_file_per_table = srv_file_per_table_backup; if (err == DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_INFO, - "Tablespace and datafile system tables created."); + ib::info() << "Tablespace and datafile system tables created."; } /* Note: The master thread has not been started at this point. */ @@ -1936,29 +2670,34 @@ dict_create_or_check_sys_tablespace(void) return(err); } -/********************************************************************//** -Add a single tablespace definition to the data dictionary tables in the -database. -@return error code or DB_SUCCESS */ -UNIV_INTERN +/** Put a tablespace definition into the data dictionary, +replacing what was there previously. +@param[in] space Tablespace id +@param[in] name Tablespace name +@param[in] flags Tablespace flags +@param[in] path Tablespace path +@param[in] trx Transaction +@param[in] commit If true, commit the transaction +@return error code or DB_SUCCESS */ dberr_t -dict_create_add_tablespace_to_dictionary( -/*=====================================*/ - ulint space, /*!< in: tablespace id */ - const char* name, /*!< in: tablespace name */ - ulint flags, /*!< in: tablespace flags */ - const char* path, /*!< in: tablespace path */ - trx_t* trx, /*!< in/out: transaction */ - bool commit) /*!< in: if true then commit the - transaction */ +dict_replace_tablespace_in_dictionary( + ulint space_id, + const char* name, + ulint flags, + const char* path, + trx_t* trx, + bool commit) { + if (!srv_sys_tablespaces_open) { + /* Startup procedure is not yet ready for updates. 
*/ + return(DB_SUCCESS); + } + dberr_t error; pars_info_t* info = pars_info_create(); - ut_a(space > TRX_SYS_SPACE); - - pars_info_add_int4_literal(info, "space", space); + pars_info_add_int4_literal(info, "space", space_id); pars_info_add_str_literal(info, "name", name); @@ -1968,11 +2707,27 @@ dict_create_add_tablespace_to_dictionary( error = que_eval_sql(info, "PROCEDURE P () IS\n" + "p CHAR;\n" + + "DECLARE CURSOR c IS\n" + " SELECT PATH FROM SYS_DATAFILES\n" + " WHERE SPACE=:space FOR UPDATE;\n" + "BEGIN\n" - "INSERT INTO SYS_TABLESPACES VALUES" + "OPEN c;\n" + "FETCH c INTO p;\n" + + "IF (SQL % NOTFOUND) THEN" + " DELETE FROM SYS_TABLESPACES " + "WHERE SPACE=:space;\n" + " INSERT INTO SYS_TABLESPACES VALUES" "(:space, :name, :flags);\n" - "INSERT INTO SYS_DATAFILES VALUES" + " INSERT INTO SYS_DATAFILES VALUES" "(:space, :path);\n" + "ELSIF p <> :path THEN\n" + " UPDATE SYS_DATAFILES SET PATH=:path" + " WHERE CURRENT OF c;\n" + "END IF;\n" "END;\n", FALSE, trx); @@ -1989,3 +2744,66 @@ dict_create_add_tablespace_to_dictionary( return(error); } + +/** Delete records from SYS_TABLESPACES and SYS_DATAFILES associated +with a particular tablespace ID. 
+@param[in] space Tablespace ID +@param[in,out] trx Current transaction +@return DB_SUCCESS if OK, dberr_t if the operation failed */ + +dberr_t +dict_delete_tablespace_and_datafiles( + ulint space, + trx_t* trx) +{ + dberr_t err = DB_SUCCESS; + + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(srv_sys_tablespaces_open); + + trx->op_info = "delete tablespace and datafiles from dictionary"; + + pars_info_t* info = pars_info_create(); + ut_a(!is_system_tablespace(space)); + pars_info_add_int4_literal(info, "space", space); + + err = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "DELETE FROM SYS_TABLESPACES\n" + "WHERE SPACE = :space;\n" + "DELETE FROM SYS_DATAFILES\n" + "WHERE SPACE = :space;\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + ib::warn() << "Could not delete space_id " + << space << " from data dictionary"; + } + + trx->op_info = ""; + + return(err); +} + +/** Assign a new table ID and put it into the table cache and the transaction. +@param[in,out] table Table that needs an ID +@param[in,out] trx Transaction */ +void +dict_table_assign_new_id( + dict_table_t* table, + trx_t* trx) +{ + if (dict_table_is_intrinsic(table)) { + /* There is no significance of this table->id (if table is + intrinsic) so assign it default instead of something meaningful + to avoid confusion.*/ + table->id = ULINT_UNDEFINED; + } else { + dict_hdr_get_new_id(&table->id, NULL, NULL, table, false); + } + + trx->table_id = table->id; +} diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc new file mode 100644 index 00000000000..82aa3abcde6 --- /dev/null +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -0,0 +1,403 @@ +/***************************************************************************** + +Copyright (c) 2016, MariaDB Corporation. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0defrag_bg.cc +Defragmentation routines. + +Created 25/08/2016 Jan Lindström +*******************************************************/ + +#include "dict0dict.h" +#include "dict0stats.h" +#include "dict0stats_bg.h" +#include "dict0defrag_bg.h" +#include "row0mysql.h" +#include "srv0start.h" +#include "ut0new.h" + +#ifdef UNIV_NONINL +# include "dict0stats_bg.ic" +#endif + +#include + +static ib_mutex_t defrag_pool_mutex; + +#ifdef MYSQL_PFS +static mysql_pfs_key_t defrag_pool_mutex_key; +#endif + +/** The number of tables that can be added to "defrag_pool" before +it is enlarged */ +static const ulint DEFRAG_POOL_INITIAL_SLOTS = 128; + +/** Indices whose defrag stats need to be saved to persistent storage.*/ +struct defrag_pool_item_t { + table_id_t table_id; + index_id_t index_id; +}; + +/** Allocator type, used by std::vector */ +typedef ut_allocator + defrag_pool_allocator_t; + +/** The multitude of tables to be defragmented- an STL vector */ +typedef std::vector + defrag_pool_t; + +/** Iterator type for iterating over the elements of objects of type +defrag_pool_t. 
*/ +typedef defrag_pool_t::iterator defrag_pool_iterator_t; + +/** Pool where we store information on which tables are to be processed +by background defragmentation. */ +static defrag_pool_t* defrag_pool; + +extern bool dict_stats_start_shutdown; + +/*****************************************************************//** +Initialize the defrag pool, called once during thread initialization. */ +void +dict_defrag_pool_init(void) +/*=======================*/ +{ + ut_ad(!srv_read_only_mode); + /* JAN: TODO: MySQL 5.7 PSI + const PSI_memory_key key2 = mem_key_dict_defrag_pool_t; + + defrag_pool = UT_NEW(defrag_pool_t(defrag_pool_allocator_t(key2)), key2); + + recalc_pool->reserve(RECALC_POOL_INITIAL_SLOTS); + */ + defrag_pool = new std::vector(); + + /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */ + mutex_create(LATCH_ID_DEFRAGMENT_MUTEX, &defrag_pool_mutex); +} + +/*****************************************************************//** +Free the resources occupied by the defrag pool, called once during +thread de-initialization. */ +void +dict_defrag_pool_deinit(void) +/*=========================*/ +{ + ut_ad(!srv_read_only_mode); + + defrag_pool->clear(); + mutex_free(&defrag_pool_mutex); + + UT_DELETE(defrag_pool); +} + +/*****************************************************************//** +Get an index from the auto defrag pool. The returned index id is removed +from the pool. 
+@return true if the pool was non-empty and "id" was set, false otherwise */ +static +bool +dict_stats_defrag_pool_get( +/*=======================*/ + table_id_t* table_id, /*!< out: table id, or unmodified if + list is empty */ + index_id_t* index_id) /*!< out: index id, or unmodified if + list is empty */ +{ + ut_ad(!srv_read_only_mode); + + mutex_enter(&defrag_pool_mutex); + + if (defrag_pool->empty()) { + mutex_exit(&defrag_pool_mutex); + return(false); + } + + defrag_pool_item_t& item = defrag_pool->back(); + *table_id = item.table_id; + *index_id = item.index_id; + + defrag_pool->pop_back(); + + mutex_exit(&defrag_pool_mutex); + + return(true); +} + +/*****************************************************************//** +Add an index in a table to the defrag pool, which is processed by the +background stats gathering thread. Only the table id and index id are +added to the list, so the table can be closed after being enqueued and +it will be opened when needed. If the table or index does not exist later +(has been DROPped), then it will be removed from the pool and skipped. */ +void +dict_stats_defrag_pool_add( +/*=======================*/ + const dict_index_t* index) /*!< in: table to add */ +{ + defrag_pool_item_t item; + + ut_ad(!srv_read_only_mode); + + mutex_enter(&defrag_pool_mutex); + + /* quit if already in the list */ + for (defrag_pool_iterator_t iter = defrag_pool->begin(); + iter != defrag_pool->end(); + ++iter) { + if ((*iter).table_id == index->table->id + && (*iter).index_id == index->id) { + mutex_exit(&defrag_pool_mutex); + return; + } + } + + item.table_id = index->table->id; + item.index_id = index->id; + defrag_pool->push_back(item); + + mutex_exit(&defrag_pool_mutex); + + os_event_set(dict_stats_event); +} + +/*****************************************************************//** +Delete a given index from the auto defrag pool. 
*/ +void +dict_stats_defrag_pool_del( +/*=======================*/ + const dict_table_t* table, /*!mutex)); + + mutex_enter(&defrag_pool_mutex); + + defrag_pool_iterator_t iter = defrag_pool->begin(); + while (iter != defrag_pool->end()) { + if ((table && (*iter).table_id == table->id) + || (index + && (*iter).table_id == index->table->id + && (*iter).index_id == index->id)) { + /* erase() invalidates the iterator */ + iter = defrag_pool->erase(iter); + if (index) + break; + } else { + iter++; + } + } + + mutex_exit(&defrag_pool_mutex); +} + +/*****************************************************************//** +Get the first index that has been added for updating persistent defrag +stats and eventually save its stats. */ +static +void +dict_stats_process_entry_from_defrag_pool() +/*=======================================*/ +{ + table_id_t table_id; + index_id_t index_id; + dberr_t err = DB_SUCCESS; + + ut_ad(!srv_read_only_mode); + + /* pop the first index from the auto defrag pool */ + if (!dict_stats_defrag_pool_get(&table_id, &index_id)) { + /* no index in defrag pool */ + return; + } + + dict_table_t* table; + + mutex_enter(&dict_sys->mutex); + + /* If the table is no longer cached, we've already lost the in + memory stats so there's nothing really to write to disk. 
*/ + table = dict_table_open_on_id(table_id, TRUE, + DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); + + if (table == NULL) { + mutex_exit(&dict_sys->mutex); + return; + } + + /* Check whether table is corrupted */ + if (table->corrupted) { + dict_table_close(table, TRUE, FALSE); + mutex_exit(&dict_sys->mutex); + return; + } + mutex_exit(&dict_sys->mutex); + + dict_index_t* index = dict_table_find_index_on_id(table, index_id); + + if (index == NULL) { + return; + } + + /* Check whether index is corrupted */ + if (dict_index_is_corrupted(index)) { + dict_table_close(table, FALSE, FALSE); + return; + } + + err = dict_stats_save_defrag_stats(index); + + if (err != DB_SUCCESS) { + ib::error() << "Saving defragmentation status for table " + << index->table->name.m_name + << " index " << index->name() + << " failed " << err; + } + + dict_table_close(table, FALSE, FALSE); +} + +/*****************************************************************//** +Get the first index that has been added for updating persistent defrag +stats and eventually save its stats. */ +void +dict_defrag_process_entries_from_defrag_pool() +/*==========================================*/ +{ + while (defrag_pool->size() && !dict_stats_start_shutdown) { + dict_stats_process_entry_from_defrag_pool(); + } +} + +/*********************************************************************//** +Save defragmentation result. 
+@return DB_SUCCESS or error code */ +dberr_t +dict_stats_save_defrag_summary( +/*============================*/ + dict_index_t* index) /*!< in: index */ +{ + dberr_t ret=DB_SUCCESS; + lint now = (lint) ut_time(); + + if (dict_index_is_univ(index)) { + return DB_SUCCESS; + } + + rw_lock_x_lock(dict_operation_lock); + mutex_enter(&dict_sys->mutex); + + ret = dict_stats_save_index_stat(index, now, "n_pages_freed", + index->stat_defrag_n_pages_freed, + NULL, + "Number of pages freed during" + " last defragmentation run.", + NULL); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + + return (ret); +} + +/*********************************************************************//** +Save defragmentation stats for a given index. +@return DB_SUCCESS or error code */ +dberr_t +dict_stats_save_defrag_stats( +/*============================*/ + dict_index_t* index) /*!< in: index */ +{ + dberr_t ret; + + if (index->table->ibd_file_missing) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot save defragment stats because " + ".ibd file is missing.\n"); + return (DB_TABLESPACE_DELETED); + } + if (dict_index_is_corrupted(index)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot save defragment stats because " + "index is corrupted.\n"); + return(DB_CORRUPTION); + } + + if (dict_index_is_univ(index)) { + return DB_SUCCESS; + } + + lint now = (lint) ut_time(); + mtr_t mtr; + ulint n_leaf_pages; + ulint n_leaf_reserved; + mtr_start(&mtr); + mtr_s_lock(dict_index_get_lock(index), &mtr); + n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, + &n_leaf_pages, &mtr); + mtr_commit(&mtr); + + if (n_leaf_reserved == ULINT_UNDEFINED) { + // The index name is different during fast index creation, + // so the stats won't be associated with the right index + // for later use. We just return without saving. 
+ return DB_SUCCESS; + } + + rw_lock_x_lock(dict_operation_lock); + + mutex_enter(&dict_sys->mutex); + ret = dict_stats_save_index_stat(index, now, "n_page_split", + index->stat_defrag_n_page_split, + NULL, + "Number of new page splits on leaves" + " since last defragmentation.", + NULL); + if (ret != DB_SUCCESS) { + goto end; + } + + ret = dict_stats_save_index_stat( + index, now, "n_leaf_pages_defrag", + n_leaf_pages, + NULL, + "Number of leaf pages when this stat is saved to disk", + NULL); + if (ret != DB_SUCCESS) { + goto end; + } + + ret = dict_stats_save_index_stat( + index, now, "n_leaf_pages_reserved", + n_leaf_reserved, + NULL, + "Number of pages reserved for this index leaves when this stat " + "is saved to disk", + NULL); + +end: + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + + return (ret); +} diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 0310e5e1d66..6a33de63b69 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2015, MariaDB Corporation. +Copyright (c) 2013, 2016, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,11 +25,17 @@ Data dictionary system Created 1/8/1996 Heikki Tuuri ***********************************************************************/ +#include +#include + +#include "ha_prototypes.h" +#include +#include + #include "dict0dict.h" #include "fts0fts.h" #include "fil0fil.h" #include -#include #ifdef UNIV_NONINL #include "dict0dict.ic" @@ -37,13 +43,11 @@ Created 1/8/1996 Heikki Tuuri #endif /** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ -UNIV_INTERN dict_index_t* dict_ind_redundant; -/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ -UNIV_INTERN dict_index_t* dict_ind_compact; +dict_index_t* dict_ind_redundant; #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /** Flag to control insert buffer debugging. */ -extern UNIV_INTERN uint ibuf_debug; +extern uint ibuf_debug; #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /********************************************************************** @@ -52,46 +56,44 @@ void ib_warn_row_too_big(const dict_table_t* table); #ifndef UNIV_HOTBACKUP -#include "buf0buf.h" -#include "data0type.h" -#include "mach0data.h" -#include "dict0boot.h" -#include "dict0mem.h" -#include "dict0crea.h" -#include "dict0stats.h" -#include "trx0undo.h" #include "btr0btr.h" #include "btr0cur.h" #include "btr0sea.h" +#include "buf0buf.h" +#include "data0type.h" +#include "dict0boot.h" +#include "dict0crea.h" +#include "dict0mem.h" +#include "dict0priv.h" +#include "dict0stats.h" +#include "fsp0sysspace.h" +#include "fts0fts.h" +#include "fts0types.h" +#include "lock0lock.h" +#include "mach0data.h" +#include "mem0mem.h" #include "os0once.h" -#include "page0zip.h" #include "page0page.h" +#include "page0zip.h" #include "pars0pars.h" #include "pars0sym.h" #include "que0que.h" #include "rem0cmp.h" -#include "fts0fts.h" -#include "fts0types.h" -#include 
"m_ctype.h" /* my_isspace() */ -#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */ +#include "row0log.h" +#include "row0merge.h" +#include "row0mysql.h" +#include "row0upd.h" #include "srv0mon.h" #include "srv0start.h" -#include "lock0lock.h" -#include "dict0priv.h" -#include "row0upd.h" -#include "row0mysql.h" -#include "row0merge.h" -#include "row0log.h" -#include "ut0ut.h" /* ut_format_name() */ -#include "m_string.h" -#include "my_sys.h" -#include "mysqld.h" /* system_charset_info */ -#include "strfunc.h" /* strconvert() */ +#include "sync0sync.h" +#include "trx0undo.h" +#include "ut0new.h" -#include +#include +#include /** the dictionary system */ -UNIV_INTERN dict_sys_t* dict_sys = NULL; +dict_sys_t* dict_sys = NULL; /** @brief the data dictionary rw-latch protecting dict_sys @@ -101,29 +103,15 @@ in S-mode; we cannot trust that MySQL protects implicit or background operations a table drop since MySQL does not know of them; therefore we need this; NOTE: a transaction which reserves this must keep book on the mode in trx_t::dict_operation_lock_mode */ -UNIV_INTERN rw_lock_t dict_operation_lock; +rw_lock_t* dict_operation_lock; /** Percentage of compression failures that are allowed in a single round */ -UNIV_INTERN ulong zip_failure_threshold_pct = 5; +ulong zip_failure_threshold_pct = 5; /** Maximum percentage of a page that can be allowed as a pad to avoid compression failures */ -UNIV_INTERN ulong zip_pad_max = 50; - -/* Keys to register rwlocks and mutexes with performance schema */ -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key; -UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key; -UNIV_INTERN mysql_pfs_key_t index_online_log_key; -UNIV_INTERN mysql_pfs_key_t dict_table_stats_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key; -UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key; -UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key; -#endif /* 
UNIV_PFS_MUTEX */ +ulong zip_pad_max = 50; #define DICT_HEAP_SIZE 100 /*!< initial memory heap size when creating a table or index object */ @@ -143,17 +131,20 @@ static bool innodb_index_stats_not_found_reported = false; /*******************************************************************//** Tries to find column names for the index and sets the col field of the index. +@param[in] table table +@param[in] index index +@param[in] add_v new virtual columns added along with an add index call @return TRUE if the column names were found */ static ibool dict_index_find_cols( -/*=================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index); /*!< in: index */ + const dict_table_t* table, + dict_index_t* index, + const dict_add_v_col_t* add_v); /*******************************************************************//** Builds the internal dictionary cache representation for a clustered index, containing also system fields not defined by the user. -@return own: the internal representation of the clustered index */ +@return own: the internal representation of the clustered index */ static dict_index_t* dict_index_build_internal_clust( @@ -164,7 +155,7 @@ dict_index_build_internal_clust( /*******************************************************************//** Builds the internal dictionary cache representation for a non-clustered index, containing also system fields not defined by the user. -@return own: the internal representation of the non-clustered index */ +@return own: the internal representation of the non-clustered index */ static dict_index_t* dict_index_build_internal_non_clust( @@ -174,35 +165,13 @@ dict_index_build_internal_non_clust( a non-clustered index */ /**********************************************************************//** Builds the internal dictionary cache representation for an FTS index. 
-@return own: the internal representation of the FTS index */ +@return own: the internal representation of the FTS index */ static dict_index_t* dict_index_build_internal_fts( /*==========================*/ dict_table_t* table, /*!< in: table */ dict_index_t* index); /*!< in: user representation of an FTS index */ -/**********************************************************************//** -Prints a column data. */ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /*!< in: table */ - const dict_col_t* col); /*!< in: column */ -/**********************************************************************//** -Prints an index data. */ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index); /*!< in: index */ -/**********************************************************************//** -Prints a field data. */ -static -void -dict_field_print_low( -/*=================*/ - const dict_field_t* field); /*!< in: field */ /**********************************************************************//** Removes an index from the dictionary cache. */ @@ -242,25 +211,13 @@ dict_non_lru_find_table( /* Stream for storing detailed information about the latest foreign key and unique key errors. Only created if !srv_read_only_mode */ -UNIV_INTERN FILE* dict_foreign_err_file = NULL; +FILE* dict_foreign_err_file = NULL; /* mutex protecting the foreign and unique error buffers */ -UNIV_INTERN ib_mutex_t dict_foreign_err_mutex; - -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ -UNIV_INTERN -void -dict_casedn_str( -/*============*/ - char* a) /*!< in/out: string to put in lower case */ -{ - innobase_casedn_str(a); -} +ib_mutex_t dict_foreign_err_mutex; /********************************************************************//** Checks if the database name in two table names is the same. 
-@return TRUE if same db name */ -UNIV_INTERN +@return TRUE if same db name */ ibool dict_tables_have_same_db( /*=====================*/ @@ -280,8 +237,7 @@ dict_tables_have_same_db( /********************************************************************//** Return the end of table name where we have removed dbname and '/'. -@return table name */ -UNIV_INTERN +@return table name */ const char* dict_remove_db_name( /*================*/ @@ -296,8 +252,7 @@ dict_remove_db_name( /********************************************************************//** Get the database name length in a table name. -@return database name length */ -UNIV_INTERN +@return database name length */ ulint dict_get_db_name_len( /*=================*/ @@ -312,22 +267,20 @@ dict_get_db_name_len( /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ -UNIV_INTERN void dict_mutex_enter_for_mysql_func(const char * file, ulint line) /*============================*/ { - mutex_enter_func(&(dict_sys->mutex), file, line); + mutex_enter(&dict_sys->mutex); } /********************************************************************//** Releases the dictionary system mutex for MySQL. */ -UNIV_INTERN void dict_mutex_exit_for_mysql(void) /*===========================*/ { - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); } /** Allocate and init a dict_table_t's stats latch. @@ -340,7 +293,10 @@ dict_table_stats_latch_alloc( { dict_table_t* table = static_cast(table_void); - table->stats_latch = new(std::nothrow) rw_lock_t; + /* Note: rw_lock_create() will call the constructor */ + + table->stats_latch = static_cast( + ut_malloc_nokey(sizeof(rw_lock_t))); ut_a(table->stats_latch != NULL); @@ -357,7 +313,7 @@ dict_table_stats_latch_free( dict_table_t* table) { rw_lock_free(table->stats_latch); - delete table->stats_latch; + ut_free(table->stats_latch); } /** Create a dict_table_t's stats latch or delay for lazy creation. 
@@ -366,7 +322,6 @@ or from a thread that has not shared the table object with other threads. @param[in,out] table table whose stats latch to create @param[in] enabled if false then the latch is disabled and dict_table_stats_lock()/unlock() become noop on this table. */ - void dict_table_stats_latch_create( dict_table_t* table, @@ -378,23 +333,15 @@ dict_table_stats_latch_create( return; } -#ifdef HAVE_ATOMIC_BUILTINS /* We create this lazily the first time it is used. */ table->stats_latch = NULL; table->stats_latch_created = os_once::NEVER_DONE; -#else /* HAVE_ATOMIC_BUILTINS */ - - dict_table_stats_latch_alloc(table); - - table->stats_latch_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ } /** Destroy a dict_table_t's stats latch. This function is only called from either single threaded environment or from a thread that has not shared the table object with other threads. @param[in,out] table table whose stats latch to destroy */ - void dict_table_stats_latch_destroy( dict_table_t* table) @@ -406,25 +353,20 @@ dict_table_stats_latch_destroy( } } -/**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. */ -UNIV_INTERN +/** Lock the appropriate latch to protect a given table's statistics. 
+@param[in] table table whose stats to lock +@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */ void dict_table_stats_lock( -/*==================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ + dict_table_t* table, + ulint latch_mode) { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); -#ifdef HAVE_ATOMIC_BUILTINS os_once::do_or_wait_for_done( &table->stats_latch_created, dict_table_stats_latch_alloc, table); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(table->stats_latch_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ if (table->stats_latch == NULL) { /* This is a dummy table object that is private in the current @@ -447,15 +389,13 @@ dict_table_stats_lock( } } -/**********************************************************************//** -Unlock the latch that has been locked by dict_table_stats_lock() */ -UNIV_INTERN +/** Unlock the latch that has been locked by dict_table_stats_lock(). +@param[in] table table whose stats to unlock +@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */ void dict_table_stats_unlock( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ + dict_table_t* table, + ulint latch_mode) { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); @@ -507,12 +447,12 @@ dict_table_try_drop_aborted( ut_ad(table->id == table_id); } - if (table && table->n_ref_count == ref_count && table->drop_aborted) { + if (table && table->get_ref_count() == ref_count && table->drop_aborted) { /* Silence a debug assertion in row_merge_drop_indexes(). 
*/ - ut_d(table->n_ref_count++); + ut_d(table->acquire()); row_merge_drop_indexes(trx, table, TRUE); - ut_d(table->n_ref_count--); - ut_ad(table->n_ref_count == ref_count); + ut_d(table->release()); + ut_ad(table->get_ref_count() == ref_count); trx_commit_for_mysql(trx); } @@ -536,7 +476,7 @@ dict_table_try_drop_aborted_and_mutex_exit( if (try_drop && table != NULL && table->drop_aborted - && table->n_ref_count == 1 + && table->get_ref_count() == 1 && dict_table_get_first_index(table)) { /* Attempt to drop the indexes whose online creation @@ -553,7 +493,6 @@ dict_table_try_drop_aborted_and_mutex_exit( /********************************************************************//** Decrements the count of open handles to a table. */ -UNIV_INTERN void dict_table_close( /*=============*/ @@ -563,22 +502,28 @@ dict_table_close( indexes after an aborted online index creation */ { - if (!dict_locked) { + if (!dict_locked && !dict_table_is_intrinsic(table)) { mutex_enter(&dict_sys->mutex); } - ut_ad(mutex_own(&dict_sys->mutex)); - ut_a(table->n_ref_count > 0); + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); + ut_a(table->get_ref_count() > 0); - --table->n_ref_count; + table->release(); + + /* Intrinsic table is not added to dictionary cache so skip other + cache specific actions. */ + if (dict_table_is_intrinsic(table)) { + return; + } /* Force persistent stats re-read upon next open of the table so that FLUSH TABLE can be used to forcibly fetch stats from disk if they have been manually modified. We reset table->stat_initialized only if table reference count is 0 because we do not want too frequent stats re-reads (e.g. in other cases than FLUSH TABLE). 
*/ - if (strchr(table->name, '/') != NULL - && table->n_ref_count == 0 + if (strchr(table->name.m_name, '/') != NULL + && table->get_ref_count() == 0 && dict_stats_is_persistent_enabled(table)) { dict_stats_deinit(table); @@ -602,7 +547,7 @@ dict_table_close( drop_aborted = try_drop && table->drop_aborted - && table->n_ref_count == 1 + && table->get_ref_count() == 1 && dict_table_get_first_index(table); mutex_exit(&dict_sys->mutex); @@ -614,11 +559,84 @@ dict_table_close( } #endif /* !UNIV_HOTBACKUP */ +/********************************************************************//** +Closes the only open handle to a table and drops a table while assuring +that dict_sys->mutex is held the whole time. This assures that the table +is not evicted after the close when the count of open handles goes to zero. +Because dict_sys->mutex is held, we do not need to call +dict_table_prevent_eviction(). */ +void +dict_table_close_and_drop( +/*======================*/ + trx_t* trx, /*!< in: data dictionary transaction */ + dict_table_t* table) /*!< in/out: table */ +{ + dberr_t err = DB_SUCCESS; + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(trx->dict_operation != TRX_DICT_OP_NONE); + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + + dict_table_close(table, TRUE, FALSE); + +#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG + /* Nobody should have initialized the stats of the newly created + table when this is called. So we know that it has not been added + for background stats gathering. */ + ut_a(!table->stat_initialized); +#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ + + err = row_merge_drop_table(trx, table); + + if (err != DB_SUCCESS) { + ib::error() << "At " << __FILE__ << ":" << __LINE__ + << " row_merge_drop_table returned error: " << err + << " table: " << table->name.m_name; + } +} + +/** Check if the table has a given (non_virtual) column. 
+@param[in] table table object +@param[in] col_name column name +@param[in] col_nr column number guessed, 0 as default +@return column number if the table has the specified column, +otherwise table->n_def */ +ulint +dict_table_has_column( + const dict_table_t* table, + const char* col_name, + ulint col_nr) +{ + ulint col_max = table->n_def; + + ut_ad(table); + ut_ad(col_name); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + if (col_nr < col_max + && innobase_strcasecmp( + col_name, dict_table_get_col_name(table, col_nr)) == 0) { + return(col_nr); + } + + /** The order of column may changed, check it with other columns */ + for (ulint i = 0; i < col_max; i++) { + if (i != col_nr + && innobase_strcasecmp( + col_name, dict_table_get_col_name(table, i)) == 0) { + + return(i); + } + } + + return(col_max); +} + /**********************************************************************//** Returns a column's name. @return column name. NOTE: not guaranteed to stay valid if table is modified in any way (columns added, etc.). */ -UNIV_INTERN const char* dict_table_get_col_name( /*====================*/ @@ -676,49 +694,146 @@ dict_table_get_col_name_for_mysql( return(s); } + +/** Returns a virtual column's name. +@param[in] table target table +@param[in] col_nr virtual column number (nth virtual column) +@return column name or NULL if column number out of range. 
*/ +const char* +dict_table_get_v_col_name( + const dict_table_t* table, + ulint col_nr) +{ + const char* s; + + ut_ad(table); + ut_ad(col_nr < table->n_v_def); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + if (col_nr >= table->n_v_def) { + return(NULL); + } + + s = table->v_col_names; + + if (s != NULL) { + for (ulint i = 0; i < col_nr; i++) { + s += strlen(s) + 1; + } + } + + return(s); +} + +/** Search virtual column's position in InnoDB according to its position +in original table's position +@param[in] table target table +@param[in] col_nr column number (nth column in the MySQL table) +@return virtual column's position in InnoDB, ULINT_UNDEFINED if not find */ +static +ulint +dict_table_get_v_col_pos_for_mysql( + const dict_table_t* table, + ulint col_nr) +{ + ulint i; + + ut_ad(table); + ut_ad(col_nr < static_cast(table->n_t_def)); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + for (i = 0; i < table->n_v_def; i++) { + if (col_nr == dict_get_v_col_mysql_pos( + table->v_cols[i].m_col.ind)) { + break; + } + } + + if (i == table->n_v_def) { + return(ULINT_UNDEFINED); + } + + return(i); +} + +/** Returns a virtual column's name according to its original +MySQL table position. +@param[in] table target table +@param[in] col_nr column number (nth column in the table) +@return column name. 
*/ +static +const char* +dict_table_get_v_col_name_mysql( + const dict_table_t* table, + ulint col_nr) +{ + ulint i = dict_table_get_v_col_pos_for_mysql(table, col_nr); + + if (i == ULINT_UNDEFINED) { + return(NULL); + } + + return(dict_table_get_v_col_name(table, i)); +} + +/** Get nth virtual column according to its original MySQL table position +@param[in] table target table +@param[in] col_nr column number in MySQL Table definition +@return dict_v_col_t ptr */ +dict_v_col_t* +dict_table_get_nth_v_col_mysql( + const dict_table_t* table, + ulint col_nr) +{ + ulint i = dict_table_get_v_col_pos_for_mysql(table, col_nr); + + if (i == ULINT_UNDEFINED) { + return(NULL); + } + + return(dict_table_get_nth_v_col(table, i)); +} + #ifndef UNIV_HOTBACKUP /** Allocate and init the autoinc latch of a given table. This function must not be called concurrently on the same table object. @param[in,out] table_void table whose autoinc latch to create */ +static void dict_table_autoinc_alloc( void* table_void) { dict_table_t* table = static_cast(table_void); - table->autoinc_mutex = new (std::nothrow) ib_mutex_t(); + table->autoinc_mutex = UT_NEW_NOKEY(ib_mutex_t()); ut_a(table->autoinc_mutex != NULL); - mutex_create(autoinc_mutex_key, - table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); + mutex_create(LATCH_ID_AUTOINC, table->autoinc_mutex); } /** Allocate and init the zip_pad_mutex of a given index. This function must not be called concurrently on the same index object. 
@param[in,out] index_void index whose zip_pad_mutex to create */ +static void dict_index_zip_pad_alloc( void* index_void) { dict_index_t* index = static_cast(index_void); - index->zip_pad.mutex = new (std::nothrow) os_fast_mutex_t; + index->zip_pad.mutex = UT_NEW_NOKEY(SysMutex()); ut_a(index->zip_pad.mutex != NULL); - os_fast_mutex_init(zip_pad_mutex_key, index->zip_pad.mutex); + mutex_create(LATCH_ID_ZIP_PAD_MUTEX, index->zip_pad.mutex); } + /********************************************************************//** Acquire the autoinc lock. */ -UNIV_INTERN void dict_table_autoinc_lock( /*====================*/ dict_table_t* table) /*!< in/out: table */ { -#ifdef HAVE_ATOMIC_BUILTINS os_once::do_or_wait_for_done( &table->autoinc_mutex_created, dict_table_autoinc_alloc, table); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(table->autoinc_mutex_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ mutex_enter(table->autoinc_mutex); } @@ -729,20 +844,16 @@ void dict_index_zip_pad_lock( dict_index_t* index) { -#ifdef HAVE_ATOMIC_BUILTINS os_once::do_or_wait_for_done( &index->zip_pad.mutex_created, dict_index_zip_pad_alloc, index); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(index->zip_pad.mutex_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ - os_fast_mutex_lock(index->zip_pad.mutex); + mutex_enter(index->zip_pad.mutex); } + /********************************************************************//** Unconditionally set the autoinc counter. */ -UNIV_INTERN void dict_table_autoinc_initialize( /*==========================*/ @@ -754,16 +865,14 @@ dict_table_autoinc_initialize( table->autoinc = value; } -/************************************************************************ -Get all the FTS indexes on a table. -@return number of FTS indexes */ -UNIV_INTERN +/** Get all the FTS indexes on a table. 
+@param[in] table table +@param[out] indexes all FTS indexes on this table +@return number of FTS indexes */ ulint dict_table_get_all_fts_indexes( -/*===========================*/ - dict_table_t* table, /*!< in: table */ - ib_vector_t* indexes) /*!< out: all FTS indexes on this - table */ + const dict_table_t* table, + ib_vector_t* indexes) { dict_index_t* index; @@ -783,7 +892,6 @@ dict_table_get_all_fts_indexes( /** Store autoinc value when the table is evicted. @param[in] table table evicted */ -UNIV_INTERN void dict_table_autoinc_store( const dict_table_t* table) @@ -802,7 +910,6 @@ dict_table_autoinc_store( /** Restore autoinc value when the table is loaded. @param[in] table table loaded */ -UNIV_INTERN void dict_table_autoinc_restore( dict_table_t* table) @@ -823,8 +930,7 @@ dict_table_autoinc_restore( /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. -@return value for a new row, or 0 */ -UNIV_INTERN +@return value for a new row, or 0 */ ib_uint64_t dict_table_autoinc_read( /*====================*/ @@ -838,7 +944,6 @@ dict_table_autoinc_read( /********************************************************************//** Updates the autoinc counter if the value supplied is greater than the current value. */ -UNIV_INTERN void dict_table_autoinc_update_if_greater( /*=================================*/ @@ -856,7 +961,6 @@ dict_table_autoinc_update_if_greater( /********************************************************************//** Release the autoinc lock. */ -UNIV_INTERN void dict_table_autoinc_unlock( /*======================*/ @@ -866,35 +970,39 @@ dict_table_autoinc_unlock( } #endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Looks for column n in an index. +/** Looks for column n in an index. 
+@param[in] index index +@param[in] n column number +@param[in] inc_prefix true=consider column prefixes too +@param[in] is_virtual true==virtual column +@param[out] prefix_col_pos col num if prefix @return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN ulint dict_index_get_nth_col_or_prefix_pos( -/*=================================*/ - const dict_index_t* index, /*!< in: index */ - ulint n, /*!< in: column number */ - ibool inc_prefix, /*!< in: TRUE=consider - column prefixes too */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ + const dict_index_t* index, + ulint n, + bool inc_prefix, + bool is_virtual, + ulint* prefix_col_pos) { const dict_field_t* field; const dict_col_t* col; ulint pos; ulint n_fields; - ulint prefixed_pos_dummy; ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - if (!prefix_col_pos) { - prefix_col_pos = &prefixed_pos_dummy; + if (prefix_col_pos) { + *prefix_col_pos = ULINT_UNDEFINED; } - *prefix_col_pos = ULINT_UNDEFINED; - col = dict_table_get_nth_col(index->table, n); + if (is_virtual) { + col = &(dict_table_get_nth_v_col(index->table, n)->m_col); + } else { + col = dict_table_get_nth_col(index->table, n); + } if (dict_index_is_clust(index)) { @@ -907,7 +1015,9 @@ dict_index_get_nth_col_or_prefix_pos( field = dict_index_get_nth_field(index, pos); if (col == field->col) { - *prefix_col_pos = pos; + if (prefix_col_pos) { + *prefix_col_pos = pos; + } if (inc_prefix || field->prefix_len == 0) { return(pos); } @@ -918,15 +1028,16 @@ dict_index_get_nth_col_or_prefix_pos( } #ifndef UNIV_HOTBACKUP -/********************************************************************//** -Returns TRUE if the index contains a column or a prefix of that column. -@return TRUE if contains the column or its prefix */ -UNIV_INTERN +/** Returns TRUE if the index contains a column or a prefix of that column. 
+@param[in] index index +@param[in] n column number +@param[in] is_virtual whether it is a virtual col +@return TRUE if contains the column or its prefix */ ibool dict_index_contains_col_or_prefix( -/*==============================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ + const dict_index_t* index, + ulint n, + bool is_virtual) { const dict_field_t* field; const dict_col_t* col; @@ -941,7 +1052,11 @@ dict_index_contains_col_or_prefix( return(TRUE); } - col = dict_table_get_nth_col(index->table, n); + if (is_virtual) { + col = &dict_table_get_nth_v_col(index->table, n)->m_col; + } else { + col = dict_table_get_nth_col(index->table, n); + } n_fields = dict_index_get_n_fields(index); @@ -964,7 +1079,6 @@ column in index2. That is, we must be able to construct the prefix in index2 from the prefix in index. @return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN ulint dict_index_get_nth_field_pos( /*=========================*/ @@ -984,9 +1098,22 @@ dict_index_get_nth_field_pos( n_fields = dict_index_get_n_fields(index); + /* Are we looking for a MBR (Minimum Bound Box) field of + a spatial index */ + bool is_mbr_fld = (n == 0 && dict_index_is_spatial(index2)); + for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); + /* The first field of a spatial index is a transformed + MBR (Minimum Bound Box) field made out of original column, + so its field->col still points to original cluster index + col, but the actual content is different. 
So we cannot + consider them equal if neither of them is MBR field */ + if (pos == 0 && dict_index_is_spatial(index) && !is_mbr_fld) { + continue; + } + if (field->col == field2->col && (field->prefix_len == 0 || (field->prefix_len >= field2->prefix_len @@ -1001,8 +1128,7 @@ dict_index_get_nth_field_pos( /**********************************************************************//** Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INTERN +@return table, NULL if does not exist */ dict_table_t* dict_table_open_on_id( /*==================*/ @@ -1031,7 +1157,7 @@ dict_table_open_on_id( dict_move_to_mru(table); } - ++table->n_ref_count; + table->acquire(); MONITOR_INC(MONITOR_TABLE_REFERENCE); } @@ -1046,23 +1172,22 @@ dict_table_open_on_id( /********************************************************************//** Looks for column n position in the clustered index. -@return position in internal representation of the clustered index */ -UNIV_INTERN +@return position in internal representation of the clustered index */ ulint dict_table_get_nth_col_pos( /*=======================*/ const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ + ulint n, /*!< in: column number */ + ulint* prefix_col_pos) { return(dict_index_get_nth_col_pos(dict_table_get_first_index(table), - n, NULL)); + n, prefix_col_pos)); } /********************************************************************//** Checks if a column is in the ordering columns of the clustered index of a table. Column prefixes are treated like whole columns. -@return TRUE if the column, or its prefix, is in the clustered key */ -UNIV_INTERN +@return TRUE if the column, or its prefix, is in the clustered key */ ibool dict_table_col_in_clustered_key( /*============================*/ @@ -1097,38 +1222,43 @@ dict_table_col_in_clustered_key( /**********************************************************************//** Inits the data dictionary module. 
*/ -UNIV_INTERN void dict_init(void) /*===========*/ { - dict_sys = static_cast(mem_zalloc(sizeof(*dict_sys))); + dict_operation_lock = static_cast( + ut_zalloc_nokey(sizeof(*dict_operation_lock))); - mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT); + dict_sys = static_cast(ut_zalloc_nokey(sizeof(*dict_sys))); + + UT_LIST_INIT(dict_sys->table_LRU, &dict_table_t::table_LRU); + UT_LIST_INIT(dict_sys->table_non_LRU, &dict_table_t::table_LRU); + + mutex_create(LATCH_ID_DICT_SYS, &dict_sys->mutex); + + dict_sys->table_hash = hash_create( + buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + + dict_sys->table_id_hash = hash_create( + buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); - dict_sys->table_hash = hash_create(buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); - dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); rw_lock_create(dict_operation_lock_key, - &dict_operation_lock, SYNC_DICT_OPERATION); + dict_operation_lock, SYNC_DICT_OPERATION); if (!srv_read_only_mode) { dict_foreign_err_file = os_file_create_tmpfile(NULL); ut_a(dict_foreign_err_file); - - mutex_create(dict_foreign_err_mutex_key, - &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } + mutex_create(LATCH_ID_DICT_FOREIGN_ERR, &dict_foreign_err_mutex); + dict_sys->autoinc_map = new autoinc_map_t(); } /**********************************************************************//** Move to the most recently used segment of the LRU list. 
*/ -UNIV_INTERN void dict_move_to_mru( /*=============*/ @@ -1140,9 +1270,9 @@ dict_move_to_mru( ut_a(table->can_be_evicted); - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + UT_LIST_REMOVE(dict_sys->table_LRU, table); - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); + UT_LIST_ADD_FIRST(dict_sys->table_LRU, table); ut_ad(dict_lru_validate()); } @@ -1152,8 +1282,7 @@ Returns a table object and increment its open handle count. NOTE! This is a high-level function to be used mainly from outside the 'dict' module. Inside this directory dict_table_get_low is usually the appropriate function. -@return table, NULL if does not exist */ -UNIV_INTERN +@return table, NULL if does not exist */ dict_table_t* dict_table_open_on_name( /*====================*/ @@ -1167,9 +1296,11 @@ dict_table_open_on_name( loading a table definition */ { dict_table_t* table; + DBUG_ENTER("dict_table_open_on_name"); + DBUG_PRINT("dict_table_open_on_name", ("table: '%s'", table_name)); if (!dict_locked) { - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); } ut_ad(table_name); @@ -1178,7 +1309,7 @@ dict_table_open_on_name( table = dict_table_check_if_in_cache_low(table_name); if (table == NULL) { - table = dict_load_table(table_name, TRUE, ignore_err); + table = dict_load_table(table_name, true, ignore_err); } ut_ad(!table || table->cached); @@ -1189,50 +1320,42 @@ dict_table_open_on_name( if (ignore_err == DICT_ERR_IGNORE_NONE && table->is_encrypted) { /* Make life easy for drop table. */ - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } + dict_table_prevent_eviction(table); if (table->can_be_evicted) { dict_move_to_mru(table); } - ++table->n_ref_count; + table->acquire(); if (!dict_locked) { mutex_exit(&dict_sys->mutex); } - return (table); + DBUG_RETURN(table); } /* If table is corrupted, return NULL */ else if (ignore_err == DICT_ERR_IGNORE_NONE && table->corrupted) { - /* Make life easy for drop table. 
*/ - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - + dict_table_prevent_eviction(table); if (!dict_locked) { mutex_exit(&dict_sys->mutex); } - ut_print_timestamp(stderr); + ib::info() << "Table " + << table->name + << " is corrupted. Please drop the table" + " and recreate it"; - fprintf(stderr, " InnoDB: table "); - ut_print_name(stderr, NULL, TRUE, table->name); - fprintf(stderr, "is corrupted. Please drop the table " - "and recreate\n"); - - return(NULL); + DBUG_RETURN(NULL); } if (table->can_be_evicted) { dict_move_to_mru(table); } - ++table->n_ref_count; + table->acquire(); MONITOR_INC(MONITOR_TABLE_REFERENCE); } @@ -1243,13 +1366,12 @@ dict_table_open_on_name( dict_table_try_drop_aborted_and_mutex_exit(table, try_drop); } - return(table); + DBUG_RETURN(table); } #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Adds system columns to a table object. */ -UNIV_INTERN void dict_table_add_system_columns( /*==========================*/ @@ -1257,19 +1379,27 @@ dict_table_add_system_columns( mem_heap_t* heap) /*!< in: temporary heap */ { ut_ad(table); - ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS); + ut_ad(table->n_def == + (table->n_cols - dict_table_get_n_sys_cols(table))); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); ut_ad(!table->cached); /* NOTE: the system columns MUST be added in the following order (so that they can be indexed by the numerical value of DATA_ROW_ID, etc.) and as the last columns of the table memory object. - The clustered index will not always physically contain all - system columns. */ + The clustered index will not always physically contain all system + columns. + Intrinsic table don't need DB_ROLL_PTR as UNDO logging is turned off + for these tables. 
*/ dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS, DATA_ROW_ID | DATA_NOT_NULL, DATA_ROW_ID_LEN); + +#if (DATA_ITT_N_SYS_COLS != 2) +#error "DATA_ITT_N_SYS_COLS != 2" +#endif + #if DATA_ROW_ID != 0 #error "DATA_ROW_ID != 0" #endif @@ -1279,52 +1409,32 @@ dict_table_add_system_columns( #if DATA_TRX_ID != 1 #error "DATA_TRX_ID != 1" #endif - dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS, - DATA_ROLL_PTR | DATA_NOT_NULL, - DATA_ROLL_PTR_LEN); + + if (!dict_table_is_intrinsic(table)) { + dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS, + DATA_ROLL_PTR | DATA_NOT_NULL, + DATA_ROLL_PTR_LEN); #if DATA_ROLL_PTR != 2 #error "DATA_ROLL_PTR != 2" #endif - /* This check reminds that if a new system column is added to - the program, it should be dealt with here */ + /* This check reminds that if a new system column is added to + the program, it should be dealt with here */ #if DATA_N_SYS_COLS != 3 #error "DATA_N_SYS_COLS != 3" #endif + } } #ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Adds a table object to the dictionary cache. */ -UNIV_INTERN +/** Mark if table has big rows. 
+@param[in,out] table table handler */ void -dict_table_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ibool can_be_evicted, /*!< in: TRUE if can be evicted */ - mem_heap_t* heap) /*!< in: temporary heap */ +dict_table_set_big_rows( + dict_table_t* table) { - ulint fold; - ulint id_fold; - ulint i; - ulint row_len; - - ut_ad(dict_lru_validate()); - - /* The lower limit for what we consider a "big" row */ -#define BIG_ROW_SIZE 1024 - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_table_add_system_columns(table, heap); - - table->cached = TRUE; - - fold = ut_fold_string(table->name); - id_fold = ut_fold_ull(table->id); - - row_len = 0; - for (i = 0; i < table->n_def; i++) { + ulint row_len = 0; + for (ulint i = 0; i < table->n_def; i++) { ulint col_len = dict_col_get_max_size( dict_table_get_nth_col(table, i)); @@ -1339,14 +1449,39 @@ dict_table_add_to_cache( } } - table->big_rows = row_len >= BIG_ROW_SIZE; + table->big_rows = (row_len >= BIG_ROW_SIZE) ? TRUE : FALSE; +} + +/**********************************************************************//** +Adds a table object to the dictionary cache. 
*/ +void +dict_table_add_to_cache( +/*====================*/ + dict_table_t* table, /*!< in: table */ + ibool can_be_evicted, /*!< in: TRUE if can be evicted */ + mem_heap_t* heap) /*!< in: temporary heap */ +{ + ulint fold; + ulint id_fold; + + ut_ad(dict_lru_validate()); + ut_ad(mutex_own(&dict_sys->mutex)); + + dict_table_add_system_columns(table, heap); + + table->cached = TRUE; + + fold = ut_fold_string(table->name.m_name); + id_fold = ut_fold_ull(table->id); + + dict_table_set_big_rows(table); /* Look for a table with the same name: error if such exists */ { dict_table_t* table2; HASH_SEARCH(name_hash, dict_sys->table_hash, fold, dict_table_t*, table2, ut_ad(table2->cached), - ut_strcmp(table2->name, table->name) == 0); + !strcmp(table2->name.m_name, table->name.m_name)); ut_a(table2 == NULL); #ifdef UNIV_DEBUG @@ -1386,9 +1521,9 @@ dict_table_add_to_cache( table->can_be_evicted = can_be_evicted; if (table->can_be_evicted) { - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); + UT_LIST_ADD_FIRST(dict_sys->table_LRU, table); } else { - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); + UT_LIST_ADD_FIRST(dict_sys->table_non_LRU, table); } dict_table_autoinc_restore(table); @@ -1396,7 +1531,7 @@ dict_table_add_to_cache( ut_ad(dict_lru_validate()); dict_sys->size += mem_heap_get_size(table->heap) - + strlen(table->name) + 1; + + strlen(table->name.m_name) + 1; } /**********************************************************************//** @@ -1409,15 +1544,13 @@ dict_table_can_be_evicted( const dict_table_t* table) /*!< in: table to test */ { ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_a(table->can_be_evicted); ut_a(table->foreign_set.empty()); ut_a(table->referenced_set.empty()); - if (table->n_ref_count == 0) { + if (table->get_ref_count() == 0) { dict_index_t* index; /* The transaction 
commit and rollback are called from @@ -1447,7 +1580,7 @@ dict_table_can_be_evicted( See also: dict_index_remove_from_cache_low() */ - if (btr_search_info_get_ref_count(info) > 0) { + if (btr_search_info_get_ref_count(info, index) > 0) { return(FALSE); } } @@ -1464,7 +1597,6 @@ should not be part of FK relationship and currently not used in any user transaction. There is no guarantee that it will remove a table. @return number of tables evicted. If the number of tables in the dict_LRU is less than max_tables it will not do anything. */ -UNIV_INTERN ulint dict_make_room_in_cache( /*====================*/ @@ -1480,9 +1612,7 @@ dict_make_room_in_cache( ut_a(pct_check > 0); ut_a(pct_check <= 100); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(dict_lru_validate()); i = len = UT_LIST_GET_LEN(dict_sys->table_LRU); @@ -1524,7 +1654,6 @@ dict_make_room_in_cache( /**********************************************************************//** Move a table to the non-LRU list from the LRU list. */ -UNIV_INTERN void dict_table_move_from_lru_to_non_lru( /*================================*/ @@ -1535,42 +1664,21 @@ dict_table_move_from_lru_to_non_lru( ut_a(table->can_be_evicted); - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + UT_LIST_REMOVE(dict_sys->table_LRU, table); - UT_LIST_ADD_LAST(table_LRU, dict_sys->table_non_LRU, table); + UT_LIST_ADD_LAST(dict_sys->table_non_LRU, table); table->can_be_evicted = FALSE; } -/**********************************************************************//** -Move a table to the LRU list from the non-LRU list. 
*/ -UNIV_INTERN -void -dict_table_move_from_non_lru_to_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from non-LRU to LRU */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(dict_non_lru_find_table(table)); - - ut_a(!table->can_be_evicted); - - UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); - - UT_LIST_ADD_LAST(table_LRU, dict_sys->table_LRU, table); - - table->can_be_evicted = TRUE; -} - -/**********************************************************************//** -Looks for an index with the given id given a table instance. -@return index or NULL */ -UNIV_INTERN +/** Looks for an index with the given id given a table instance. +@param[in] table table instance +@param[in] id index id +@return index or NULL */ dict_index_t* dict_table_find_index_on_id( -/*========================*/ - const dict_table_t* table, /*!< in: table instance */ - index_id_t id) /*!< in: index id */ + const dict_table_t* table, + index_id_t id) { dict_index_t* index; @@ -1592,8 +1700,7 @@ dict_table_find_index_on_id( Looks for an index with the given id. NOTE that we do not reserve the dictionary mutex: this function is for emergency purposes like printing info of a corrupt database page! -@return index or NULL if not found in cache */ -UNIV_INTERN +@return index or NULL if not found in cache */ dict_index_t* dict_index_find_on_id_low( /*======================*/ @@ -1648,8 +1755,7 @@ struct dict_foreign_remove_partial /**********************************************************************//** Renames a table object. 
-@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ dberr_t dict_table_rename_in_cache( /*=======================*/ @@ -1665,19 +1771,16 @@ dict_table_rename_in_cache( ulint fold; char old_name[MAX_FULL_NAME_LEN + 1]; os_file_type_t ftype; - ibool exists; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); /* store the old/current name to an automatic variable */ - if (strlen(table->name) + 1 <= sizeof(old_name)) { - memcpy(old_name, table->name, strlen(table->name) + 1); + if (strlen(table->name.m_name) + 1 <= sizeof(old_name)) { + strcpy(old_name, table->name.m_name); } else { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: too long table name: '%s', " - "max length is %d\n", table->name, - MAX_FULL_NAME_LEN); - ut_error; + ib::fatal() << "Too long table name: " + << table->name + << ", max length is " << MAX_FULL_NAME_LEN; } fold = ut_fold_string(new_name); @@ -1686,16 +1789,15 @@ dict_table_rename_in_cache( dict_table_t* table2; HASH_SEARCH(name_hash, dict_sys->table_hash, fold, dict_table_t*, table2, ut_ad(table2->cached), - (ut_strcmp(table2->name, new_name) == 0)); + (ut_strcmp(table2->name.m_name, new_name) == 0)); DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure", if (table2 == NULL) { table2 = (dict_table_t*) -1; } ); if (table2) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename table '%s' to '%s' since the " - "dictionary cache already contains '%s'.", - old_name, new_name, new_name); + ib::error() << "Cannot rename table '" << old_name + << "' to '" << new_name << "' since the" + " dictionary cache already contains '" << new_name << "'."; return(DB_ERROR); } @@ -1703,19 +1805,28 @@ dict_table_rename_in_cache( .ibd file and rebuild the .isl file if needed. */ if (dict_table_is_discarded(table)) { + bool exists; char* filepath; - ut_ad(table->space != TRX_SYS_SPACE); + ut_ad(dict_table_is_file_per_table(table)); + ut_ad(!dict_table_is_temporary(table)); + + /* Make sure the data_dir_path is set. 
*/ + dict_get_and_save_data_dir_path(table, true); if (DICT_TF_HAS_DATA_DIR(table->flags)) { - - dict_get_and_save_data_dir_path(table, true); ut_a(table->data_dir_path); - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); + filepath = fil_make_filepath( + table->data_dir_path, table->name.m_name, + IBD, true); } else { - filepath = fil_make_ibd_name(table->name, false); + filepath = fil_make_filepath( + NULL, table->name.m_name, IBD, false); + } + + if (filepath == NULL) { + return(DB_OUT_OF_MEMORY); } fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE); @@ -1723,28 +1834,20 @@ dict_table_rename_in_cache( /* Delete any temp file hanging around. */ if (os_file_status(filepath, &exists, &ftype) && exists - && !os_file_delete_if_exists(innodb_file_temp_key, - filepath)) { + && !os_file_delete_if_exists(innodb_temp_file_key, + filepath, NULL)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Delete of %s failed.", filepath); + ib::info() << "Delete of " << filepath << " failed."; } - mem_free(filepath); - - } else if (table->space != TRX_SYS_SPACE) { - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: trying to rename a" - " TEMPORARY TABLE ", stderr); - ut_print_name(stderr, NULL, TRUE, old_name); - if (table->dir_path_of_temp_table != NULL) { - fputs(" (", stderr); - ut_print_filename( - stderr, table->dir_path_of_temp_table); - fputs(" )\n", stderr); - } + ut_free(filepath); + } else if (dict_table_is_file_per_table(table)) { + if (table->dir_path_of_temp_table != NULL) { + ib::error() << "Trying to rename a TEMPORARY TABLE " + << old_name + << " ( " << table->dir_path_of_temp_table + << " )"; return(DB_ERROR); } @@ -1755,35 +1858,39 @@ dict_table_rename_in_cache( new_path = os_file_make_new_pathname( old_path, new_name); - err = fil_create_link_file(new_name, new_path); + err = RemoteDatafile::create_link_file( + new_name, new_path); + if (err != DB_SUCCESS) { - 
mem_free(new_path); - mem_free(old_path); + ut_free(new_path); + ut_free(old_path); return(DB_TABLESPACE_EXISTS); } } else { - new_path = fil_make_ibd_name(new_name, false); + new_path = fil_make_filepath( + NULL, new_name, IBD, false); } /* New filepath must not exist. */ err = fil_rename_tablespace_check( table->space, old_path, new_path, false); if (err != DB_SUCCESS) { - mem_free(old_path); - mem_free(new_path); + ut_free(old_path); + ut_free(new_path); return(err); } - ibool success = fil_rename_tablespace( - old_name, table->space, new_name, new_path); + bool success = fil_rename_tablespace( + table->space, old_path, new_name, new_path); - mem_free(old_path); - mem_free(new_path); + ut_free(old_path); + ut_free(new_path); /* If the tablespace is remote, a new .isl file was created - If success, delete the old one. If not, delete the new one. */ + If success, delete the old one. If not, delete the new one. */ if (DICT_TF_HAS_DATA_DIR(table->flags)) { - fil_delete_link_file(success ? old_name : new_name); + RemoteDatafile::delete_link_file( + success ? 
old_name : new_name); } if (!success) { @@ -1795,16 +1902,16 @@ dict_table_rename_in_cache( HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - if (strlen(new_name) > strlen(table->name)) { + if (strlen(new_name) > strlen(table->name.m_name)) { /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid memory fragmentation, we assume a repeated calls of ut_realloc() with the same size do not cause fragmentation */ ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN); - table->name = static_cast( - ut_realloc(table->name, MAX_FULL_NAME_LEN + 1)); + table->name.m_name = static_cast( + ut_realloc(table->name.m_name, MAX_FULL_NAME_LEN + 1)); } - memcpy(table->name, new_name, strlen(new_name) + 1); + strcpy(table->name.m_name, new_name); /* Add table to hash table of tables */ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, @@ -1818,7 +1925,7 @@ dict_table_rename_in_cache( index != NULL; index = dict_table_get_next_index(index)) { - index->table_name = table->name; + index->table_name = table->name.m_name; } if (!rename_also_foreigns) { @@ -1876,15 +1983,16 @@ dict_table_rename_in_cache( } if (ut_strlen(foreign->foreign_table_name) - < ut_strlen(table->name)) { + < ut_strlen(table->name.m_name)) { /* Allocate a longer name buffer; TODO: store buf len to save memory */ foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); + foreign->heap, table->name.m_name); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); } else { - strcpy(foreign->foreign_table_name, table->name); + strcpy(foreign->foreign_table_name, + table->name.m_name); dict_mem_foreign_table_name_lookup_set(foreign, FALSE); } if (strchr(foreign->id, '/')) { @@ -1960,20 +2068,21 @@ dict_table_rename_in_cache( char table_name[MAX_TABLE_NAME_LEN] = ""; uint errors = 0; - if (strlen(table->name) > strlen(old_name)) { + if (strlen(table->name.m_name) + > strlen(old_name)) { foreign->id = static_cast( mem_heap_alloc( foreign->heap, - 
strlen(table->name) + strlen(table->name.m_name) + strlen(old_id) + 1)); } /* Convert the table name to UTF-8 */ - strncpy(table_name, table->name, + strncpy(table_name, table->name.m_name, MAX_TABLE_NAME_LEN); innobase_convert_to_system_charset( strchr(table_name, '/') + 1, - strchr(table->name, '/') + 1, + strchr(table->name.m_name, '/') + 1, MAX_TABLE_NAME_LEN, &errors); if (errors) { @@ -1981,7 +2090,7 @@ dict_table_rename_in_cache( from charset my_charset_filename to UTF-8. This means that the table name is already in UTF-8 (#mysql#50). */ - strncpy(table_name, table->name, + strncpy(table_name, table->name.m_name, MAX_TABLE_NAME_LEN); } @@ -2001,9 +2110,10 @@ dict_table_rename_in_cache( } else { /* This is a >= 4.0.18 format id where the user gave the id name */ - db_len = dict_get_db_name_len(table->name) + 1; + db_len = dict_get_db_name_len( + table->name.m_name) + 1; - if (dict_get_db_name_len(table->name) + if (db_len - 1 > dict_get_db_name_len(foreign->id)) { foreign->id = static_cast( @@ -2015,13 +2125,14 @@ dict_table_rename_in_cache( /* Replace the database prefix in id with the one from table->name */ - ut_memcpy(foreign->id, table->name, db_len); + ut_memcpy(foreign->id, + table->name.m_name, db_len); strcpy(foreign->id + db_len, dict_remove_db_name(old_id)); } - mem_free(old_id); + ut_free(old_id); } table->foreign_set.erase(it); @@ -2042,18 +2153,19 @@ dict_table_rename_in_cache( foreign = *it; if (ut_strlen(foreign->referenced_table_name) - < ut_strlen(table->name)) { + < ut_strlen(table->name.m_name)) { /* Allocate a longer name buffer; TODO: store buf len to save memory */ foreign->referenced_table_name = mem_heap_strdup( - foreign->heap, table->name); + foreign->heap, table->name.m_name); dict_mem_referenced_table_name_lookup_set( foreign, TRUE); } else { /* Use the same buffer */ - strcpy(foreign->referenced_table_name, table->name); + strcpy(foreign->referenced_table_name, + table->name.m_name); dict_mem_referenced_table_name_lookup_set( 
foreign, FALSE); @@ -2066,7 +2178,6 @@ dict_table_rename_in_cache( /**********************************************************************//** Change the id of a table object in the dictionary cache. This is used in DISCARD TABLESPACE. */ -UNIV_INTERN void dict_table_change_id_in_cache( /*==========================*/ @@ -2074,7 +2185,7 @@ dict_table_change_id_in_cache( table_id_t new_id) /*!< in: new id to set */ { ut_ad(table); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); /* Remove the table from the hash table of id's */ @@ -2099,13 +2210,13 @@ dict_table_remove_from_cache_low( { dict_foreign_t* foreign; dict_index_t* index; - ulint size; + lint size; ut_ad(table); ut_ad(dict_lru_validate()); - ut_a(table->n_ref_count == 0); + ut_a(table->get_ref_count() == 0); ut_a(table->n_rec_locks == 0); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); /* Remove the foreign constraints from the cache */ @@ -2135,7 +2246,7 @@ dict_table_remove_from_cache_low( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(table->name), table); + ut_fold_string(table->name.m_name), table); HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, ut_fold_ull(table->id), table); @@ -2143,10 +2254,10 @@ dict_table_remove_from_cache_low( /* Remove table from LRU or non-LRU list. 
*/ if (table->can_be_evicted) { ut_ad(dict_lru_find_table(table)); - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + UT_LIST_REMOVE(dict_sys->table_LRU, table); } else { ut_ad(dict_non_lru_find_table(table)); - UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); + UT_LIST_REMOVE(dict_sys->table_non_LRU, table); } ut_ad(dict_lru_validate()); @@ -2161,25 +2272,30 @@ dict_table_remove_from_cache_low( trx_t* trx = trx_allocate_for_background(); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + /* Mimic row_mysql_lock_data_dictionary(). */ trx->dict_operation_lock_mode = RW_X_LATCH; trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); /* Silence a debug assertion in row_merge_drop_indexes(). */ - ut_d(table->n_ref_count++); + ut_d(table->acquire()); row_merge_drop_indexes(trx, table, TRUE); - ut_d(table->n_ref_count--); - ut_ad(table->n_ref_count == 0); + ut_d(table->release()); + ut_ad(table->get_ref_count() == 0); trx_commit_for_mysql(trx); trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); } - size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; + /* Free virtual column template if any */ + if (table->vc_templ != NULL) { + dict_free_vc_templ(table->vc_templ); + UT_DELETE(table->vc_templ); + } + + size = mem_heap_get_size(table->heap) + strlen(table->name.m_name) + 1; ut_ad(dict_sys->size >= size); @@ -2190,7 +2306,6 @@ dict_table_remove_from_cache_low( /**********************************************************************//** Removes a table object from the dictionary cache. */ -UNIV_INTERN void dict_table_remove_from_cache( /*=========================*/ @@ -2202,8 +2317,7 @@ dict_table_remove_from_cache( /****************************************************************//** If the given column name is reserved for InnoDB system columns, return TRUE. 
-@return TRUE if name is reserved */ -UNIV_INTERN +@return TRUE if name is reserved */ ibool dict_col_name_is_reserved( /*======================*/ @@ -2231,159 +2345,107 @@ dict_col_name_is_reserved( return(FALSE); } -#if 1 /* This function is not very accurate at determining - whether an UNDO record will be too big. See innodb_4k.test, - Bug 13336585, for a testcase that shows an index that can - be created but cannot be updated. */ - /****************************************************************//** -If an undo log record for this table might not fit on a single page, -return TRUE. -@return TRUE if the undo log record could become too big */ -static -ibool -dict_index_too_big_for_undo( -/*========================*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index) /*!< in: index */ +Return maximum size of the node pointer record. +@return maximum size of the record in bytes */ +ulint +dict_index_node_ptr_max_size( +/*=========================*/ + const dict_index_t* index) /*!< in: index */ { - /* Make sure that all column prefixes will fit in the undo log record - in trx_undo_page_report_modify() right after trx_undo_page_init(). */ + ulint comp; + ulint i; + /* maximum possible storage size of a record */ + ulint rec_max_size; - ulint i; - const dict_index_t* clust_index - = dict_table_get_first_index(table); - ulint undo_page_len - = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE - + 2 /* next record pointer */ - + 1 /* type_cmpl */ - + 11 /* trx->undo_no */ + 11 /* table->id */ - + 1 /* rec_get_info_bits() */ - + 11 /* DB_TRX_ID */ - + 11 /* DB_ROLL_PTR */ - + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */ - + 2/* pointer to previous undo log record */; - - /* FTS index consists of auxiliary tables, they shall be excluded from - index row size check */ - if (new_index->type & DICT_FTS) { - return(false); + if (dict_index_is_ibuf(index)) { + /* cannot estimate accurately */ + /* This is universal index for change buffer. 
+ The max size of the entry is about max key length * 2. + (index key + primary key to be inserted to the index) + (The max key length is UNIV_PAGE_SIZE / 16 * 3 at + ha_innobase::max_supported_key_length(), + considering MAX_KEY_LENGTH = 3072 at MySQL imposes + the 3500 historical InnoDB value for 16K page size case.) + For the universal index, node_ptr contains most of the entry. + And 512 is enough to contain ibuf columns and meta-data */ + return(UNIV_PAGE_SIZE / 8 * 3 + 512); } - if (!clust_index) { - ut_a(dict_index_is_clust(new_index)); - clust_index = new_index; + comp = dict_table_is_comp(index->table); + + /* Each record has page_no, length of page_no and header. */ + rec_max_size = comp + ? REC_NODE_PTR_SIZE + 1 + REC_N_NEW_EXTRA_BYTES + : REC_NODE_PTR_SIZE + 2 + REC_N_OLD_EXTRA_BYTES; + + if (comp) { + /* Include the "null" flags in the + maximum possible record size. */ + rec_max_size += UT_BITS_IN_BYTES(index->n_nullable); + } else { + /* For each column, include a 2-byte offset and a + "null" flag. */ + rec_max_size += 2 * index->n_fields; } - /* Add the size of the ordering columns in the - clustered index. */ - for (i = 0; i < clust_index->n_uniq; i++) { + /* Compute the maximum possible record size. */ + for (i = 0; i < dict_index_get_n_unique_in_tree(index); i++) { + const dict_field_t* field + = dict_index_get_nth_field(index, i); const dict_col_t* col - = dict_index_get_nth_col(clust_index, i); + = dict_field_get_col(field); + ulint field_max_size; + ulint field_ext_max_size; - /* Use the maximum output size of - mach_write_compressed(), although the encoded - length should always fit in 2 bytes. */ - undo_page_len += 5 + dict_col_get_max_size(col); - } + /* Determine the maximum length of the index field. */ - /* Add the old values of the columns to be updated. - First, the amount and the numbers of the columns. - These are written by mach_write_compressed() whose - maximum output length is 5 bytes. 
However, given that - the quantities are below REC_MAX_N_FIELDS (10 bits), - the maximum length is 2 bytes per item. */ - undo_page_len += 2 * (dict_table_get_n_cols(table) + 1); - - for (i = 0; i < clust_index->n_def; i++) { - const dict_col_t* col - = dict_index_get_nth_col(clust_index, i); - ulint max_size - = dict_col_get_max_size(col); - ulint fixed_size - = dict_col_get_fixed_size(col, - dict_table_is_comp(table)); - ulint max_prefix - = col->max_prefix; - - if (fixed_size) { - /* Fixed-size columns are stored locally. */ - max_size = fixed_size; - } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) { - /* Short columns are stored locally. */ - } else if (!col->ord_part - || (col->max_prefix - < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) { - /* See if col->ord_part would be set - because of new_index. Also check if the new - index could have longer prefix on columns - that already had ord_part set */ - ulint j; - - for (j = 0; j < new_index->n_uniq; j++) { - if (dict_index_get_nth_col( - new_index, j) == col) { - const dict_field_t* field - = dict_index_get_nth_field( - new_index, j); - - if (field->prefix_len - > col->max_prefix) { - max_prefix = - field->prefix_len; - } - - goto is_ord_part; - } - } - - if (col->ord_part) { - goto is_ord_part; - } - - /* This is not an ordering column in any index. - Thus, it can be stored completely externally. */ - max_size = BTR_EXTERN_FIELD_REF_SIZE; - } else { - ulint max_field_len; -is_ord_part: - max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); - - /* This is an ordering column in some index. - A long enough prefix must be written to the - undo log. See trx_undo_page_fetch_ext(). 
*/ - max_size = ut_min(max_size, max_field_len); - - /* We only store the needed prefix length in undo log */ - if (max_prefix) { - ut_ad(dict_table_get_format(table) - >= UNIV_FORMAT_B); - - max_size = ut_min(max_prefix, max_size); - } - - max_size += BTR_EXTERN_FIELD_REF_SIZE; + field_max_size = dict_col_get_fixed_size(col, comp); + if (field_max_size) { + /* dict_index_add_col() should guarantee this */ + ut_ad(!field->prefix_len + || field->fixed_len == field->prefix_len); + /* Fixed lengths are not encoded + in ROW_FORMAT=COMPACT. */ + rec_max_size += field_max_size; + continue; } - undo_page_len += 5 + max_size; + field_max_size = dict_col_get_max_size(col); + field_ext_max_size = field_max_size < 256 ? 1 : 2; + + if (field->prefix_len + && field->prefix_len < field_max_size) { + field_max_size = field->prefix_len; + } + + if (comp) { + /* Add the extra size for ROW_FORMAT=COMPACT. + For ROW_FORMAT=REDUNDANT, these bytes were + added to rec_max_size before this loop. */ + rec_max_size += field_ext_max_size; + } + + rec_max_size += field_max_size; } - return(undo_page_len >= UNIV_PAGE_SIZE); + return(rec_max_size); } -#endif /****************************************************************//** If a record of this index might not fit on a single B-tree page, return TRUE. 
-@return TRUE if the index record could become too big */ +@return TRUE if the index record could become too big */ static ibool dict_index_too_big_for_tree( /*========================*/ const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index) /*!< in: index */ + const dict_index_t* new_index, /*!< in: index */ + bool strict) /*!< in: TRUE=report error if + records could be too big to + fit in an B-tree page */ { - ulint zip_size; ulint comp; ulint i; /* maximum possible storage size of a record */ @@ -2404,20 +2466,22 @@ dict_index_too_big_for_tree( return(FALSE);); comp = dict_table_is_comp(table); - zip_size = dict_table_zip_size(table); - if (zip_size && zip_size < UNIV_PAGE_SIZE) { + const page_size_t page_size(dict_table_page_size(table)); + + if (page_size.is_compressed() + && page_size.physical() < univ_page_size.physical()) { /* On a compressed page, two records must fit in the - uncompressed page modification log. On compressed - pages with zip_size == UNIV_PAGE_SIZE, this limit will - never be reached. */ + uncompressed page modification log. On compressed pages + with size.physical() == univ_page_size.physical(), + this limit will never be reached. */ ut_ad(comp); /* The maximum allowed record size is the size of an empty page, minus a byte for recoding the heap number in the page modification log. The maximum allowed node pointer size is half that. */ page_rec_max = page_zip_empty_size(new_index->n_fields, - zip_size); + page_size.physical()); if (page_rec_max) { page_rec_max--; } @@ -2428,9 +2492,12 @@ dict_index_too_big_for_tree( rec_max_size = 2; } else { /* The maximum allowed record size is half a B-tree - page. No additional sparse page directory entry will - be generated for the first few user records. */ - page_rec_max = page_get_free_space_of_empty(comp) / 2; + page(16k for 64k page size). No additional sparse + page directory entry will be generated for the first + few user records. 
*/ + page_rec_max = srv_page_size == UNIV_PAGE_SIZE_MAX + ? REC_MAX_DATA_SIZE - 1 + : page_get_free_space_of_empty(comp) / 2; page_ptr_max = page_rec_max; /* Each record has a header. */ rec_max_size = comp @@ -2461,7 +2528,7 @@ dict_index_too_big_for_tree( ulint field_ext_max_size; /* In dtuple_convert_big_rec(), variable-length columns - that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2 + that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE may be chosen for external storage. Fixed-length columns, and all columns of secondary @@ -2473,7 +2540,7 @@ dict_index_too_big_for_tree( REC_STATUS_ORDINARY records. */ field_max_size = dict_col_get_fixed_size(col, comp); - if (field_max_size) { + if (field_max_size && field->fixed_len != 0) { /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len); @@ -2490,16 +2557,16 @@ dict_index_too_big_for_tree( if (field->prefix_len < field_max_size) { field_max_size = field->prefix_len; } - } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2 + } else if (field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE && dict_index_is_clust(new_index)) { /* In the worst case, we have a locally stored - column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes. + column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes. The length can be stored in one byte. If the column were stored externally, the lengths in the clustered index page would be BTR_EXTERN_FIELD_REF_SIZE and 2. */ - field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2; + field_max_size = BTR_EXTERN_LOCAL_STORED_MAX_SIZE; field_ext_max_size = 1; } @@ -2513,7 +2580,15 @@ add_field_size: rec_max_size += field_max_size; /* Check the size limit on leaf pages. 
*/ - if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) { + if (rec_max_size >= page_rec_max) { + ib::error_or_warn(strict) + << "Cannot add field " << field->name + << " in table " << table->name + << " because after adding it, the row size is " + << rec_max_size + << " which is greater than maximum allowed" + " size (" << page_rec_max + << ") for a record on index leaf page."; return(TRUE); } @@ -2534,36 +2609,62 @@ add_field_size: return(FALSE); } -/**********************************************************************//** -Adds an index to the dictionary cache. -@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ -UNIV_INTERN +/** Adds an index to the dictionary cache. +@param[in,out] table table on which the index is +@param[in,out] index index; NOTE! The index memory + object is freed in this function! +@param[in] page_no root page number of the index +@param[in] strict TRUE=refuse to create the index + if records could be too big to fit in + an B-tree page +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ dberr_t dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table on which the index is */ - dict_index_t* index, /*!< in, own: index; NOTE! The index memory - object is freed in this function! */ - ulint page_no,/*!< in: root page number of the index */ - ibool strict) /*!< in: TRUE=refuse to create the index - if records could be too big to fit in - an B-tree page */ + dict_table_t* table, + dict_index_t* index, + ulint page_no, + ibool strict) +{ + return(dict_index_add_to_cache_w_vcol( + table, index, NULL, page_no, strict)); +} + +/** Adds an index to the dictionary cache, with possible indexing newly +added column. +@param[in,out] table table on which the index is +@param[in,out] index index; NOTE! The index memory + object is freed in this function! 
+@param[in] add_v new virtual column that being added along with + an add index call +@param[in] page_no root page number of the index +@param[in] strict TRUE=refuse to create the index + if records could be too big to fit in + an B-tree page +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ +dberr_t +dict_index_add_to_cache_w_vcol( + dict_table_t* table, + dict_index_t* index, + const dict_add_v_col_t* add_v, + ulint page_no, + ibool strict) { dict_index_t* new_index; ulint n_ord; ulint i; ut_ad(index); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); ut_ad(index->n_def == index->n_fields); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(!dict_index_is_ibuf(index)); - ut_ad(mem_heap_validate(index->heap)); + ut_d(mem_heap_validate(index->heap)); ut_a(!dict_index_is_clust(index) || UT_LIST_GET_LEN(table->indexes) == 0); - if (!dict_index_find_cols(table, index)) { + if (!dict_index_find_cols(table, index, add_v)) { dict_mem_index_free(index); return(DB_CORRUPTION); @@ -2585,125 +2686,69 @@ dict_index_add_to_cache( new_index->n_fields = new_index->n_def; new_index->trx_id = index->trx_id; + new_index->set_committed(index->is_committed()); + new_index->allow_duplicates = index->allow_duplicates; + new_index->nulls_equal = index->nulls_equal; + new_index->disable_ahi = index->disable_ahi; - if (dict_index_too_big_for_tree(table, new_index)) { + if (dict_index_too_big_for_tree(table, new_index, strict)) { if (strict) { -too_big: dict_mem_index_free(new_index); dict_mem_index_free(index); return(DB_TOO_BIG_RECORD); } else if (current_thd != NULL) { /* Avoid the warning to be printed during recovery. 
*/ - ib_warn_row_too_big(table); + ib_warn_row_too_big((const dict_table_t*)table); } } - if (dict_index_is_univ(index)) { - n_ord = new_index->n_fields; - } else { - n_ord = new_index->n_uniq; - } - -#if 1 /* The following code predetermines whether to call - dict_index_too_big_for_undo(). This function is not - accurate. See innodb_4k.test, Bug 13336585, for a - testcase that shows an index that can be created but - cannot be updated. */ - - switch (dict_table_get_format(table)) { - case UNIV_FORMAT_A: - /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store - prefixes of externally stored columns locally within - the record. There are no special considerations for - the undo log record size. */ - goto undo_size_ok; - - case UNIV_FORMAT_B: - /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED, - column prefix indexes require that prefixes of - externally stored columns are written to the undo log. - This may make the undo log record bigger than the - record on the B-tree page. The maximum size of an - undo log record is the page size. That must be - checked for below. */ - break; - -#if UNIV_FORMAT_B != UNIV_FORMAT_MAX -# error "UNIV_FORMAT_B != UNIV_FORMAT_MAX" -#endif - } - - for (i = 0; i < n_ord; i++) { - const dict_field_t* field - = dict_index_get_nth_field(new_index, i); - const dict_col_t* col - = dict_field_get_col(field); - - /* In dtuple_convert_big_rec(), variable-length columns - that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2 - may be chosen for external storage. If the column appears - in an ordering column of an index, a longer prefix determined - by dict_max_field_len_store_undo() will be copied to the undo - log by trx_undo_page_report_modify() and - trx_undo_page_fetch_ext(). It suffices to check the - capacity of the undo log whenever new_index includes - a column prefix on a column that may be stored externally. 
*/ - - if (field->prefix_len /* prefix index */ - && (!col->ord_part /* not yet ordering column */ - || field->prefix_len > col->max_prefix) - && !dict_col_get_fixed_size(col, TRUE) /* variable-length */ - && dict_col_get_max_size(col) - > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) { - - if (dict_index_too_big_for_undo(table, new_index)) { - /* An undo log record might not fit in - a single page. Refuse to create this index. */ - - goto too_big; - } - - break; - } - } - -undo_size_ok: -#endif + n_ord = new_index->n_uniq; /* Flag the ordering columns and also set column max_prefix */ for (i = 0; i < n_ord; i++) { const dict_field_t* field = dict_index_get_nth_field(new_index, i); - field->col->ord_part = 1; - - if (field->prefix_len > field->col->max_prefix) { + /* Check the column being added in the index for + the first time and flag the ordering column. */ + if (field->col->ord_part == 0 ) { + field->col->max_prefix = field->prefix_len; + field->col->ord_part = 1; + } else if (field->prefix_len == 0) { + /* Set the max_prefix for a column to 0 if + its prefix length is 0 (for this index) + even if it was a part of any other index + with some prefix length. */ + field->col->max_prefix = 0; + } else if (field->col->max_prefix != 0 + && field->prefix_len + > field->col->max_prefix) { + /* Set the max_prefix value based on the + prefix_len. 
*/ field->col->max_prefix = field->prefix_len; } + ut_ad(field->col->ord_part == 1); } - if (!dict_index_is_univ(new_index)) { - - new_index->stat_n_diff_key_vals = - static_cast(mem_heap_zalloc( + new_index->stat_n_diff_key_vals = + static_cast(mem_heap_zalloc( new_index->heap, dict_index_get_n_unique(new_index) * sizeof(*new_index->stat_n_diff_key_vals))); - new_index->stat_n_sample_sizes = - static_cast(mem_heap_zalloc( + new_index->stat_n_sample_sizes = + static_cast(mem_heap_zalloc( new_index->heap, dict_index_get_n_unique(new_index) * sizeof(*new_index->stat_n_sample_sizes))); - new_index->stat_n_non_null_key_vals = - static_cast(mem_heap_zalloc( + new_index->stat_n_non_null_key_vals = + static_cast(mem_heap_zalloc( new_index->heap, dict_index_get_n_unique(new_index) * sizeof(*new_index->stat_n_non_null_key_vals))); - } new_index->stat_index_size = 1; new_index->stat_n_leaf_pages = 1; @@ -2717,17 +2762,48 @@ undo_size_ok: /* Add the new index as the last index for the table */ - UT_LIST_ADD_LAST(indexes, table->indexes, new_index); + UT_LIST_ADD_LAST(table->indexes, new_index); new_index->table = table; - new_index->table_name = table->name; + new_index->table_name = table->name.m_name; new_index->search_info = btr_search_info_create(new_index->heap); new_index->page = page_no; rw_lock_create(index_tree_rw_lock_key, &new_index->lock, - dict_index_is_ibuf(index) - ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE); + SYNC_INDEX_TREE); - dict_sys->size += mem_heap_get_size(new_index->heap); + /* Intrinsic table are not added to dictionary cache instead are + cached to session specific thread cache. */ + if (!dict_table_is_intrinsic(table)) { + dict_sys->size += mem_heap_get_size(new_index->heap); + } + + /* Check if key part of the index is unique. 
*/ + if (dict_table_is_intrinsic(table)) { + + new_index->rec_cache.fixed_len_key = true; + for (i = 0; i < new_index->n_uniq; i++) { + + const dict_field_t* field; + field = dict_index_get_nth_field(new_index, i); + + if (!field->fixed_len) { + new_index->rec_cache.fixed_len_key = false; + break; + } + } + + new_index->rec_cache.key_has_null_cols = false; + for (i = 0; i < new_index->n_uniq; i++) { + + const dict_field_t* field; + field = dict_index_get_nth_field(new_index, i); + + if (!(field->col->prtype & DATA_NOT_NULL)) { + new_index->rec_cache.key_has_null_cols = true; + break; + } + } + } dict_mem_index_free(index); @@ -2745,14 +2821,14 @@ dict_index_remove_from_cache_low( ibool lru_evict) /*!< in: TRUE if index being evicted to make room in the table LRU list */ { - ulint size; + lint size; ulint retries = 0; btr_search_t* info; ut_ad(table && index); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); /* No need to acquire the dict_index_t::lock here because there can't be any active operations on this index (or table). */ @@ -2777,7 +2853,7 @@ dict_index_remove_from_cache_low( zero. See also: dict_table_can_be_evicted() */ do { - ulint ref_count = btr_search_info_get_ref_count(info); + ulint ref_count = btr_search_info_get_ref_count(info, index); if (ref_count == 0) { break; @@ -2789,16 +2865,11 @@ dict_index_remove_from_cache_low( if (retries % 500 == 0) { /* No luck after 5 seconds of wait. */ - fprintf(stderr, "InnoDB: Error: Waited for" - " %lu secs for hash index" - " ref_count (%lu) to drop" - " to 0.\n" - "index: \"%s\"" - " table: \"%s\"\n", - retries/100, - ref_count, - index->name, - table->name); + ib::error() << "Waited for " << retries / 100 + << " secs for hash index" + " ref_count (" << ref_count << ") to drop to 0." 
+ " index: " << index->name + << " table: " << table->name; } /* To avoid a hang here we commit suicide if the @@ -2810,11 +2881,52 @@ dict_index_remove_from_cache_low( rw_lock_free(&index->lock); + /* The index is being dropped, remove any compression stats for it. */ + if (!lru_evict && DICT_TF_GET_ZIP_SSIZE(index->table->flags)) { + mutex_enter(&page_zip_stat_per_index_mutex); + page_zip_stat_per_index.erase(index->id); + mutex_exit(&page_zip_stat_per_index_mutex); + } + /* Remove the index from the list of indexes of the table */ - UT_LIST_REMOVE(indexes, table->indexes, index); + UT_LIST_REMOVE(table->indexes, index); + + /* Remove the index from affected virtual column index list */ + if (dict_index_has_virtual(index)) { + const dict_col_t* col; + const dict_v_col_t* vcol; + + for (ulint i = 0; i < dict_index_get_n_fields(index); i++) { + col = dict_index_get_nth_col(index, i); + if (dict_col_is_virtual(col)) { + vcol = reinterpret_cast( + col); + + /* This could be NULL, when we do add virtual + column, add index together. We do not need to + track this virtual column's index */ + if (vcol->v_indexes == NULL) { + continue; + } + + dict_v_idx_list::iterator it; + + for (it = vcol->v_indexes->begin(); + it != vcol->v_indexes->end(); ++it) { + dict_v_idx_t v_index = *it; + if (v_index.index == index) { + vcol->v_indexes->erase(it); + break; + } + } + } + + } + } size = mem_heap_get_size(index->heap); + ut_ad(!dict_table_is_intrinsic(table)); ut_ad(dict_sys->size >= size); dict_sys->size -= size; @@ -2824,7 +2936,6 @@ dict_index_remove_from_cache_low( /**********************************************************************//** Removes an index from the dictionary cache. 
*/ -UNIV_INTERN void dict_index_remove_from_cache( /*=========================*/ @@ -2834,43 +2945,97 @@ dict_index_remove_from_cache( dict_index_remove_from_cache_low(table, index, FALSE); } -/*******************************************************************//** -Tries to find column names for the index and sets the col field of the +/** Tries to find column names for the index and sets the col field of the index. +@param[in] table table +@param[in,out] index index +@param[in] add_v new virtual columns added along with an add index call @return TRUE if the column names were found */ static ibool dict_index_find_cols( -/*=================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: index */ + const dict_table_t* table, + dict_index_t* index, + const dict_add_v_col_t* add_v) { - ulint i; + std::vector > col_added; + std::vector > v_col_added; - ut_ad(table && index); + ut_ad(table != NULL && index != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); - for (i = 0; i < index->n_fields; i++) { + for (ulint i = 0; i < index->n_fields; i++) { ulint j; dict_field_t* field = dict_index_get_nth_field(index, i); for (j = 0; j < table->n_cols; j++) { if (!innobase_strcasecmp(dict_table_get_col_name(table, j), field->name)) { + + /* Check if same column is being assigned again + which suggest that column has duplicate name. */ + bool exists = + std::find(col_added.begin(), + col_added.end(), j) + != col_added.end(); + + if (exists) { + /* Duplicate column found. */ + goto dup_err; + } + field->col = dict_table_get_nth_col(table, j); + col_added.push_back(j); + goto found; } } + /* Let's check if it is a virtual column */ + for (j = 0; j < table->n_v_cols; j++) { + if (!strcmp(dict_table_get_v_col_name(table, j), + field->name)) { + + /* Check if same column is being assigned again + which suggest that column has duplicate name. 
*/ + bool exists = + std::find(v_col_added.begin(), + v_col_added.end(), j) + != v_col_added.end(); + + if (exists) { + /* Duplicate column found. */ + break; + } + + field->col = reinterpret_cast( + dict_table_get_nth_v_col(table, j)); + + v_col_added.push_back(j); + + goto found; + } + } + + if (add_v) { + for (j = 0; j < add_v->n_v_col; j++) { + if (!strcmp(add_v->v_col_name[j], + field->name)) { + field->col = const_cast( + &add_v->v_col[j].m_col); + goto found; + } + } + } + +dup_err: #ifdef UNIV_DEBUG /* It is an error not to find a matching column. */ - fputs("InnoDB: Error: no matching column for ", stderr); - ut_print_name(stderr, NULL, FALSE, field->name); - fputs(" in ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs("!\n", stderr); + ib::error() << "No matching column for " << field->name + << " in index " << index->name + << " of table " << table->name; #endif /* UNIV_DEBUG */ return(FALSE); @@ -2884,7 +3049,6 @@ found: /*******************************************************************//** Adds a column to index. */ -UNIV_INTERN void dict_index_add_col( /*===============*/ @@ -2896,15 +3060,50 @@ dict_index_add_col( dict_field_t* field; const char* col_name; - col_name = dict_table_get_col_name(table, dict_col_get_no(col)); + if (dict_col_is_virtual(col)) { + dict_v_col_t* v_col = reinterpret_cast(col); + + /* When v_col->v_indexes==NULL, + ha_innobase::commit_inplace_alter_table(commit=true) + will evict and reload the table definition, and + v_col->v_indexes will not be NULL for the new table. 
*/ + if (v_col->v_indexes != NULL) { + /* Register the index with the virtual column index + list */ + struct dict_v_idx_t new_idx + = {index, index->n_def}; + + v_col->v_indexes->push_back(new_idx); + + } + + col_name = dict_table_get_v_col_name_mysql( + table, dict_col_get_no(col)); + } else { + col_name = dict_table_get_col_name(table, dict_col_get_no(col)); + } dict_mem_index_add_field(index, col_name, prefix_len); field = dict_index_get_nth_field(index, index->n_def - 1); field->col = col; - field->fixed_len = (unsigned int) dict_col_get_fixed_size( - col, dict_table_is_comp(table)); + /* DATA_POINT is a special type, whose fixed_len should be: + 1) DATA_MBR_LEN, when it's indexed in R-TREE. In this case, + it must be the first col to be added. + 2) DATA_POINT_LEN(be equal to fixed size of column), when it's + indexed in B-TREE, + 3) DATA_POINT_LEN, if a POINT col is the PRIMARY KEY, and we are + adding the PK col to other B-TREE/R-TREE. */ + /* TODO: We suppose the dimension is 2 now. */ + if (dict_index_is_spatial(index) && DATA_POINT_MTYPE(col->mtype) + && index->n_def == 1) { + field->fixed_len = DATA_MBR_LEN; + } else { + field->fixed_len = static_cast( + dict_col_get_fixed_size( + col, dict_table_is_comp(table))); + } if (prefix_len && field->fixed_len > prefix_len) { field->fixed_len = (unsigned int) prefix_len; @@ -2950,6 +3149,7 @@ dict_index_copy( for (i = start; i < end; i++) { field = dict_index_get_nth_field(index2, i); + dict_index_add_col(index1, table, field->col, field->prefix_len); } @@ -2957,7 +3157,6 @@ dict_index_copy( /*******************************************************************//** Copies types of fields contained in index to tuple. 
*/ -UNIV_INTERN void dict_index_copy_types( /*==================*/ @@ -2968,7 +3167,7 @@ dict_index_copy_types( { ulint i; - if (dict_index_is_univ(index)) { + if (dict_index_is_ibuf(index)) { dtuple_set_types_binary(tuple, n_fields); return; @@ -2981,14 +3180,45 @@ dict_index_copy_types( ifield = dict_index_get_nth_field(index, i); dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); dict_col_copy_type(dict_field_get_col(ifield), dfield_type); + if (dict_index_is_spatial(index) + && DATA_GEOMETRY_MTYPE(dfield_type->mtype)) { + dfield_type->prtype |= DATA_GIS_MBR; + } } } +/** Copies types of virtual columns contained in table to tuple and sets all +fields of the tuple to the SQL NULL value. This function should +be called right after dtuple_create(). +@param[in,out] tuple data tuple +@param[in] table table +*/ +void +dict_table_copy_v_types( + dtuple_t* tuple, + const dict_table_t* table) +{ + /* tuple could have more virtual columns than existing table, + if we are calling this for creating index along with adding + virtual columns */ + ulint n_fields = ut_min(dtuple_get_n_v_fields(tuple), + static_cast(table->n_v_def)); + + for (ulint i = 0; i < n_fields; i++) { + + dfield_t* dfield = dtuple_get_nth_v_field(tuple, i); + dtype_t* dtype = dfield_get_type(dfield); + + dfield_set_null(dfield); + dict_col_copy_type( + &(dict_table_get_nth_v_col(table, i)->m_col), + dtype); + } +} /*******************************************************************//** Copies types of columns contained in table to tuple and sets all fields of the tuple to the SQL NULL value. This function should be called right after dtuple_create(). 
*/ -UNIV_INTERN void dict_table_copy_types( /*==================*/ @@ -3005,13 +3235,14 @@ dict_table_copy_types( dfield_set_null(dfield); dict_col_copy_type(dict_table_get_nth_col(table, i), dtype); } + + dict_table_copy_v_types(tuple, table); } /******************************************************************** Wait until all the background threads of the given table have exited, i.e., bg_threads == 0. Note: bg_threads_mutex must be reserved when calling this. */ -UNIV_INTERN void dict_table_wait_for_bg_threads_to_exit( /*===================================*/ @@ -3021,9 +3252,7 @@ dict_table_wait_for_bg_threads_to_exit( { fts_t* fts = table->fts; -#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&fts->bg_threads_mutex)); -#endif /* UNIV_SYNC_DEBUG */ while (fts->bg_threads > 0) { mutex_exit(&fts->bg_threads_mutex); @@ -3037,7 +3266,7 @@ dict_table_wait_for_bg_threads_to_exit( /*******************************************************************//** Builds the internal dictionary cache representation for a clustered index, containing also system fields not defined by the user. 
-@return own: the internal representation of the clustered index */ +@return own: the internal representation of the clustered index */ static dict_index_t* dict_index_build_internal_clust( @@ -3054,11 +3283,13 @@ dict_index_build_internal_clust( ut_ad(table && index); ut_ad(dict_index_is_clust(index)); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(!dict_index_is_ibuf(index)); + + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); /* Create a new index object with certainly enough fields */ - new_index = dict_mem_index_create(table->name, + new_index = dict_mem_index_create(table->name.m_name, index->name, table->space, index->type, index->n_fields + table->n_cols); @@ -3073,12 +3304,7 @@ dict_index_build_internal_clust( /* Copy the fields of index */ dict_index_copy(new_index, index, table, 0, index->n_fields); - if (dict_index_is_univ(index)) { - /* No fixed number of fields determines an entry uniquely */ - - new_index->n_uniq = REC_MAX_N_FIELDS; - - } else if (dict_index_is_unique(index)) { + if (dict_index_is_unique(index)) { /* Only the fields defined so far are needed to identify the index entry uniquely */ @@ -3090,10 +3316,9 @@ dict_index_build_internal_clust( new_index->trx_id_offset = 0; - if (!dict_index_is_ibuf(index)) { - /* Add system columns, trx id first */ + /* Add system columns, trx id first */ - trx_id_pos = new_index->n_def; + trx_id_pos = new_index->n_def; #if DATA_ROW_ID != 0 # error "DATA_ROW_ID != 0" @@ -3105,63 +3330,69 @@ dict_index_build_internal_clust( # error "DATA_ROLL_PTR != 2" #endif - if (!dict_index_is_unique(index)) { - dict_index_add_col(new_index, table, - dict_table_get_sys_col( - table, DATA_ROW_ID), - 0); - trx_id_pos++; + if (!dict_index_is_unique(index)) { + dict_index_add_col(new_index, table, + dict_table_get_sys_col( + table, DATA_ROW_ID), + 0); + trx_id_pos++; + } + + dict_index_add_col( + new_index, table, + dict_table_get_sys_col(table, DATA_TRX_ID), 
0); + + + for (i = 0; i < trx_id_pos; i++) { + + ulint fixed_size = dict_col_get_fixed_size( + dict_index_get_nth_col(new_index, i), + dict_table_is_comp(table)); + + if (fixed_size == 0) { + new_index->trx_id_offset = 0; + + break; } - dict_index_add_col(new_index, table, - dict_table_get_sys_col(table, DATA_TRX_ID), - 0); + dict_field_t* field = dict_index_get_nth_field( + new_index, i); + if (field->prefix_len > 0) { + new_index->trx_id_offset = 0; - dict_index_add_col(new_index, table, - dict_table_get_sys_col(table, - DATA_ROLL_PTR), - 0); - - for (i = 0; i < trx_id_pos; i++) { - - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(new_index, i), - dict_table_is_comp(table)); - - if (fixed_size == 0) { - new_index->trx_id_offset = 0; - - break; - } - - if (dict_index_get_nth_field(new_index, i)->prefix_len - > 0) { - new_index->trx_id_offset = 0; - - break; - } - - /* Add fixed_size to new_index->trx_id_offset. - Because the latter is a bit-field, an overflow - can theoretically occur. Check for it. */ - fixed_size += new_index->trx_id_offset; - - new_index->trx_id_offset = fixed_size; - - if (new_index->trx_id_offset != fixed_size) { - /* Overflow. Pretend that this is a - variable-length PRIMARY KEY. */ - ut_ad(0); - new_index->trx_id_offset = 0; - break; - } + break; } + /* Add fixed_size to new_index->trx_id_offset. + Because the latter is a bit-field, an overflow + can theoretically occur. Check for it. */ + fixed_size += new_index->trx_id_offset; + + new_index->trx_id_offset = fixed_size; + + if (new_index->trx_id_offset != fixed_size) { + /* Overflow. Pretend that this is a + variable-length PRIMARY KEY. */ + ut_ad(0); + new_index->trx_id_offset = 0; + break; + } + } + + /* UNDO logging is turned-off for intrinsic table and so + DATA_ROLL_PTR system columns are not added as default system + columns to such tables. 
*/ + if (!dict_table_is_intrinsic(table)) { + + dict_index_add_col( + new_index, table, + dict_table_get_sys_col(table, DATA_ROLL_PTR), + 0); } /* Remember the table columns already contained in new_index */ indexed = static_cast( - mem_zalloc(table->n_cols * sizeof *indexed)); + ut_zalloc_nokey(table->n_cols * sizeof *indexed)); /* Mark the table columns already contained in new_index */ for (i = 0; i < new_index->n_def; i++) { @@ -3179,7 +3410,8 @@ dict_index_build_internal_clust( /* Add to new_index non-system columns of table not yet included there */ - for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { + ulint n_sys_cols = dict_table_get_n_sys_cols(table); + for (i = 0; i + n_sys_cols < (ulint) table->n_cols; i++) { dict_col_t* col = dict_table_get_nth_col(table, i); ut_ad(col->mtype != DATA_SYS); @@ -3189,10 +3421,9 @@ dict_index_build_internal_clust( } } - mem_free(indexed); + ut_free(indexed); - ut_ad(dict_index_is_ibuf(index) - || (UT_LIST_GET_LEN(table->indexes) == 0)); + ut_ad(UT_LIST_GET_LEN(table->indexes) == 0); new_index->cached = TRUE; @@ -3202,7 +3433,7 @@ dict_index_build_internal_clust( /*******************************************************************//** Builds the internal dictionary cache representation for a non-clustered index, containing also system fields not defined by the user. 
-@return own: the internal representation of the non-clustered index */ +@return own: the internal representation of the non-clustered index */ static dict_index_t* dict_index_build_internal_non_clust( @@ -3219,7 +3450,8 @@ dict_index_build_internal_non_clust( ut_ad(table && index); ut_ad(!dict_index_is_clust(index)); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(!dict_index_is_ibuf(index)); + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); /* The clustered index should be the first in the list of indexes */ @@ -3227,11 +3459,11 @@ dict_index_build_internal_non_clust( ut_ad(clust_index); ut_ad(dict_index_is_clust(clust_index)); - ut_ad(!dict_index_is_univ(clust_index)); + ut_ad(!dict_index_is_ibuf(clust_index)); /* Create a new index */ new_index = dict_mem_index_create( - table->name, index->name, index->space, index->type, + table->name.m_name, index->name, index->space, index->type, index->n_fields + 1 + clust_index->n_uniq); /* Copy other relevant data from the old index @@ -3246,13 +3478,17 @@ dict_index_build_internal_non_clust( /* Remember the table columns already contained in new_index */ indexed = static_cast( - mem_zalloc(table->n_cols * sizeof *indexed)); + ut_zalloc_nokey(table->n_cols * sizeof *indexed)); /* Mark the table columns already contained in new_index */ for (i = 0; i < new_index->n_def; i++) { field = dict_index_get_nth_field(new_index, i); + if (dict_col_is_virtual(field->col)) { + continue; + } + /* If there is only a prefix of the column in the index field, do not mark the column as contained in the index */ @@ -3272,10 +3508,15 @@ dict_index_build_internal_non_clust( if (!indexed[field->col->ind]) { dict_index_add_col(new_index, table, field->col, field->prefix_len); + } else if (dict_index_is_spatial(index)) { + /*For spatial index, we still need to add the + field to index. 
*/ + dict_index_add_col(new_index, table, field->col, + field->prefix_len); } } - mem_free(indexed); + ut_free(indexed); if (dict_index_is_unique(index)) { new_index->n_uniq = index->n_fields; @@ -3295,7 +3536,7 @@ dict_index_build_internal_non_clust( /*********************************************************************** Builds the internal dictionary cache representation for an FTS index. -@return own: the internal representation of the FTS index */ +@return own: the internal representation of the FTS index */ static dict_index_t* dict_index_build_internal_fts( @@ -3307,14 +3548,12 @@ dict_index_build_internal_fts( ut_ad(table && index); ut_ad(index->type == DICT_FTS); -#ifdef UNIV_SYNC_DEBUG - ut_ad(mutex_own(&(dict_sys->mutex))); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); /* Create a new index */ new_index = dict_mem_index_create( - table->name, index->name, index->space, index->type, + table->name.m_name, index->name, index->space, index->type, index->n_fields); /* Copy other relevant data from the old index struct to the new @@ -3348,10 +3587,26 @@ dict_index_build_internal_fts( #define DB_FOREIGN_KEY_COLS_NOT_EQUAL 202 #define DB_FOREIGN_KEY_INDEX_NOT_FOUND 203 +/** Check whether the dict_table_t is a partition. +A partitioned table on the SQL level is composed of InnoDB tables, +where each InnoDB table is a [sub]partition including its secondary indexes +which belongs to the partition. +@param[in] table Table to check. +@return true if the dict_table_t is a partition else false. */ +UNIV_INLINE +bool +dict_table_is_partition( + const dict_table_t* table) +{ + /* Check both P and p on all platforms in case it was moved to/from + WIN. */ + return(strstr(table->name.m_name, "#p#") + || strstr(table->name.m_name, "#P#")); +} + /*********************************************************************//** Checks if a table is referenced by foreign keys. 
-@return TRUE if table is referenced by a foreign key */ -UNIV_INTERN +@return TRUE if table is referenced by a foreign key */ ibool dict_table_is_referenced_by_foreign_key( /*====================================*/ @@ -3362,13 +3617,12 @@ dict_table_is_referenced_by_foreign_key( /**********************************************************************//** Removes a foreign constraint struct from the dictionary cache. */ -UNIV_INTERN void dict_foreign_remove_from_cache( /*===========================*/ dict_foreign_t* foreign) /*!< in, own: foreign constraint */ { - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_a(foreign); if (foreign->referenced_table != NULL) { @@ -3385,7 +3639,7 @@ dict_foreign_remove_from_cache( /**********************************************************************//** Looks for the foreign constraint from the foreign and referenced lists of a table. -@return foreign constraint */ +@return foreign constraint */ static dict_foreign_t* dict_foreign_find( @@ -3393,7 +3647,7 @@ dict_foreign_find( dict_table_t* table, /*!< in: table object */ dict_foreign_t* foreign) /*!< in: foreign constraint */ { - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(dict_foreign_set_validate(table->foreign_set)); ut_ad(dict_foreign_set_validate(table->referenced_set)); @@ -3418,8 +3672,7 @@ dict_foreign_find( Tries to find an index whose first fields are the columns in the array, in the same order and is not marked for deletion and is not the same as types_idx. 
-@return matching index, NULL if not found */ -UNIV_INTERN +@return matching index, NULL if not found */ dict_index_t* dict_foreign_find_index( /*====================*/ @@ -3446,7 +3699,7 @@ dict_foreign_find_index( /*!< out: column number where error happened */ dict_index_t** err_index) - /*!< out: index where error + /*!< out: index where error happened */ { dict_index_t* index; @@ -3462,6 +3715,8 @@ dict_foreign_find_index( while (index != NULL) { if (types_idx != index && !(index->type & DICT_FTS) + && !dict_index_is_spatial(index) + && !dict_index_has_virtual(index) && !index->to_be_dropped && dict_foreign_qualify_index( table, col_names, columns, n_cols, @@ -3537,12 +3792,9 @@ dict_foreign_error_report( fputs(fk_str.c_str(), file); putc('\n', file); if (fk->foreign_index) { - fputs("The index in the foreign key in table is ", file); - ut_print_name(file, NULL, FALSE, fk->foreign_index->name); - fputs("\n" - "See " REFMAN "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - file); + fprintf(file, "The index in the foreign key in table is" + " %s\n%s\n", fk->foreign_index->name(), + FOREIGN_KEY_CONSTRAINTS_MSG); } mutex_exit(&dict_foreign_err_mutex); } @@ -3552,8 +3804,7 @@ Adds a foreign key constraint object to the dictionary cache. May free the object if there already is an object with the same identifier in. At least one of the foreign table and the referenced table must already be in the dictionary cache! 
-@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_foreign_add_to_cache( /*======================*/ @@ -3575,7 +3826,10 @@ dict_foreign_add_to_cache( ibool added_to_referenced_list= FALSE; FILE* ef = dict_foreign_err_file; - ut_ad(mutex_own(&(dict_sys->mutex))); + DBUG_ENTER("dict_foreign_add_to_cache"); + DBUG_PRINT("dict_foreign_add_to_cache", ("id: %s", foreign->id)); + + ut_ad(mutex_own(&dict_sys->mutex)); for_table = dict_table_check_if_in_cache_low( foreign->foreign_table_name_lookup); @@ -3594,11 +3848,13 @@ dict_foreign_add_to_cache( if (for_in_cache) { /* Free the foreign object */ - mem_heap_free(foreign->heap); + dict_foreign_free(foreign); } else { for_in_cache = foreign; + } + if (ref_table && !for_in_cache->referenced_table) { ulint index_error; ulint err_col; @@ -3621,11 +3877,12 @@ dict_foreign_add_to_cache( "referenced table do not match" " the ones in table."); - if (for_in_cache == foreign) { - mem_heap_free(foreign->heap); - } + if (for_in_cache == foreign) { + mem_heap_free(foreign->heap); + } - return(DB_CANNOT_ADD_CONSTRAINT); + + DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT); } for_in_cache->referenced_table = ref_table; @@ -3669,23 +3926,23 @@ dict_foreign_add_to_cache( if (for_in_cache == foreign) { if (added_to_referenced_list) { - const dict_foreign_set::size_type n - = ref_table->referenced_set + const dict_foreign_set::size_type + n = ref_table->referenced_set .erase(for_in_cache); ut_a(n == 1); /* the number of elements removed must be one */ } - mem_heap_free(foreign->heap); } - return(DB_CANNOT_ADD_CONSTRAINT); + DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT); } for_in_cache->foreign_table = for_table; for_in_cache->foreign_index = index; + std::pair ret = for_table->foreign_set.insert(for_in_cache); @@ -3696,24 +3953,23 @@ dict_foreign_add_to_cache( /* We need to move the table to the non-LRU end of the table LRU list. Otherwise it will be evicted from the cache. 
*/ - if (ref_table != NULL && ref_table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(ref_table); + if (ref_table != NULL) { + dict_table_prevent_eviction(ref_table); } - if (for_table != NULL && for_table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(for_table); + if (for_table != NULL) { + dict_table_prevent_eviction(for_table); } ut_ad(dict_lru_validate()); - - return(DB_SUCCESS); + DBUG_RETURN(DB_SUCCESS); } /*********************************************************************//** Scans from pointer onwards. Stops if is at the start of a copy of 'string' where characters are compared without case sensitivity, and only outside `` or "" quotes. Stops also at NUL. -@return scanned up to this */ +@return scanned up to this */ static const char* dict_scan_to( @@ -3773,7 +4029,7 @@ static const char* dict_accept( /*========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ + CHARSET_INFO* cs, /*!< in: the character set of ptr */ const char* ptr, /*!< in: scan from this */ const char* string, /*!< in: accept only this string as the next non-whitespace string */ @@ -3804,12 +4060,12 @@ dict_accept( /*********************************************************************//** Scans an id. For the lexical definition of an 'id', see the code below. Strips backquotes or double quotes from around the id. 
-@return scanned to */ +@return scanned to */ static const char* dict_scan_id( /*=========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ + CHARSET_INFO* cs, /*!< in: the character set of ptr */ const char* ptr, /*!< in: scanned to */ mem_heap_t* heap, /*!< in: heap where to allocate the id (NULL=id will not be allocated, but it @@ -3872,7 +4128,7 @@ dict_scan_id( len = ptr - s; } - if (UNIV_UNLIKELY(!heap)) { + if (heap == NULL) { /* no heap given: id will point to source string */ *id = s; return(ptr); @@ -3926,12 +4182,12 @@ convert_id: /*********************************************************************//** Tries to scan a column name. -@return scanned to */ +@return scanned to */ static const char* dict_scan_col( /*==========*/ - struct charset_info_st* cs, /*!< in: the character set of ptr */ + CHARSET_INFO* cs, /*!< in: the character set of ptr */ const char* ptr, /*!< in: scanned to */ ibool* success,/*!< out: TRUE if success */ dict_table_t* table, /*!< in: table in which the column is */ @@ -3981,7 +4237,6 @@ Open a table from its database and table name, this is currently used by foreign constraint parser to get the referenced table. @return complete table name with database and table name, allocated from heap memory passed in */ -UNIV_INTERN char* dict_get_referenced_table( /*======================*/ @@ -4024,13 +4279,13 @@ dict_get_referenced_table( memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); } else { -#ifndef __WIN__ +#ifndef _WIN32 if (innobase_get_lower_case_table_names() == 1) { innobase_casedn_str(ref); } #else innobase_casedn_str(ref); -#endif /* !__WIN__ */ +#endif /* !_WIN32 */ *table = dict_table_get_low(ref); } @@ -4038,12 +4293,12 @@ dict_get_referenced_table( } /*********************************************************************//** Scans a table name from an SQL string. 
-@return scanned to */ +@return scanned to */ static const char* dict_scan_table_name( /*=================*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ + CHARSET_INFO* cs, /*!< in: the character set of ptr */ const char* ptr, /*!< in: scanned to */ dict_table_t** table, /*!< out: table object or NULL */ const char* name, /*!< in: foreign key table name */ @@ -4113,12 +4368,12 @@ dict_scan_table_name( /*********************************************************************//** Skips one id. The id is allowed to contain also '.'. -@return scanned to */ +@return scanned to */ static const char* dict_skip_word( /*===========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ + CHARSET_INFO* cs, /*!< in: the character set of ptr */ const char* ptr, /*!< in: scanned to */ ibool* success)/*!< out: TRUE if success, FALSE if just spaces left in string or a syntax error */ @@ -4143,7 +4398,7 @@ Removes MySQL comments from an SQL string. A comment is either (c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar C comment syntax). @return own: SQL string stripped from comments; the caller must free -this with mem_free()! */ +this with ut_free()! */ static char* dict_strip_comments( @@ -4163,7 +4418,7 @@ dict_strip_comments( DBUG_PRINT("dict_strip_comments", ("%s", sql_string)); - str = static_cast(mem_alloc(sql_length + 1)); + str = static_cast(ut_malloc_nokey(sql_length + 1)); sptr = sql_string; ptr = str; @@ -4252,8 +4507,7 @@ end_of_string: Finds the highest [number] for foreign key constraints of the table. Looks only at the >= 4.0.18-format id's, which are of the form databasename/tablename_ibfk_[number]. 
-@return highest number, 0 if table has no new format foreign key constraints */ -UNIV_INTERN +@return highest number, 0 if table has no new format foreign key constraints */ ulint dict_table_get_highest_foreign_id( /*==============================*/ @@ -4265,9 +4519,11 @@ dict_table_get_highest_foreign_id( ulint id; ulint len; + DBUG_ENTER("dict_table_get_highest_foreign_id"); + ut_a(table); - len = ut_strlen(table->name); + len = ut_strlen(table->name.m_name); for (dict_foreign_set::iterator it = table->foreign_set.begin(); it != table->foreign_set.end(); @@ -4284,7 +4540,7 @@ dict_table_get_highest_foreign_id( MAX_TABLE_NAME_LEN); if (ut_strlen(fkid) > ((sizeof dict_ibfk) - 1) + len - && 0 == ut_memcmp(fkid, table->name, len) + && 0 == ut_memcmp(fkid, table->name.m_name, len) && 0 == ut_memcmp(fkid + len, dict_ibfk, (sizeof dict_ibfk) - 1) && fkid[len + ((sizeof dict_ibfk) - 1)] != '0') { @@ -4303,7 +4559,10 @@ dict_table_get_highest_foreign_id( } } - return(biggest_id); + DBUG_PRINT("dict_table_get_highest_foreign_id", + ("id: %lu", biggest_id)); + + DBUG_RETURN(biggest_id); } /*********************************************************************//** @@ -4422,32 +4681,21 @@ dict_foreign_push_index_error( } /*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. Each foreign key constraint must -be accompanied with indexes in both participating tables. The indexes are -allowed to contain more fields than mentioned in the constraint. -@return error code or DB_SUCCESS */ +Scans a table create SQL string and adds to the data dictionary the foreign key +constraints declared in the string. This function should be called after the +indexes for a table have been created. 
Each foreign key constraint must be +accompanied with indexes in bot participating tables. The indexes are allowed +to contain more fields than mentioned in the constraint. +@return error code or DB_SUCCESS */ static dberr_t dict_create_foreign_constraints_low( -/*================================*/ - trx_t* trx, /*!< in: transaction */ - mem_heap_t* heap, /*!< in: memory heap */ - struct charset_info_st* cs,/*!< in: the character set of sql_string */ - const char* sql_string, - /*!< in: CREATE TABLE or ALTER TABLE statement - where foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the database - name before it: test.table2; the default - database is the database of parameter name */ - const char* name, /*!< in: table full name in the normalized form - database_name/table_name */ - ibool reject_fks) - /*!< in: if TRUE, fail with error code - DB_CANNOT_ADD_CONSTRAINT if any foreign - keys are found. */ + trx_t* trx, + mem_heap_t* heap, + CHARSET_INFO* cs, + const char* sql_string, + const char* name, + ibool reject_fks) { dict_table_t* table = NULL; dict_table_t* referenced_table = NULL; @@ -4486,7 +4734,7 @@ dict_create_foreign_constraints_low( char operation[8]; ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); table = dict_table_get_low(name); /* First check if we are actually doing an ALTER TABLE, and in that @@ -4511,14 +4759,14 @@ dict_create_foreign_constraints_low( char *bufend; bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, create_table_name, strlen(create_table_name), - trx->mysql_thd, TRUE); + trx->mysql_thd); create_name[bufend-create_name]='\0'; ptr = orig; } else { char *bufend; ptr = orig; bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - name, strlen(name), trx->mysql_thd, TRUE); + name, strlen(name), trx->mysql_thd); create_name[bufend-create_name]='\0'; } @@ -4566,26 +4814,21 @@ 
dict_create_foreign_constraints_low( if (table_to_alter) { char *bufend; bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - table_to_alter->name, strlen(table_to_alter->name), - trx->mysql_thd, TRUE); + table_to_alter->name.m_name, strlen(table_to_alter->name.m_name), + trx->mysql_thd); create_name[bufend-create_name]='\0'; } else { char *bufend; bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, referenced_table_name, strlen(referenced_table_name), - trx->mysql_thd, TRUE); + trx->mysql_thd); create_name[bufend-create_name]='\0'; } if (!success) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. Table %s not found from data dictionary." - " Error close to %s.\n", - operation, create_name, create_name, orig); + ib::error() << "Could not find the table " << create_name << " being" << operation << " near to " + << orig; mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_ERROR, @@ -4677,6 +4920,10 @@ loop: return(DB_CANNOT_ADD_CONSTRAINT); } + if (dict_foreigns_has_s_base_col(local_fk_set, table)) { + return(DB_NO_FK_ON_S_BASE_COL); + } + /**********************************************************/ /* The following call adds the foreign key constraints to the data dictionary system tables on disk */ @@ -4692,6 +4939,8 @@ loop: local_fk_set.end(), dict_foreign_add_to_referenced_table()); local_fk_set.clear(); + + dict_mem_table_fill_foreign_vcol_set(table); } return(error); } @@ -4717,53 +4966,52 @@ loop: } if (my_isspace(cs, *ptr)) { - ptr1 = dict_accept(cs, ptr, "IF", &success); + ptr1 = dict_accept(cs, ptr, "IF", &success); - if (success) { - if (!my_isspace(cs, *ptr1)) { - goto loop; - } - ptr1 = dict_accept(cs, ptr1, "NOT", &success); - if (!success) { - goto loop; - } - ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); - if (!success) { - goto loop; - } - ptr = ptr1; - } + if (success) { + if 
(!my_isspace(cs, *ptr1)) { + goto loop; + } + ptr1 = dict_accept(cs, ptr1, "NOT", &success); + if (!success) { + goto loop; + } + ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); + if (!success) { + goto loop; + } + ptr = ptr1; + } } orig = ptr; ptr = dict_accept(cs, ptr, "(", &success); if (!success) { - if (constraint_name) { - /* MySQL allows also an index id before the '('; we - skip it */ - ptr = dict_skip_word(cs, ptr, &success); - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); + if (constraint_name) { + /* MySQL allows also an index id before the '('; we + skip it */ + ptr = dict_skip_word(cs, ptr, &success); + if (!success) { + dict_foreign_report_syntax_err( + "%s table %s with foreign key constraint" + " failed. Parse error in '%s'" + " near '%s'.\n", + operation, create_name, start_of_latest_foreign, orig); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - return(DB_CANNOT_ADD_CONSTRAINT); - } - } - else { - while (my_isspace(cs, *ptr)) { - ptr++; - } + ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, + "%s table %s with foreign key constraint" + " failed. 
Parse error in '%s'" + " near '%s'.", + operation, create_name, start_of_latest_foreign, orig); + return(DB_CANNOT_ADD_CONSTRAINT); + } + } else { + while (my_isspace(cs, *ptr)) { + ptr++; + } - ptr = dict_scan_id(cs, ptr, heap, + ptr = dict_scan_id(cs, ptr, heap, &constraint_name, FALSE, FALSE); } @@ -4846,12 +5094,11 @@ col_loop1: mutex_enter(&dict_foreign_err_mutex); dict_foreign_error_report_low(ef, create_name); fputs("There is no index in table ", ef); - ut_print_name(ef, NULL, TRUE, create_name); + ut_print_name(ef, NULL, create_name); fprintf(ef, " where the columns appear\n" - "as the first columns. Constraint:\n%s\n" - "See " REFMAN "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); + "as the first columns. Constraint:\n%s\n%s", + start_of_latest_foreign, + FOREIGN_KEY_CONSTRAINTS_MSG); dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign, column_names, index_error, err_col, err_index, table, ef); @@ -4877,6 +5124,40 @@ col_loop1: return(DB_CANNOT_ADD_CONSTRAINT); } + /* Don't allow foreign keys on partitioned tables yet. */ + ptr1 = dict_scan_to(ptr, "PARTITION"); + if (ptr1) { + ptr1 = dict_accept(cs, ptr1, "PARTITION", &success); + if (success && my_isspace(cs, *ptr1)) { + ptr2 = dict_accept(cs, ptr1, "BY", &success); + if (success) { + my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0)); + return(DB_CANNOT_ADD_CONSTRAINT); + } + } + } + if (dict_table_is_partition(table)) { + my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0)); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Don't allow foreign keys on partitioned tables yet. 
*/ + ptr1 = dict_scan_to(ptr, "PARTITION"); + if (ptr1) { + ptr1 = dict_accept(cs, ptr1, "PARTITION", &success); + if (success && my_isspace(cs, *ptr1)) { + ptr2 = dict_accept(cs, ptr1, "BY", &success); + if (success) { + my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0)); + return(DB_CANNOT_ADD_CONSTRAINT); + } + } + } + if (dict_table_is_partition(table)) { + my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0)); + return(DB_CANNOT_ADD_CONSTRAINT); + } + /* Let us create a constraint struct */ foreign = dict_mem_foreign_create(); @@ -4889,19 +5170,19 @@ col_loop1: same MySQL 'database' as the table itself. We store the name to foreign->id. */ - db_len = dict_get_db_name_len(table->name); + db_len = dict_get_db_name_len(table->name.m_name); foreign->id = static_cast(mem_heap_alloc( foreign->heap, db_len + strlen(constraint_name) + 2)); - ut_memcpy(foreign->id, table->name, db_len); + ut_memcpy(foreign->id, table->name.m_name, db_len); foreign->id[db_len] = '/'; strcpy(foreign->id + db_len + 1, constraint_name); } if (foreign->id == NULL) { - error = dict_create_add_foreign_id(&number, - table->name, foreign); + error = dict_create_add_foreign_id( + &number, table->name.m_name, foreign); if (error != DB_SUCCESS) { dict_foreign_free(foreign); return(error); @@ -4919,7 +5200,7 @@ col_loop1: foreign->foreign_table = table; foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); + foreign->heap, table->name.m_name); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); foreign->foreign_index = index; @@ -4947,7 +5228,7 @@ col_loop1: bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN, referenced_table_name, strlen(referenced_table_name), - trx->mysql_thd, TRUE); + trx->mysql_thd); buf[bufend - buf] = '\0'; ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, @@ -4966,7 +5247,14 @@ col_loop1: return(DB_CANNOT_ADD_CONSTRAINT); } - orig = ptr; + /* Don't allow foreign keys on partitioned tables yet. 
*/ + if (referenced_table && dict_table_is_partition(referenced_table)) { + /* How could one make a referenced table to be a partition? */ + ut_ad(0); + my_error(ER_FOREIGN_KEY_ON_PARTITIONED,MYF(0)); + return(DB_CANNOT_ADD_CONSTRAINT); + } + ptr = dict_accept(cs, ptr, "(", &success); if (!success) { @@ -5258,11 +5546,9 @@ try_find_index: "tables created with >= InnoDB-4.1.12," " and such columns in old tables\n" "cannot be referenced by such columns" - " in new tables.\n" - "See " REFMAN - "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); + " in new tables.\n%s\n", + start_of_latest_foreign, + FOREIGN_KEY_CONSTRAINTS_MSG); dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign, column_names, index_error, err_col, err_index, referenced_table, ef); @@ -5296,8 +5582,7 @@ try_find_index: /************************************************************************** Determines whether a string starts with the specified keyword. -@return TRUE if str starts with keyword */ -UNIV_INTERN +@return TRUE if str starts with keyword */ ibool dict_str_starts_with_keyword( /*=========================*/ @@ -5305,40 +5590,40 @@ dict_str_starts_with_keyword( const char* str, /*!< in: string to scan for keyword */ const char* keyword) /*!< in: keyword to look for */ { - struct charset_info_st* cs = innobase_get_charset(thd); - ibool success; + CHARSET_INFO* cs = innobase_get_charset(thd); + ibool success; dict_accept(cs, str, keyword, &success); return(success); } -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. Each foreign key constraint must -be accompanied with indexes in both participating tables. 
The indexes are -allowed to contain more fields than mentioned in the constraint. -@return error code or DB_SUCCESS */ -UNIV_INTERN +/** Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +bot participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. + +@param[in] trx transaction +@param[in] sql_string table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database id the database of parameter name +@param[in] sql_length length of sql_string +@param[in] name table full name in normalized form +@param[in] reject_fks if TRUE, fail with error code + DB_CANNOT_ADD_CONSTRAINT if any + foreign keys are found. +@return error code or DB_SUCCESS */ dberr_t dict_create_foreign_constraints( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. 
*/ + trx_t* trx, + const char* sql_string, + size_t sql_length, + const char* name, + ibool reject_fks) { char* str; dberr_t err; @@ -5351,11 +5636,11 @@ dict_create_foreign_constraints( heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( - trx, heap, innobase_get_charset(trx->mysql_thd), str, name, - reject_fks); + trx, heap, innobase_get_charset(trx->mysql_thd), + str, name, reject_fks); mem_heap_free(heap); - mem_free(str); + ut_free(str); return(err); } @@ -5364,7 +5649,6 @@ dict_create_foreign_constraints( Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. @return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the constraint id does not match */ -UNIV_INTERN dberr_t dict_foreign_parse_drop_constraints( /*================================*/ @@ -5381,9 +5665,9 @@ dict_foreign_parse_drop_constraints( char* str; size_t len; const char* ptr; - const char* ptr1; + const char* ptr1; const char* id; - struct charset_info_st* cs; + CHARSET_INFO* cs; ut_a(trx); ut_a(trx->mysql_thd); @@ -5395,18 +5679,18 @@ dict_foreign_parse_drop_constraints( *constraints_to_drop = static_cast( mem_heap_alloc(heap, 1000 * sizeof(char*))); - ptr = innobase_get_stmt(trx->mysql_thd, &len); + ptr = innobase_get_stmt_unsafe(trx->mysql_thd, &len); str = dict_strip_comments(ptr, len); ptr = str; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); loop: ptr = dict_scan_to(ptr, "DROP"); if (*ptr == '\0') { - mem_free(str); + ut_free(str); return(DB_SUCCESS); } @@ -5435,11 +5719,10 @@ loop: ptr1 = dict_accept(cs, ptr, "IF", &success); if (success && my_isspace(cs, *ptr1)) { - ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); - if (success) { - - ptr = ptr1; - } + ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); + if (success) { + ptr = ptr1; + } } ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE); @@ -5464,19 +5747,16 @@ loop: mutex_enter(&dict_foreign_err_mutex); rewind(ef); ut_print_timestamp(ef); - fputs(" Error 
in dropping of a foreign key " - "constraint of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); - fputs(",\nin SQL command\n", ef); - fputs(str, ef); - fputs("\nCannot find a constraint with the " - "given id ", ef); - ut_print_name(ef, NULL, FALSE, id); - fputs(".\n", ef); + fputs(" Error in dropping of a foreign key" + " constraint of table ", ef); + ut_print_name(ef, NULL, table->name.m_name); + fprintf(ef, ",\nin SQL command\n%s" + "\nCannot find a constraint with the" + " given id %s.\n", str, id); mutex_exit(&dict_foreign_err_mutex); } - mem_free(str); + ut_free(str); return(DB_CANNOT_DROP_CONSTRAINT); } @@ -5492,13 +5772,13 @@ syntax_error: ut_print_timestamp(ef); fputs(" Syntax error in dropping of a" " foreign key constraint of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); + ut_print_name(ef, NULL, table->name.m_name); fprintf(ef, ",\n" "close to:\n%s\n in SQL command\n%s\n", ptr, str); mutex_exit(&dict_foreign_err_mutex); } - mem_free(str); + ut_free(str); return(DB_CANNOT_DROP_CONSTRAINT); } @@ -5508,14 +5788,13 @@ syntax_error: /**********************************************************************//** Returns an index object if it is found in the dictionary cache. Assumes that dict_sys->mutex is already being held. -@return index, NULL if not found */ -UNIV_INTERN +@return index, NULL if not found */ dict_index_t* dict_index_get_if_in_cache_low( /*===========================*/ index_id_t index_id) /*!< in: index id */ { - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_index_find_on_id_low(index_id)); } @@ -5523,8 +5802,7 @@ dict_index_get_if_in_cache_low( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Returns an index object if it is found in the dictionary cache. 
-@return index, NULL if not found */ -UNIV_INTERN +@return index, NULL if not found */ dict_index_t* dict_index_get_if_in_cache( /*=======================*/ @@ -5536,11 +5814,11 @@ dict_index_get_if_in_cache( return(NULL); } - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); index = dict_index_get_if_in_cache_low(index_id); - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); return(index); } @@ -5550,8 +5828,7 @@ dict_index_get_if_in_cache( /**********************************************************************//** Checks that a tuple has n_fields_cmp value in a sensible range, so that no comparison can occur with the page number field in a node pointer. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dict_index_check_search_tuple( /*==========================*/ @@ -5567,8 +5844,7 @@ dict_index_check_search_tuple( /**********************************************************************//** Builds a node pointer out of a physical record and a page number. -@return own: node pointer */ -UNIV_INTERN +@return own: node pointer */ dtuple_t* dict_index_build_node_ptr( /*======================*/ @@ -5587,7 +5863,7 @@ dict_index_build_node_ptr( byte* buf; ulint n_unique; - if (dict_index_is_univ(index)) { + if (dict_index_is_ibuf(index)) { /* In a universal index tree, we take the whole record as the node pointer if the record is on the leaf level, on non-leaf levels we remove the last field, which @@ -5601,7 +5877,7 @@ dict_index_build_node_ptr( n_unique--; } } else { - n_unique = dict_index_get_n_unique_in_tree(index); + n_unique = dict_index_get_n_unique_in_tree_nonleaf(index); } tuple = dtuple_create(heap, n_unique + 1); @@ -5637,8 +5913,7 @@ dict_index_build_node_ptr( /**********************************************************************//** Copies an initial segment of a physical record, long enough to specify an index entry uniquely. 
-@return pointer to the prefix record */ -UNIV_INTERN +@return pointer to the prefix record */ rec_t* dict_index_copy_rec_order_prefix( /*=============================*/ @@ -5654,11 +5929,21 @@ dict_index_copy_rec_order_prefix( UNIV_PREFETCH_R(rec); - if (dict_index_is_univ(index)) { + if (dict_index_is_ibuf(index)) { ut_a(!dict_table_is_comp(index->table)); n = rec_get_n_fields_old(rec); } else { - n = dict_index_get_n_unique_in_tree(index); + if (page_is_leaf(page_align(rec))) { + n = dict_index_get_n_unique_in_tree(index); + } else { + n = dict_index_get_n_unique_in_tree_nonleaf(index); + /* For internal node of R-tree, since we need to + compare the page no field, so, we need to copy this + field as well. */ + if (dict_index_is_spatial(index)) { + n++; + } + } } *n_fields = n; @@ -5667,8 +5952,7 @@ dict_index_copy_rec_order_prefix( /**********************************************************************//** Builds a typed data tuple out of a physical record. -@return own: data tuple */ -UNIV_INTERN +@return own: data tuple */ dtuple_t* dict_index_build_data_tuple( /*========================*/ @@ -5695,7 +5979,6 @@ dict_index_build_data_tuple( /*********************************************************************//** Calculates the minimum record length in an index. */ -UNIV_INTERN ulint dict_index_calc_min_rec_len( /*========================*/ @@ -5744,185 +6027,9 @@ dict_index_calc_min_rec_len( return(sum); } -/**********************************************************************//** -Prints info of a foreign key constraint. 
*/ -static -void -dict_foreign_print_low( -/*===================*/ - dict_foreign_t* foreign) /*!< in: foreign key constraint */ -{ - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (", - foreign->id, foreign->foreign_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->foreign_col_names[i]); - } - - fprintf(stderr, " )\n" - " REFERENCES %s (", - foreign->referenced_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->referenced_col_names[i]); - } - - fputs(" )\n", stderr); -} - -/**********************************************************************//** -Prints a table data. */ -UNIV_INTERN -void -dict_table_print( -/*=============*/ - dict_table_t* table) /*!< in: table */ -{ - dict_index_t* index; - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_table_stats_lock(table, RW_X_LATCH); - - if (!table->stat_initialized) { - dict_stats_update_transient(table); - } - - fprintf(stderr, - "--------------------------------------\n" - "TABLE: name %s, id %llu, flags %lx, columns %lu," - " indexes %lu, appr.rows " UINT64PF "\n" - " COLUMNS: ", - table->name, - (ullint) table->id, - (ulong) table->flags, - (ulong) table->n_cols, - (ulong) UT_LIST_GET_LEN(table->indexes), - table->stat_n_rows); - - for (i = 0; i < (ulint) table->n_cols; i++) { - dict_col_print_low(table, dict_table_get_nth_col(table, i)); - fputs("; ", stderr); - } - - putc('\n', stderr); - - index = UT_LIST_GET_FIRST(table->indexes); - - while (index != NULL) { - dict_index_print_low(index); - index = UT_LIST_GET_NEXT(indexes, index); - } - - dict_table_stats_unlock(table, RW_X_LATCH); - - std::for_each(table->foreign_set.begin(), - table->foreign_set.end(), - dict_foreign_print_low); - - std::for_each(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_print_low); -} - 
-/**********************************************************************//** -Prints a column data. */ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /*!< in: table */ - const dict_col_t* col) /*!< in: column */ -{ - dtype_t type; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_col_copy_type(col, &type); - fprintf(stderr, "%s: ", dict_table_get_col_name(table, - dict_col_get_no(col))); - - dtype_print(&type); -} - -/**********************************************************************//** -Prints an index data. */ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index) /*!< in: index */ -{ - ib_int64_t n_vals; - ulint i; - - ut_a(index->table->stat_initialized); - - ut_ad(mutex_own(&(dict_sys->mutex))); - - if (index->n_user_defined_cols > 0) { - n_vals = index->stat_n_diff_key_vals[ - index->n_user_defined_cols - 1]; - } else { - n_vals = index->stat_n_diff_key_vals[0]; - } - - fprintf(stderr, - " INDEX: name %s, id %llu, fields %lu/%lu," - " uniq %lu, type %lu\n" - " root page %lu, appr.key vals %lu," - " leaf pages %lu, size pages %lu\n" - " FIELDS: ", - index->name, - (ullint) index->id, - (ulong) index->n_user_defined_cols, - (ulong) index->n_fields, - (ulong) index->n_uniq, - (ulong) index->type, - (ulong) index->page, - (ulong) n_vals, - (ulong) index->stat_n_leaf_pages, - (ulong) index->stat_index_size); - - for (i = 0; i < index->n_fields; i++) { - dict_field_print_low(dict_index_get_nth_field(index, i)); - } - - putc('\n', stderr); - -#ifdef UNIV_BTR_PRINT - btr_print_size(index); - - btr_print_index(index, 7); -#endif /* UNIV_BTR_PRINT */ -} - -/**********************************************************************//** -Prints a field data. 
*/ -static -void -dict_field_print_low( -/*=================*/ - const dict_field_t* field) /*!< in: field */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " %s", field->name); - - if (field->prefix_len != 0) { - fprintf(stderr, "(%lu)", (ulong) field->prefix_len); - } -} - /**********************************************************************//** Outputs info on a foreign key of a table in a format suitable for CREATE TABLE. */ -UNIV_INTERN std::string dict_print_info_on_foreign_key_in_create_format( /*============================================*/ @@ -5953,11 +6060,12 @@ dict_print_info_on_foreign_key_in_create_format( str.append(" CONSTRAINT "); - str.append(ut_get_name(trx, FALSE, stripped_id)); + str.append(innobase_quote_identifier(trx, stripped_id)); str.append(" FOREIGN KEY ("); for (i = 0;;) { - str.append(ut_get_name(trx, FALSE, foreign->foreign_col_names[i])); + str.append(innobase_quote_identifier(trx, foreign->foreign_col_names[i])); + if (++i < foreign->n_fields) { str.append(", "); } else { @@ -5970,18 +6078,18 @@ dict_print_info_on_foreign_key_in_create_format( if (dict_tables_have_same_db(foreign->foreign_table_name_lookup, foreign->referenced_table_name_lookup)) { /* Do not print the database name of the referenced table */ - str.append(ut_get_name(trx, TRUE, + str.append(ut_get_name(trx, dict_remove_db_name( foreign->referenced_table_name))); } else { - str.append(ut_get_name(trx, TRUE, + str.append(ut_get_name(trx, foreign->referenced_table_name)); } str.append(" ("); for (i = 0;;) { - str.append(ut_get_name(trx, FALSE, + str.append(innobase_quote_identifier(trx, foreign->referenced_col_names[i])); if (++i < foreign->n_fields) { @@ -6022,7 +6130,6 @@ dict_print_info_on_foreign_key_in_create_format( /**********************************************************************//** Outputs info on foreign keys of a table. 
*/ -UNIV_INTERN std::string dict_print_info_on_foreign_keys( /*============================*/ @@ -6036,7 +6143,7 @@ dict_print_info_on_foreign_keys( dict_foreign_t* foreign; std::string str; - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); for (dict_foreign_set::iterator it = table->foreign_set.begin(); it != table->foreign_set.end(); @@ -6057,12 +6164,12 @@ dict_print_info_on_foreign_keys( str.append(" "); } - str.append(ut_get_name(trx, FALSE, + str.append(innobase_quote_identifier(trx, foreign->foreign_col_names[i])); } str.append(") REFER "); - str.append(ut_get_name(trx, TRUE, + str.append(ut_get_name(trx, foreign->referenced_table_name)); str.append(")"); @@ -6070,8 +6177,8 @@ dict_print_info_on_foreign_keys( if (i) { str.append(" "); } - str.append(ut_get_name( - trx, FALSE, + str.append(innobase_quote_identifier( + trx, foreign->referenced_col_names[i])); } @@ -6103,37 +6210,20 @@ dict_print_info_on_foreign_keys( } } - mutex_exit(&(dict_sys->mutex)); - + mutex_exit(&dict_sys->mutex); return str; } -/********************************************************************//** -Displays the names of the index and the table. */ -UNIV_INTERN -void -dict_index_name_print( -/*==================*/ - FILE* file, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: index to print */ -{ - fputs("index ", file); - ut_print_name(file, trx, FALSE, index->name); - fputs(" of table ", file); - ut_print_name(file, trx, TRUE, index->table_name); -} - -/**********************************************************************//** -Find a table in dict_sys->table_LRU list with specified space id +/** Given a space_id of a file-per-table tablespace, search the +dict_sys->table_LRU list and return the dict_table_t* pointer for it. 
+@param space_id Tablespace ID @return table if found, NULL if not */ static dict_table_t* -dict_find_table_by_space( -/*=====================*/ - ulint space_id) /*!< in: space ID */ +dict_find_single_table_by_space( + ulint space_id) { - dict_table_t* table; + dict_table_t* table; ulint num_item; ulint count = 0; @@ -6149,11 +6239,14 @@ dict_find_table_by_space( /* This function intentionally does not acquire mutex as it is used by error handling code in deep call stack as last means to avoid - killing the server, so it worth to risk some consequencies for + killing the server, so it worth to risk some consequences for the action. */ while (table && count < num_item) { if (table->space == space_id) { - return(table); + if (dict_table_is_file_per_table(table)) { + return(table); + } + return(NULL); } table = UT_LIST_GET_NEXT(table_LRU, table); @@ -6167,7 +6260,6 @@ dict_find_table_by_space( Flags a table with specified space_id corrupted in the data dictionary cache @return TRUE if successful */ -UNIV_INTERN ibool dict_set_corrupted_by_space( /*========================*/ @@ -6175,7 +6267,7 @@ dict_set_corrupted_by_space( { dict_table_t* table; - table = dict_find_table_by_space(space_id); + table = dict_find_single_table_by_space(space_id); if (!table) { return(FALSE); @@ -6191,7 +6283,6 @@ dict_set_corrupted_by_space( /**********************************************************************//** Flags an index corrupted both in the data dictionary cache and in the SYS_INDEXES */ -UNIV_INTERN void dict_set_corrupted( /*===============*/ @@ -6205,7 +6296,6 @@ dict_set_corrupted( dtuple_t* tuple; dfield_t* dfield; byte* buf; - char* table_name; const char* status; btr_cur_t cursor; bool locked = RW_X_LATCH == trx->dict_operation_lock_mode; @@ -6219,9 +6309,13 @@ dict_set_corrupted( ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); -#endif +#ifdef 
UNIV_DEBUG + { + dict_sync_check check(true); + + ut_ad(!sync_check_iterate(check)); + } +#endif /* UNIV_DEBUG */ /* Mark the table as corrupted only if the clustered index is corrupted */ @@ -6235,6 +6329,13 @@ dict_set_corrupted( goto func_exit; } + /* If this is read only mode, do not update SYS_INDEXES, just + mark it as corrupted in memory */ + if (srv_read_only_mode) { + index->type |= DICT_CORRUPT; + goto func_exit; + } + heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t) + sizeof(que_fork_t) + sizeof(upd_node_t) + sizeof(upd_t) + 12)); @@ -6281,15 +6382,8 @@ fail: mtr_commit(&mtr); mem_heap_empty(heap); - table_name = static_cast(mem_heap_alloc(heap, FN_REFLEN + 1)); - *innobase_convert_name( - table_name, FN_REFLEN, - index->table_name, strlen(index->table_name), - NULL, TRUE) = 0; - - ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s", - status, index->name, table_name, ctx); - + ib::error() << status << " corruption of " << index->name + << " in table " << index->table->name << " in " << ctx; mem_heap_free(heap); func_exit: @@ -6298,18 +6392,16 @@ func_exit: } } -/**********************************************************************//** -Flags an index corrupted in the data dictionary cache only. This +/** Flags an index corrupted in the data dictionary cache only. 
This is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose */ -UNIV_INTERN +is corrupted, and we force to load such index for repair purpose +@param[in,out] index index which is corrupted */ void dict_set_corrupted_index_cache_only( -/*================================*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table) /*!< in/out: table */ + dict_index_t* index) { ut_ad(index != NULL); + ut_ad(index->table != NULL); ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); @@ -6317,24 +6409,128 @@ dict_set_corrupted_index_cache_only( /* Mark the table as corrupted only if the clustered index is corrupted */ if (dict_index_is_clust(index)) { - dict_table_t* corrupt_table; - - corrupt_table = (table != NULL) ? table : index->table; - ut_ad((index->table != NULL) || (table != NULL) - || index->table == table); - - if (corrupt_table) { - corrupt_table->corrupted = TRUE; - } + index->table->corrupted = TRUE; } index->type |= DICT_CORRUPT; } + +/** Sets merge_threshold in the SYS_INDEXES +@param[in,out] index index +@param[in] merge_threshold value to set */ +void +dict_index_set_merge_threshold( + dict_index_t* index, + ulint merge_threshold) +{ + mem_heap_t* heap; + mtr_t mtr; + dict_index_t* sys_index; + dtuple_t* tuple; + dfield_t* dfield; + byte* buf; + btr_cur_t cursor; + + ut_ad(index != NULL); + ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); + ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); + + rw_lock_x_lock(dict_operation_lock); + mutex_enter(&(dict_sys->mutex)); + + heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t) + + sizeof(que_fork_t) + sizeof(upd_node_t) + + sizeof(upd_t) + 12)); + + mtr_start(&mtr); + + sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes); + + /* Find the index row in SYS_INDEXES */ + tuple = dtuple_create(heap, 2); + + dfield = 
dtuple_get_nth_field(tuple, 0); + buf = static_cast(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, index->table->id); + dfield_set_data(dfield, buf, 8); + + dfield = dtuple_get_nth_field(tuple, 1); + buf = static_cast(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, index->id); + dfield_set_data(dfield, buf, 8); + + dict_index_copy_types(tuple, sys_index, 2); + + btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, + &cursor, 0, __FILE__, __LINE__, &mtr); + + if (cursor.up_match == dtuple_get_n_fields(tuple) + && rec_get_n_fields_old(btr_cur_get_rec(&cursor)) + == DICT_NUM_FIELDS__SYS_INDEXES) { + ulint len; + byte* field = rec_get_nth_field_old( + btr_cur_get_rec(&cursor), + DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len); + + ut_ad(len == 4); + + if (len == 4) { + mlog_write_ulint(field, merge_threshold, + MLOG_4BYTES, &mtr); + } + } + + mtr_commit(&mtr); + mem_heap_free(heap); + + mutex_exit(&(dict_sys->mutex)); + rw_lock_x_unlock(dict_operation_lock); +} + +#ifdef UNIV_DEBUG +/** Sets merge_threshold for all indexes in the list of tables +@param[in] list pointer to the list of tables */ +inline +void +dict_set_merge_threshold_list_debug( + UT_LIST_BASE_NODE_T(dict_table_t)* list, + uint merge_threshold_all) +{ + for (dict_table_t* table = UT_LIST_GET_FIRST(*list); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + rw_lock_x_lock(dict_index_get_lock(index)); + index->merge_threshold = merge_threshold_all; + rw_lock_x_unlock(dict_index_get_lock(index)); + } + } +} + +/** Sets merge_threshold for all indexes in dictionary cache for debug. 
+@param[in] merge_threshold_all value to set for all indexes */ +void +dict_set_merge_threshold_all_debug( + uint merge_threshold_all) +{ + mutex_enter(&dict_sys->mutex); + + dict_set_merge_threshold_list_debug( + &dict_sys->table_LRU, merge_threshold_all); + dict_set_merge_threshold_list_debug( + &dict_sys->table_non_LRU, merge_threshold_all); + + mutex_exit(&dict_sys->mutex); +} + +#endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** -Inits dict_ind_redundant and dict_ind_compact. */ -UNIV_INTERN +Inits dict_ind_redundant. */ void dict_ind_init(void) /*===============*/ @@ -6342,7 +6538,7 @@ dict_ind_init(void) dict_table_t* table; /* create dummy table and index for REDUNDANT infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0); + table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0, 0); dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, DATA_ENGLISH | DATA_NOT_NULL, 8); @@ -6351,26 +6547,13 @@ dict_ind_init(void) dict_index_add_col(dict_ind_redundant, table, dict_table_get_nth_col(table, 0), 0); dict_ind_redundant->table = table; - - /* create dummy table and index for COMPACT infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY2", - DICT_HDR_SPACE, 1, - DICT_TF_COMPACT, 0); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2", - DICT_HDR_SPACE, 0, 1); - dict_index_add_col(dict_ind_compact, table, - dict_table_get_nth_col(table, 0), 0); - dict_ind_compact->table = table; - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - dict_ind_redundant->cached = dict_ind_compact->cached = TRUE; + dict_ind_redundant->cached = TRUE; } #ifndef UNIV_HOTBACKUP /**********************************************************************//** -Frees dict_ind_redundant and dict_ind_compact. 
*/ +Frees dict_ind_redundant. */ static void dict_ind_free(void) @@ -6378,33 +6561,31 @@ dict_ind_free(void) { dict_table_t* table; - table = dict_ind_compact->table; - dict_mem_index_free(dict_ind_compact); - dict_ind_compact = NULL; - dict_mem_table_free(table); - table = dict_ind_redundant->table; dict_mem_index_free(dict_ind_redundant); dict_ind_redundant = NULL; dict_mem_table_free(table); } -/**********************************************************************//** -Get index by name -@return index, NULL if does not exist */ -UNIV_INTERN +/** Get an index by name. +@param[in] table the table where to look for the index +@param[in] name the index name to look for +@param[in] committed true=search for committed, +false=search for uncommitted +@return index, NULL if does not exist */ dict_index_t* dict_table_get_index_on_name( -/*=========================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ + dict_table_t* table, + const char* name, + bool committed) { dict_index_t* index; index = dict_table_get_first_index(table); while (index != NULL) { - if (innobase_strcasecmp(index->name, name) == 0) { + if (index->is_committed() == committed + && innobase_strcasecmp(index->name, name) == 0) { return(index); } @@ -6419,7 +6600,6 @@ dict_table_get_index_on_name( Replace the index passed in with another equivalent index in the foreign key lists of the table. @return whether all replacements were found */ -UNIV_INTERN bool dict_foreign_replace_index( /*=======================*/ @@ -6490,42 +6670,9 @@ dict_foreign_replace_index( return(found); } -/**********************************************************************//** -In case there is more than one index with the same name return the index -with the min(id). 
-@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name_and_min_id( -/*=====================================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ -{ - dict_index_t* index; - dict_index_t* min_index; /* Index with matching name and min(id) */ - - min_index = NULL; - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (ut_strcmp(index->name, name) == 0) { - if (!min_index || index->id < min_index->id) { - - min_index = index; - } - } - - index = dict_table_get_next_index(index); - } - - return(min_index); - -} - #ifdef UNIV_DEBUG /**********************************************************************//** Check for duplicate index entries in a table [using the index name] */ -UNIV_INTERN void dict_table_check_for_dup_indexes( /*=============================*/ @@ -6548,7 +6695,7 @@ dict_table_check_for_dup_indexes( index1 = UT_LIST_GET_FIRST(table->indexes); do { - if (*index1->name == TEMP_INDEX_PREFIX) { + if (!index1->is_committed()) { ut_a(!dict_index_is_clust(index1)); switch (check) { @@ -6573,7 +6720,9 @@ dict_table_check_for_dup_indexes( for (index2 = UT_LIST_GET_NEXT(indexes, index1); index2 != NULL; index2 = UT_LIST_GET_NEXT(indexes, index2)) { - ut_ad(ut_strcmp(index1->name, index2->name)); + ut_ad(index1->is_committed() + != index2->is_committed() + || strcmp(index1->name, index2->name) != 0); } index1 = UT_LIST_GET_NEXT(indexes, index1); @@ -6599,7 +6748,6 @@ types. The order of the columns does not matter. The caller must own the dictionary mutex. 
dict_table_schema_check() @{ @return DB_SUCCESS if the table exists and contains the necessary columns */ -UNIV_INTERN dberr_t dict_table_schema_check( /*====================*/ @@ -6644,7 +6792,7 @@ dict_table_schema_check( ut_snprintf(errstr, errstr_sz, "Table %s not found.", ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf))); + buf, sizeof(buf))); return(DB_TABLE_NOT_FOUND); } else { return(DB_STATS_DO_NOT_EXIST); @@ -6657,20 +6805,19 @@ dict_table_schema_check( ut_snprintf(errstr, errstr_sz, "Tablespace for table %s is missing.", ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf))); + buf, sizeof(buf))); return(DB_TABLE_NOT_FOUND); } - if ((ulint) table->n_def - DATA_N_SYS_COLS != req_schema->n_cols) { - /* the table has a different number of columns than - required */ - + ulint n_sys_cols = dict_table_get_n_sys_cols(table); + if ((ulint) table->n_def - n_sys_cols != req_schema->n_cols) { + /* the table has a different number of columns than required */ ut_snprintf(errstr, errstr_sz, - "%s has %d columns but should have %lu.", + "%s has %lu columns but should have %lu.", ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - table->n_def - DATA_N_SYS_COLS, + buf, sizeof(buf)), + table->n_def - n_sys_cols, req_schema->n_cols); return(DB_ERROR); @@ -6682,44 +6829,20 @@ dict_table_schema_check( be O(n_cols) if the columns are in the same order in both arrays. 
*/ for (i = 0; i < req_schema->n_cols; i++) { - ulint j; + ulint j = dict_table_has_column( + table, req_schema->columns[i].name, i); - /* check if i'th column is the same in both arrays */ - if (innobase_strcasecmp(req_schema->columns[i].name, - dict_table_get_col_name(table, i)) == 0) { + if (j == table->n_def) { - /* we found the column in table->cols[] quickly */ - j = i; - } else { + ut_snprintf(errstr, errstr_sz, + "required column %s" + " not found in table %s.", + req_schema->columns[i].name, + ut_format_name( + req_schema->table_name, + buf, sizeof(buf))); - /* columns in both arrays are not in the same order, - do a full scan of the second array */ - for (j = 0; j < table->n_def; j++) { - const char* name; - - name = dict_table_get_col_name(table, j); - - if (innobase_strcasecmp(name, - req_schema->columns[i].name) == 0) { - - /* found the column on j'th - position */ - break; - } - } - - if (j == table->n_def) { - - ut_snprintf(errstr, errstr_sz, - "required column %s " - "not found in table %s.", - req_schema->columns[i].name, - ut_format_name( - req_schema->table_name, - TRUE, buf, sizeof(buf))); - - return(DB_ERROR); - } + return(DB_ERROR); } /* we found a column with the same name on j'th position, @@ -6731,11 +6854,11 @@ dict_table_schema_check( CREATE_TYPES_NAMES(); ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (length mismatch).", + "Column %s in table %s is %s" + " but should be %s (length mismatch).", req_schema->columns[i].name, ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), + buf, sizeof(buf)), actual_type, req_type); return(DB_ERROR); @@ -6755,11 +6878,11 @@ dict_table_schema_check( CREATE_TYPES_NAMES(); ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (type mismatch).", + "Column %s in table %s is %s" + " but should be %s (type mismatch).", req_schema->columns[i].name, ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), + buf, 
sizeof(buf)), actual_type, req_type); return(DB_ERROR); @@ -6774,11 +6897,11 @@ dict_table_schema_check( CREATE_TYPES_NAMES(); ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (flags mismatch).", + "Column %s in table %s is %s" + " but should be %s (flags mismatch).", req_schema->columns[i].name, ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), + buf, sizeof(buf)), actual_type, req_type); return(DB_ERROR); @@ -6791,7 +6914,7 @@ dict_table_schema_check( "Table %s has " ULINTPF " foreign key(s) pointing" " to other tables, but it must have %lu.", ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), + buf, sizeof(buf)), static_cast(table->foreign_set.size()), req_schema->n_foreign); return(DB_ERROR); @@ -6804,7 +6927,7 @@ dict_table_schema_check( "but there must be %lu.", static_cast(table->referenced_set.size()), ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), + buf, sizeof(buf)), req_schema->n_referenced); return(DB_ERROR); } @@ -6818,7 +6941,6 @@ Converts a database and table name from filesystem encoding (e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. 
*/ -UNIV_INTERN void dict_fs2utf8( /*=========*/ @@ -6842,7 +6964,7 @@ dict_fs2utf8( strconvert( &my_charset_filename, db, db_len, system_charset_info, - db_utf8, static_cast(db_utf8_size), &errors); + db_utf8, db_utf8_size, &errors); /* convert each # to @0023 in table name and store the result in buf */ const char* table = dict_remove_db_name(db_and_table); @@ -6867,8 +6989,9 @@ dict_fs2utf8( errors = 0; strconvert( - &my_charset_filename, buf, (uint) (buf_p - buf), system_charset_info, - table_utf8, static_cast(table_utf8_size), + &my_charset_filename, buf, (uint) (buf_p - buf), + system_charset_info, + table_utf8, table_utf8_size, &errors); if (errors != 0) { @@ -6877,9 +7000,55 @@ dict_fs2utf8( } } +/** Resize the hash tables besed on the current buffer pool size. */ +void +dict_resize() +{ + dict_table_t* table; + + mutex_enter(&dict_sys->mutex); + + /* all table entries are in table_LRU and table_non_LRU lists */ + hash_table_free(dict_sys->table_hash); + hash_table_free(dict_sys->table_id_hash); + + dict_sys->table_hash = hash_create( + buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + + dict_sys->table_id_hash = hash_create( + buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE)); + + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + ulint fold = ut_fold_string(table->name.m_name); + ulint id_fold = ut_fold_ull(table->id); + + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, + fold, table); + + HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, + id_fold, table); + } + + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); table; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + ulint fold = ut_fold_string(table->name.m_name); + ulint id_fold = ut_fold_ull(table->id); + + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, + fold, table); + + HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, + id_fold, table); + 
} + + mutex_exit(&dict_sys->mutex); +} + /**********************************************************************//** Closes the data dictionary module. */ -UNIV_INTERN void dict_close(void) /*============*/ @@ -6899,9 +7068,7 @@ dict_close(void) table = static_cast( HASH_GET_NEXT(name_hash, prev_table)); -#ifdef UNIV_DEBUG - ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N); -#endif + ut_ad(prev_table->magic_n == DICT_TABLE_MAGIC_N); /* Acquire only because it's a pre-condition. */ mutex_enter(&dict_sys->mutex); @@ -6921,23 +7088,26 @@ dict_close(void) mutex_free(&dict_sys->mutex); - rw_lock_free(&dict_operation_lock); - memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock)); + rw_lock_free(dict_operation_lock); - if (!srv_read_only_mode) { - mutex_free(&dict_foreign_err_mutex); - } + ut_free(dict_operation_lock); + dict_operation_lock = NULL; + + mutex_free(&dict_foreign_err_mutex); delete dict_sys->autoinc_map; - mem_free(dict_sys); + ut_ad(dict_sys->size == 0); + + ut_free(dict_sys); + dict_sys = NULL; } #ifdef UNIV_DEBUG /**********************************************************************//** Validate the dictionary table LRU list. -@return TRUE if valid */ +@return TRUE if valid */ static ibool dict_lru_validate(void) @@ -7025,7 +7195,6 @@ Check an index to see whether its first fields are the columns in the array, in the same order and is not marked for deletion and is not the same as types_idx. @return true if the index qualifies, otherwise false */ -UNIV_INTERN bool dict_foreign_qualify_index( /*=======================*/ @@ -7068,6 +7237,8 @@ dict_foreign_qualify_index( field = dict_index_get_nth_field(index, i); col_no = dict_col_get_no(field->col); + ut_ad(!dict_col_is_virtual(field->col)); + if (field->prefix_len != 0) { /* We do not accept column prefix indexes here */ @@ -7134,7 +7305,7 @@ dict_index_zip_pad_update( ut_ad(total > 0); - if(zip_threshold == 0) { + if (zip_threshold == 0) { /* User has just disabled the padding. 
*/ return; } @@ -7160,15 +7331,10 @@ dict_index_zip_pad_update( beyond max pad size. */ if (info->pad + ZIP_PAD_INCR < (UNIV_PAGE_SIZE * zip_pad_max) / 100) { -#ifdef HAVE_ATOMIC_BUILTINS /* Use atomics even though we have the mutex. This is to ensure that we are able to read - info->pad atomically where atomics are - supported. */ + info->pad atomically. */ os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR); -#else /* HAVE_ATOMIC_BUILTINS */ - info->pad += ZIP_PAD_INCR; -#endif /* HAVE_ATOMIC_BUILTINS */ MONITOR_INC(MONITOR_PAD_INCREMENTS); } @@ -7187,15 +7353,10 @@ dict_index_zip_pad_update( && info->pad > 0) { ut_ad(info->pad % ZIP_PAD_INCR == 0); -#ifdef HAVE_ATOMIC_BUILTINS /* Use atomics even though we have the mutex. This is to ensure that we are able to read - info->pad atomically where atomics are - supported. */ + info->pad atomically. */ os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR); -#else /* HAVE_ATOMIC_BUILTINS */ - info->pad -= ZIP_PAD_INCR; -#endif /* HAVE_ATOMIC_BUILTINS */ info->n_rounds = 0; @@ -7207,7 +7368,6 @@ dict_index_zip_pad_update( /*********************************************************************//** This function should be called whenever a page is successfully compressed. Updates the compression padding information. */ -UNIV_INTERN void dict_index_zip_success( /*===================*/ @@ -7230,7 +7390,6 @@ dict_index_zip_success( /*********************************************************************//** This function should be called whenever a page compression attempt fails. Updates the compression padding information. */ -UNIV_INTERN void dict_index_zip_failure( /*===================*/ @@ -7254,7 +7413,6 @@ dict_index_zip_failure( /*********************************************************************//** Return the optimal page size, for which page will likely compress. 
@return page size beyond which page might not compress */ -UNIV_INTERN ulint dict_index_zip_pad_optimal_page_size( /*=================================*/ @@ -7273,16 +7431,9 @@ dict_index_zip_pad_optimal_page_size( } /* We use atomics to read index->zip_pad.pad. Here we use zero - as increment as are not changing the value of the 'pad'. On - platforms where atomics are not available we grab the mutex. */ + as increment as are not changing the value of the 'pad'. */ -#ifdef HAVE_ATOMIC_BUILTINS pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0); -#else /* HAVE_ATOMIC_BUILTINS */ - dict_index_zip_pad_lock(index); - pad = index->zip_pad.pad; - dict_index_zip_pad_unlock(index); -#endif /* HAVE_ATOMIC_BUILTINS */ ut_ad(pad < UNIV_PAGE_SIZE); sz = UNIV_PAGE_SIZE - pad; @@ -7294,10 +7445,81 @@ dict_index_zip_pad_optimal_page_size( return(ut_max(sz, min_sz)); } +/** Convert a 32 bit integer table flags to the 32 bit FSP Flags. +Fsp Flags are written into the tablespace header at the offset +FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field. +The following chart shows the translation of the low order bit. +Other bits are the same. 
+ Low order bit + | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC +dict_table_t::flags | 0 | 1 | 1 | 1 +fil_space_t::flags | 0 | 0 | 1 | 1 +@param[in] table_flags dict_table_t::flags +@param[in] is_temp whether the tablespace is temporary +@param[in] is_encrypted whether the tablespace is encrypted +@return tablespace flags (fil_space_t::flags) */ +ulint +dict_tf_to_fsp_flags( + ulint table_flags, + bool is_temp, + bool is_encrypted) +{ + DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", + return(ULINT_UNDEFINED);); + + bool has_atomic_blobs = + DICT_TF_HAS_ATOMIC_BLOBS(table_flags); + page_size_t page_size = dict_tf_get_page_size(table_flags); + bool has_data_dir = DICT_TF_HAS_DATA_DIR(table_flags); + bool is_shared = DICT_TF_HAS_SHARED_SPACE(table_flags); + bool page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + + ut_ad(!page_size.is_compressed() || has_atomic_blobs); + + /* General tablespaces that are not compressed do not get the + flags for dynamic row format (POST_ANTELOPE & ATOMIC_BLOBS) */ + if (is_shared && !page_size.is_compressed()) { + has_atomic_blobs = false; + } + + ulint fsp_flags = fsp_flags_init(page_size, + has_atomic_blobs, + has_data_dir, + is_shared, + is_temp, + 0, + 0, + 0, + is_encrypted); + + /* In addition, tablespace flags also contain if the page + compression is used for this table. */ + if (page_compression) { + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + } + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. 
*/ + if (page_compression && page_compression_level) { + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + } + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + if (atomic_writes) { + fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + } + + ut_ad(fsp_flags_is_valid(fsp_flags)); + + return(fsp_flags); +} + /*************************************************************//** Convert table flag to row format string. @return row format name. */ -UNIV_INTERN const char* dict_tf_to_row_format_string( /*=========================*/ @@ -7317,4 +7539,142 @@ dict_tf_to_row_format_string( ut_error; return(0); } + +/** Look for any dictionary objects that are found in the given tablespace. +@param[in] space_id Tablespace ID to search for. +@return true if tablespace is empty. */ +bool +dict_space_is_empty( + ulint space_id) +{ + btr_pcur_t pcur; + const rec_t* rec; + mtr_t mtr; + bool found = false; + + rw_lock_x_lock(dict_operation_lock); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); + rec != NULL; + rec = dict_getnext_system(&pcur, &mtr)) { + const byte* field; + ulint len; + ulint space_id_for_table; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__SPACE, &len); + ut_ad(len == 4); + space_id_for_table = mach_read_from_4(field); + + if (space_id_for_table == space_id) { + found = true; + } + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + + return(!found); +} + +/** Find the space_id for the given name in sys_tablespaces. +@param[in] name Tablespace name to search for. +@return the tablespace ID. 
*/ +ulint +dict_space_get_id( + const char* name) +{ + btr_pcur_t pcur; + const rec_t* rec; + mtr_t mtr; + ulint name_len = strlen(name); + ulint id = ULINT_UNDEFINED; + + rw_lock_x_lock(dict_operation_lock); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + rec != NULL; + rec = dict_getnext_system(&pcur, &mtr)) { + const byte* field; + ulint len; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); + ut_ad(len > 0); + ut_ad(len < OS_FILE_MAX_PATH); + + if (len == name_len && ut_memcmp(name, field, len) == 0) { + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len); + ut_ad(len == 4); + id = mach_read_from_4(field); + + /* This is normally called by dict_getnext_system() + at the end of the index. */ + btr_pcur_close(&pcur); + break; + } + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + + return(id); +} #endif /* !UNIV_HOTBACKUP */ + +/** Determine the extent size (in pages) for the given table +@param[in] table the table whose extent size is being + calculated. 
+@return extent size in pages (256, 128 or 64) */ +ulint +dict_table_extent_size( + const dict_table_t* table) +{ + const ulint mb_1 = 1024 * 1024; + const ulint mb_2 = 2 * mb_1; + const ulint mb_4 = 4 * mb_1; + + page_size_t page_size = dict_table_page_size(table); + ulint pages_in_extent = FSP_EXTENT_SIZE; + + if (page_size.is_compressed()) { + + ulint disk_page_size = page_size.physical(); + + switch (disk_page_size) { + case 1024: + pages_in_extent = mb_1/1024; + break; + case 2048: + pages_in_extent = mb_1/2048; + break; + case 4096: + pages_in_extent = mb_1/4096; + break; + case 8192: + pages_in_extent = mb_1/8192; + break; + case 16384: + pages_in_extent = mb_1/16384; + break; + case 32768: + pages_in_extent = mb_2/32768; + break; + case 65536: + pages_in_extent = mb_4/65536; + break; + default: + ut_ad(0); + } + } + + return(pages_in_extent); +} diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 04e31aff088..f38ad85e903 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -25,27 +25,32 @@ from dictionary tables Created 4/24/1996 Heikki Tuuri *******************************************************/ -#include "dict0load.h" -#include "mysql_version.h" +#include "ha_prototypes.h" +#include "dict0load.h" #ifdef UNIV_NONINL #include "dict0load.ic" #endif +#include "mysql_version.h" #include "btr0pcur.h" #include "btr0btr.h" -#include "page0page.h" -#include "mach0data.h" -#include "dict0dict.h" #include "dict0boot.h" +#include "dict0crea.h" +#include "dict0dict.h" +#include "dict0mem.h" +#include "dict0priv.h" #include "dict0stats.h" +#include "fsp0file.h" +#include "fsp0sysspace.h" +#include "fts0priv.h" +#include "mach0data.h" +#include "page0page.h" #include "rem0cmp.h" #include "srv0start.h" #include "srv0srv.h" -#include "dict0crea.h" -#include "dict0priv.h" -#include "ha_prototypes.h" /* innobase_casedn_str() */ -#include "fts0priv.h" +#include +#include /** Following are the 
InnoDB system tables. The positions in this array are referenced by enum dict_system_table_id. */ @@ -57,17 +62,57 @@ static const char* SYSTEM_TABLE_NAME[] = { "SYS_FOREIGN", "SYS_FOREIGN_COLS", "SYS_TABLESPACES", - "SYS_DATAFILES" + "SYS_DATAFILES", + "SYS_VIRTUAL" }; +/** Loads a table definition and also all its index definitions. + +Loads those foreign key constraints whose referenced table is already in +dictionary cache. If a foreign key constraint is not loaded, then the +referenced table is pushed into the output stack (fk_tables), if it is not +NULL. These tables must be subsequently loaded so that all the foreign +key constraints are loaded into memory. + +@param[in] name Table name in the db/tablename format +@param[in] cached true=add to cache, false=do not +@param[in] ignore_err Error to be ignored when loading table + and its index definition +@param[out] fk_tables Related table names that must also be + loaded to ensure that all foreign key + constraints are loaded. +@return table, NULL if does not exist; if the table is stored in an +.ibd file, but the file does not exist, then we set the +ibd_file_missing flag TRUE in the table object we return */ +static +dict_table_t* +dict_load_table_one( + table_name_t& name, + bool cached, + dict_err_ignore_t ignore_err, + dict_names_t& fk_tables); + +/** Loads a table definition from a SYS_TABLES record to dict_table_t. +Does not load any columns or indexes. +@param[in] name Table name +@param[in] rec SYS_TABLES record +@param[out,own] table Table, or NULL +@return error message, or NULL on success */ +static +const char* +dict_load_table_low( + table_name_t& name, + const rec_t* rec, + dict_table_t** table); + /* If this flag is TRUE, then we will load the cluster index's (and tables') metadata even if it is marked as "corrupted". 
*/ -UNIV_INTERN my_bool srv_load_corrupted = FALSE; +my_bool srv_load_corrupted = FALSE; #ifdef UNIV_DEBUG /****************************************************************//** Compare the name of an index column. -@return TRUE if the i'th column of index is 'name'. */ +@return TRUE if the i'th column of index is 'name'. */ static ibool name_of_col_is( @@ -89,7 +134,6 @@ name_of_col_is( Finds the first table name in the given database. @return own: table name, NULL if does not exist; the caller must free the memory in the string! */ -UNIV_INTERN char* dict_get_first_table_name_in_db( /*============================*/ @@ -106,7 +150,7 @@ dict_get_first_table_name_in_db( ulint len; mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); heap = mem_heap_create(1000); @@ -169,69 +213,9 @@ loop: goto loop; } -/********************************************************************//** -Prints to the standard output information on all tables found in the data -dictionary system table. 
*/ -UNIV_INTERN -void -dict_print(void) -/*============*/ -{ - dict_table_t* table; - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - - /* Enlarge the fatal semaphore wait timeout during the InnoDB table - monitor printout */ - - os_increment_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); - - heap = mem_heap_create(1000); - mutex_enter(&(dict_sys->mutex)); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); - - while (rec) { - const char* err_msg; - - err_msg = static_cast( - dict_process_sys_tables_rec_and_mtr_commit( - heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE, - &mtr)); - - if (!err_msg) { - dict_table_print(table); - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", err_msg); - } - - mem_heap_empty(heap); - - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&(dict_sys->mutex)); - mem_heap_free(heap); - - /* Restore the fatal semaphore wait timeout */ - os_decrement_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); -} - /********************************************************************//** This function gets the next system table record as it scans the table. -@return the next record if found, NULL if end of scan */ +@return the next record if found, NULL if end of scan */ static const rec_t* dict_getnext_system_low( @@ -263,8 +247,7 @@ dict_getnext_system_low( /********************************************************************//** This function opens a system table, and returns the first record. 
-@return first record of the system table */ -UNIV_INTERN +@return first record of the system table */ const rec_t* dict_startscan_system( /*==================*/ @@ -293,8 +276,7 @@ dict_startscan_system( /********************************************************************//** This function gets the next system table record as it scans the table. -@return the next record if found, NULL if end of scan */ -UNIV_INTERN +@return the next record if found, NULL if end of scan */ const rec_t* dict_getnext_system( /*================*/ @@ -318,7 +300,6 @@ This function processes one SYS_TABLES record and populate the dict_table_t struct for the table. Extracted out of dict_print() to be used by both monitor table output and information schema innodb_sys_tables output. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_tables_rec_and_mtr_commit( /*=======================================*/ @@ -335,7 +316,7 @@ dict_process_sys_tables_rec_and_mtr_commit( ulint len; const char* field; const char* err_msg = NULL; - char* table_name; + table_name_t table_name; field = (const char*) rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLES__NAME, &len); @@ -345,7 +326,7 @@ dict_process_sys_tables_rec_and_mtr_commit( ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); /* Get the table name */ - table_name = mem_heap_strdupl(heap, field, len); + table_name.m_name = mem_heap_strdupl(heap, field, len); /* If DICT_TABLE_LOAD_FROM_CACHE is set, first check whether there is cached dict_table_t struct */ @@ -354,7 +335,7 @@ dict_process_sys_tables_rec_and_mtr_commit( /* Commit before load the table again */ mtr_commit(mtr); - *table = dict_table_get_low(table_name); + *table = dict_table_get_low(table_name.m_name); if (!(*table)) { err_msg = "Table not found in cache"; @@ -376,7 +357,6 @@ This function parses a SYS_INDEXES record and populate a dict_index_t structure with the information from the record. 
For detail information about SYS_INDEXES fields, please refer to dict_boot() function. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_indexes_rec( /*=========================*/ @@ -403,7 +383,6 @@ dict_process_sys_indexes_rec( This function parses a SYS_COLUMNS record and populate a dict_column_t structure with the information from the record. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_columns_rec( /*=========================*/ @@ -411,22 +390,47 @@ dict_process_sys_columns_rec( const rec_t* rec, /*!< in: current SYS_COLUMNS rec */ dict_col_t* column, /*!< out: dict_col_t to be filled */ table_id_t* table_id, /*!< out: table id */ - const char** col_name) /*!< out: column name */ + const char** col_name, /*!< out: column name */ + ulint* nth_v_col) /*!< out: if virtual col, this is + record's sequence number */ { const char* err_msg; /* Parse the record, and get "dict_col_t" struct filled */ err_msg = dict_load_column_low(NULL, heap, column, - table_id, col_name, rec); + table_id, col_name, rec, nth_v_col); return(err_msg); } +/** This function parses a SYS_VIRTUAL record and extracts virtual column +information +@param[in,out] heap heap memory +@param[in] rec current SYS_COLUMNS rec +@param[in,out] table_id table id +@param[in,out] pos virtual column position +@param[in,out] base_pos base column position +@return error message, or NULL on success */ +const char* +dict_process_sys_virtual_rec( + mem_heap_t* heap, + const rec_t* rec, + table_id_t* table_id, + ulint* pos, + ulint* base_pos) +{ + const char* err_msg; + + /* Parse the record, and get "dict_col_t" struct filled */ + err_msg = dict_load_virtual_low(NULL, heap, NULL, table_id, + pos, base_pos, rec); + + return(err_msg); +} /********************************************************************//** This function parses a SYS_FIELDS record and populates a dict_field_t structure with the information from the record. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_fields_rec( /*========================*/ @@ -461,7 +465,6 @@ This function parses a SYS_FOREIGN record and populate a dict_foreign_t structure with the information from the record. For detail information about SYS_FOREIGN fields, please refer to dict_load_foreign() function. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_foreign_rec( /*=========================*/ @@ -542,7 +545,6 @@ err_len: This function parses a SYS_FOREIGN_COLS record and extract necessary information from the record and return to caller. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_foreign_col_rec( /*=============================*/ @@ -612,7 +614,6 @@ err_len: This function parses a SYS_TABLESPACES record, extracts necessary information from the record and returns to caller. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_tablespaces( /*=========================*/ @@ -679,7 +680,6 @@ err_len: This function parses a SYS_DATAFILES record, extracts necessary information from the record and returns it to the caller. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_datafiles( /*=======================*/ @@ -729,65 +729,13 @@ err_len: return(NULL); } -/********************************************************************//** -Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS. -@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. 
*/ -static -ulint -dict_sys_tables_get_flags( -/*======================*/ - const rec_t* rec) /*!< in: a record of SYS_TABLES */ -{ - const byte* field; - ulint len; - ulint type; - ulint n_cols; - - /* read the 4 byte flags from the TYPE field */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_a(len == 4); - type = mach_read_from_4(field); - - /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in - dict_table_t::flags the low order bit is used to determine if the - row format is Redundant or Compact when the format is Antelope. - Read the 4 byte N_COLS field and look at the high order bit. It - should be set for COMPACT and later. It should not be set for - REDUNDANT. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - ut_a(len == 4); - n_cols = mach_read_from_4(field); - - /* This validation function also combines the DICT_N_COLS_COMPACT - flag in n_cols into the type field to effectively make it a - dict_table_t::flags. */ - - if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) { - return(ULINT_UNDEFINED); - } - - return(dict_sys_tables_type_to_tf(type, n_cols)); -} - -/********************************************************************//** -Gets the filepath for a spaceid from SYS_DATAFILES and checks it against -the contents of a link file. This function is called when there is no -fil_node_t entry for this space ID so both durable locations on disk -must be checked and compared. -We use a temporary heap here for the table lookup, but not for the path -returned which the caller must free. -This function can return NULL if the space ID is not found in SYS_DATAFILES, -then the caller will assume that the ibd file is in the normal datadir. -@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for -the given space ID. NULL if space ID is zero or not found. */ -UNIV_INTERN +/** Get the first filepath from SYS_DATAFILES for a given space_id. 
+@param[in] space_id Tablespace ID +@return First filepath (caller must invoke ut_free() on it) +@retval NULL if no SYS_DATAFILES entry was found. */ char* dict_get_first_path( -/*================*/ - ulint space, /*!< in: space id */ - const char* name) /*!< in: tablespace name */ + ulint space_id) { mtr_t mtr; dict_table_t* sys_datafiles; @@ -799,15 +747,16 @@ dict_get_first_path( const rec_t* rec; const byte* field; ulint len; - char* dict_filepath = NULL; + char* filepath = NULL; mem_heap_t* heap = mem_heap_create(1024); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); mtr_start(&mtr); sys_datafiles = dict_table_get_low("SYS_DATAFILES"); sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes); + ut_ad(!dict_table_is_comp(sys_datafiles)); ut_ad(name_of_col_is(sys_datafiles, sys_index, DICT_FLD__SYS_DATAFILES__SPACE, "SPACE")); @@ -818,7 +767,7 @@ dict_get_first_path( dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE); buf = static_cast(mem_heap_alloc(heap, 4)); - mach_write_to_4(buf, space); + mach_write_to_4(buf, space_id); dfield_set_data(dfield, buf, 4); dict_index_copy_types(tuple, sys_index, 1); @@ -828,44 +777,155 @@ dict_get_first_path( rec = btr_pcur_get_rec(&pcur); - /* If the file-per-table tablespace was created with - an earlier version of InnoDB, then this record is not - in SYS_DATAFILES. But a link file still might exist. */ - + /* Get the filepath from this SYS_DATAFILES record. */ if (btr_pcur_is_on_user_rec(&pcur)) { - /* A record for this space ID was found. */ field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_DATAFILES__PATH, &len); - ut_a(len > 0 || len == UNIV_SQL_NULL); - ut_a(len < OS_FILE_MAX_PATH); - dict_filepath = mem_strdupl((char*) field, len); - ut_a(dict_filepath); + rec, DICT_FLD__SYS_DATAFILES__SPACE, &len); + ut_a(len == 4); + + if (space_id == mach_read_from_4(field)) { + /* A record for this space ID was found. 
*/ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_DATAFILES__PATH, &len); + + ut_ad(len > 0); + ut_ad(len < OS_FILE_MAX_PATH); + + if (len > 0 && len != UNIV_SQL_NULL) { + filepath = mem_strdupl( + reinterpret_cast(field), + len); + ut_ad(filepath != NULL); + + /* The dictionary may have been written on + another OS. */ + os_normalize_path(filepath); + } + } } btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); - return(dict_filepath); + return(filepath); } -/********************************************************************//** -Update the record for space_id in SYS_TABLESPACES to this filepath. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN +/** Gets the space name from SYS_TABLESPACES for a given space ID. +@param[in] space_id Tablespace ID +@param[in] callers_heap A heap to allocate from, may be NULL +@return Tablespace name (caller is responsible to free it) +@retval NULL if no dictionary entry was found. */ +static +char* +dict_space_get_name( + ulint space_id, + mem_heap_t* callers_heap) +{ + mtr_t mtr; + dict_table_t* sys_tablespaces; + dict_index_t* sys_index; + dtuple_t* tuple; + dfield_t* dfield; + byte* buf; + btr_pcur_t pcur; + const rec_t* rec; + const byte* field; + ulint len; + char* space_name = NULL; + mem_heap_t* heap = mem_heap_create(1024); + + ut_ad(mutex_own(&dict_sys->mutex)); + + sys_tablespaces = dict_table_get_low("SYS_TABLESPACES"); + if (sys_tablespaces == NULL) { + ut_a(!srv_sys_tablespaces_open); + return(NULL); + } + + sys_index = UT_LIST_GET_FIRST(sys_tablespaces->indexes); + + ut_ad(!dict_table_is_comp(sys_tablespaces)); + ut_ad(name_of_col_is(sys_tablespaces, sys_index, + DICT_FLD__SYS_TABLESPACES__SPACE, "SPACE")); + ut_ad(name_of_col_is(sys_tablespaces, sys_index, + DICT_FLD__SYS_TABLESPACES__NAME, "NAME")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_TABLESPACES__SPACE); + + buf = static_cast(mem_heap_alloc(heap, 4)); + mach_write_to_4(buf, 
space_id); + + dfield_set_data(dfield, buf, 4); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + rec = btr_pcur_get_rec(&pcur); + + /* Get the tablespace name from this SYS_TABLESPACES record. */ + if (btr_pcur_is_on_user_rec(&pcur)) { + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len); + ut_a(len == 4); + + if (space_id == mach_read_from_4(field)) { + /* A record for this space ID was found. */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); + + ut_ad(len > 0); + ut_ad(len < OS_FILE_MAX_PATH); + + if (len > 0 && len != UNIV_SQL_NULL) { + /* Found a tablespace name. */ + if (callers_heap == NULL) { + space_name = mem_strdupl( + reinterpret_cast< + const char*>(field), + len); + } else { + space_name = mem_heap_strdupl( + callers_heap, + reinterpret_cast< + const char*>(field), + len); + } + ut_ad(space_name); + } + } + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(space_name); +} + +/** Update the record for space_id in SYS_TABLESPACES to this filepath. +@param[in] space_id Tablespace ID +@param[in] filepath Tablespace filepath +@return DB_SUCCESS if OK, dberr_t if the insert failed */ dberr_t dict_update_filepath( -/*=================*/ - ulint space_id, /*!< in: space id */ - const char* filepath) /*!< in: filepath */ + ulint space_id, + const char* filepath) { + if (!srv_sys_tablespaces_open) { + /* Startup procedure is not yet ready for updates. 
*/ + return(DB_SUCCESS); + } + dberr_t err = DB_SUCCESS; trx_t* trx; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(mutex_own(&dict_sys->mutex)); trx = trx_allocate_for_background(); trx->op_info = "update filepath"; @@ -892,39 +952,48 @@ dict_update_filepath( if (err == DB_SUCCESS) { /* We just updated SYS_DATAFILES due to the contents in a link file. Make a note that we did this. */ - ib_logf(IB_LOG_LEVEL_INFO, - "The InnoDB data dictionary table SYS_DATAFILES " - "for tablespace ID %lu was updated to use file %s.", - (ulong) space_id, filepath); + ib::info() << "The InnoDB data dictionary table SYS_DATAFILES" + " for tablespace ID " << space_id + << " was updated to use file " << filepath << "."; } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Problem updating InnoDB data dictionary table " - "SYS_DATAFILES for tablespace ID %lu to file %s.", - (ulong) space_id, filepath); + ib::warn() << "Error occurred while updating InnoDB data" + " dictionary table SYS_DATAFILES for tablespace ID " + << space_id << " to file " << filepath << ": " + << ut_strerr(err) << "."; } return(err); } -/********************************************************************//** -Insert records into SYS_TABLESPACES and SYS_DATAFILES. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN +/** Replace records in SYS_TABLESPACES and SYS_DATAFILES associated with +the given space_id using an independent transaction. 
+@param[in] space_id Tablespace ID +@param[in] name Tablespace name +@param[in] filepath First filepath +@param[in] fsp_flags Tablespace flags +@return DB_SUCCESS if OK, dberr_t if the insert failed */ dberr_t -dict_insert_tablespace_and_filepath( -/*================================*/ - ulint space, /*!< in: space id */ - const char* name, /*!< in: talespace name */ - const char* filepath, /*!< in: filepath */ - ulint fsp_flags) /*!< in: tablespace flags */ +dict_replace_tablespace_and_filepath( + ulint space_id, + const char* name, + const char* filepath, + ulint fsp_flags) { + if (!srv_sys_tablespaces_open) { + /* Startup procedure is not yet ready for updates. + Return success since this will likely get updated + later. */ + return(DB_SUCCESS); + } + dberr_t err = DB_SUCCESS; trx_t* trx; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); + DBUG_EXECUTE_IF("innodb_fail_to_update_tablespace_dict", + return(DB_INTERRUPTED);); + + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(filepath); trx = trx_allocate_for_background(); @@ -934,9 +1003,9 @@ dict_insert_tablespace_and_filepath( /* A record for this space ID was not found in SYS_DATAFILES. Assume the record is also missing in - SYS_TABLESPACES. Insert records onto them both. */ - err = dict_create_add_tablespace_to_dictionary( - space, name, fsp_flags, filepath, trx, false); + SYS_TABLESPACES. Insert records into them both. */ + err = dict_replace_tablespace_in_dictionary( + space_id, name, fsp_flags, filepath, trx, false); trx_commit_for_mysql(trx); trx->dict_operation_lock_mode = 0; @@ -945,264 +1014,538 @@ dict_insert_tablespace_and_filepath( return(err); } -/********************************************************************//** -This function looks at each table defined in SYS_TABLES. It checks the -tablespace for any table with a space_id > 0. 
It looks up the tablespace -in SYS_DATAFILES to ensure the correct path. - -In a crash recovery we already have all the tablespace objects created. -This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). - -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ -UNIV_INTERN -void -dict_check_tablespaces_and_store_max_id( -/*====================================*/ - dict_check_t dict_check) /*!< in: how to check */ +/** Check the validity of a SYS_TABLES record +Make sure the fields are the right length and that they +do not contain invalid contents. +@param[in] rec SYS_TABLES record +@return error message, or NULL on success */ +static +const char* +dict_sys_tables_rec_check( + const rec_t* rec) { - dict_table_t* sys_tables; - dict_index_t* sys_index; + const byte* field; + ulint len; + + ut_ad(mutex_own(&dict_sys->mutex)); + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_TABLES"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { + return("wrong number of columns in SYS_TABLES record"); + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { +err_len: + return("incorrect column length in SYS_TABLES"); + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len); + if (len != 8) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__N_COLS, &len); + if (field == NULL || 
len != 4) { + goto err_len; + } + + rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len); + if (len != 4) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__MIX_ID, &len); + if (len != 8) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); + if (field == NULL || len != 4) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len); + if (len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__SPACE, &len); + if (field == NULL || len != 4) { + goto err_len; + } + + return(NULL); +} + +/** Read and return the contents of a SYS_TABLESPACES record. +@param[in] rec A record of SYS_TABLESPACES +@param[out] id Pointer to the space_id for this table +@param[in,out] name Buffer for Tablespace Name of length NAME_LEN +@param[out] flags Pointer to tablespace flags +@return true if the record was read correctly, false if not. 
*/ +bool +dict_sys_tablespaces_rec_read( + const rec_t* rec, + ulint* id, + char* name, + ulint* flags) +{ + const byte* field; + ulint len; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len); + if (len != DICT_FLD_LEN_SPACE) { + ib::error() << "Wrong field length in SYS_TABLESPACES.SPACE: " + << len; + return(false); + } + *id = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + ib::error() << "Wrong field length in SYS_TABLESPACES.NAME: " + << len; + return(false); + } + strncpy(name, reinterpret_cast(field), NAME_LEN); + + /* read the 4 byte flags from the TYPE field */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len); + if (len != 4) { + ib::error() << "Wrong field length in SYS_TABLESPACES.FLAGS: " + << len; + return(false); + } + *flags = mach_read_from_4(field); + + return(true); +} + +/** Load and check each general tablespace mentioned in the SYS_TABLESPACES. +Ignore system and file-per-table tablespaces. +If it is valid, add it to the file_system list. +@param[in] validate true when the previous shutdown was not clean +@return the highest space ID found. */ +UNIV_INLINE +ulint +dict_check_sys_tablespaces( + bool validate) +{ + ulint max_space_id = 0; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id; mtr_t mtr; - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&(dict_sys->mutex)); + DBUG_ENTER("dict_check_sys_tablespaces"); + + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(mutex_own(&dict_sys->mutex)); + + /* Before traversing it, let's make sure we have + SYS_TABLESPACES and SYS_DATAFILES loaded. 
*/ + dict_table_get_low("SYS_TABLESPACES"); + dict_table_get_low("SYS_DATAFILES"); mtr_start(&mtr); - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_ad(!dict_table_is_comp(sys_tables)); + for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + rec != NULL; + rec = dict_getnext_system(&pcur, &mtr)) + { + char space_name[NAME_LEN]; + ulint space_id = 0; + ulint fsp_flags; - max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) - + DICT_HDR_MAX_SPACE_ID, - MLOG_4BYTES, &mtr); - fil_set_max_space_id_if_bigger(max_space_id); + if (!dict_sys_tablespaces_rec_read(rec, &space_id, + space_name, &fsp_flags)) { + continue; + } - btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur, - true, 0, &mtr); -loop: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); + /* Ignore system and file-per-table tablespaces. */ + if (is_system_tablespace(space_id) + || !fsp_is_shared_tablespace(fsp_flags)) { + continue; + } - rec = btr_pcur_get_rec(&pcur); + /* Ignore tablespaces that already are in the tablespace + cache. */ + if (fil_space_for_table_exists_in_mem( + space_id, space_name, false, true, NULL, 0, NULL)) { + /* Recovery can open a datafile that does not + match SYS_DATAFILES. If they don't match, update + SYS_DATAFILES. */ + char *dict_path = dict_get_first_path(space_id); + char *fil_path = fil_space_get_first_path(space_id); + if (dict_path && fil_path + && strcmp(dict_path, fil_path)) { + dict_update_filepath(space_id, fil_path); + } + ut_free(dict_path); + ut_free(fil_path); + continue; + } - if (!btr_pcur_is_on_user_rec(&pcur)) { - /* end of index */ + /* Set the expected filepath from the data dictionary. + If the file is found elsewhere (from an ISL or the default + location) or this path is the same file but looks different, + fil_ibd_open() will update the dictionary with what is + opened. 
*/ + char* filepath = dict_get_first_path(space_id); - btr_pcur_close(&pcur); - mtr_commit(&mtr); + validate = true; /* Encryption */ - /* We must make the tablespace cache aware of the biggest - known space id */ + /* Check that the .ibd file exists. */ + dberr_t err = fil_ibd_open( + validate, + !srv_read_only_mode && srv_log_file_size != 0, + FIL_TYPE_TABLESPACE, + space_id, + fsp_flags, + space_name, + filepath, + NULL); - /* printf("Biggest space id in data dictionary %lu\n", - max_space_id); */ - fil_set_max_space_id_if_bigger(max_space_id); + if (err != DB_SUCCESS) { + ib::warn() << "Ignoring tablespace " + << id_name_t(space_name) + << " because it could not be opened."; + } - mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&dict_operation_lock); + max_space_id = ut_max(max_space_id, space_id); - return; + ut_free(filepath); } - if (!rec_get_deleted_flag(rec, 0)) { + mtr_commit(&mtr); - /* We found one */ + DBUG_RETURN(max_space_id); +} + +/** Read and return 5 integer fields from a SYS_TABLES record. +@param[in] rec A record of SYS_TABLES +@param[in] name Table Name, the same as SYS_TABLES.NAME +@param[out] table_id Pointer to the table_id for this table +@param[out] space_id Pointer to the space_id for this table +@param[out] n_cols Pointer to number of columns for this table. +@param[out] flags Pointer to table flags +@param[out] flags2 Pointer to table flags2 +@return true if the record was read correctly, false if not. 
*/ +static +bool +dict_sys_tables_rec_read( + const rec_t* rec, + const table_name_t& table_name, + table_id_t* table_id, + ulint* space_id, + ulint* n_cols, + ulint* flags, + ulint* flags2) +{ + const byte* field; + ulint len; + ulint type; + + *flags2 = 0; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__ID, &len); + ut_ad(len == 8); + *table_id = static_cast(mach_read_from_8(field)); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__SPACE, &len); + ut_ad(len == 4); + *space_id = mach_read_from_4(field); + + /* Read the 4 byte flags from the TYPE field */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__TYPE, &len); + ut_a(len == 4); + type = mach_read_from_4(field); + + /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in + dict_table_t::flags the low order bit is used to determine if the + row format is Redundant (0) or Compact (1) when the format is Antelope. + Read the 4 byte N_COLS field and look at the high order bit. It + should be set for COMPACT and later. It should not be set for + REDUNDANT. */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__N_COLS, &len); + ut_a(len == 4); + *n_cols = mach_read_from_4(field); + + /* This validation function also combines the DICT_N_COLS_COMPACT + flag in n_cols into the type field to effectively make it a + dict_table_t::flags. */ + + if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, *n_cols)) { + ib::error() << "Table " << table_name << " in InnoDB" + " data dictionary contains invalid flags." 
+ " SYS_TABLES.TYPE=" << type << + " SYS_TABLES.N_COLS=" << *n_cols; + *flags = ULINT_UNDEFINED; + return(false); + } + + *flags = dict_sys_tables_type_to_tf(type, *n_cols); + + /* Get flags2 from SYS_TABLES.MIX_LEN */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); + *flags2 = mach_read_from_4(field); + + /* DICT_TF2_FTS will be set when indexes are being loaded */ + *flags2 &= ~DICT_TF2_FTS; + + /* Now that we have used this bit, unset it. */ + *n_cols &= ~DICT_N_COLS_COMPACT; + return(true); +} + +/** Load and check each non-predefined tablespace mentioned in SYS_TABLES. +Search SYS_TABLES and check each tablespace mentioned that has not +already been added to the fil_system. If it is valid, add it to the +file_system list. Perform extra validation on the table if recovery from +the REDO log occurred. +@param[in] validate Whether to do validation on the table. +@return the highest space ID found. */ +UNIV_INLINE +ulint +dict_check_sys_tables( + bool validate) +{ + ulint max_space_id = 0; + btr_pcur_t pcur; + const rec_t* rec; + mtr_t mtr; + + DBUG_ENTER("dict_check_sys_tables"); + + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(mutex_own(&dict_sys->mutex)); + + mtr_start(&mtr); + + /* Before traversing SYS_TABLES, let's make sure we have + SYS_TABLESPACES and SYS_DATAFILES loaded. */ + dict_table_t* sys_tablespaces; + dict_table_t* sys_datafiles; + sys_tablespaces = dict_table_get_low("SYS_TABLESPACES"); + ut_a(sys_tablespaces != NULL); + sys_datafiles = dict_table_get_low("SYS_DATAFILES"); + ut_a(sys_datafiles != NULL); + + for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); + rec != NULL; + rec = dict_getnext_system(&pcur, &mtr)) { const byte* field; ulint len; + char* space_name; + table_name_t table_name; + table_id_t table_id; ulint space_id; + ulint n_cols; ulint flags; - char* name; + ulint flags2; + /* If a table record is not useable, ignore it and continue + on to the next record. 
Error messages were logged. */ + if (dict_sys_tables_rec_check(rec) != NULL) { + continue; + } + + /* Copy the table name from rec */ field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLES__NAME, &len); + table_name.m_name = mem_strdupl((char*) field, len); + DBUG_PRINT("dict_check_sys_tables", + ("name: %p, '%s'", table_name.m_name, + table_name.m_name)); - name = mem_strdupl((char*) field, len); - - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), name, FALSE); - - flags = dict_sys_tables_get_flags(rec); - if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { - /* Read again the 4 bytes from rec. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_ad(len == 4); /* this was checked earlier */ - flags = mach_read_from_4(field); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Table '%s' in InnoDB data dictionary" - " has unknown type %lx", table_name, flags); - mem_free(name); - goto loop; + dict_sys_tables_rec_read(rec, table_name, + &table_id, &space_id, + &n_cols, &flags, &flags2); + if (flags == ULINT_UNDEFINED + || is_system_tablespace(space_id)) { + ut_free(table_name.m_name); + continue; } - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__SPACE, &len); - ut_a(len == 4); - - space_id = mach_read_from_4(field); - - btr_pcur_store_position(&pcur, &mtr); - - mtr_commit(&mtr); - - /* For tables created with old versions of InnoDB, - SYS_TABLES.MIX_LEN may contain garbage. Such tables - would always be in ROW_FORMAT=REDUNDANT. Pretend that - all such tables are non-temporary. That is, do not - suppress error printouts about temporary or discarded - tablespaces not being found. */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); - - bool is_temp = false; - bool discarded = false; - ib_uint32_t flags2 = static_cast( - mach_read_from_4(field)); - - /* Check that the tablespace (the .ibd file) really - exists; print a warning to the .err log if not. 
- Do not print warnings for temporary tables or for - tablespaces that have been discarded. */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - - /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */ - if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) { - - is_temp = !!(flags2 & DICT_TF2_TEMPORARY); - discarded = !!(flags2 & DICT_TF2_DISCARDED); + if (flags2 & DICT_TF2_DISCARDED) { + ib::info() << "Ignoring tablespace " << table_name + << " because the DISCARD flag is set ."; + ut_free(table_name.m_name); + continue; } - if (space_id == 0) { - /* The system tablespace always exists. */ - ut_ad(!discarded); - goto next_tablespace; + /* If the table is not a predefined tablespace then it must + be in a file-per-table or shared tablespace. + Note that flags2 is not available for REDUNDANT tables, + so don't check those. */ + ut_ad(DICT_TF_HAS_SHARED_SPACE(flags) + || !DICT_TF_GET_COMPACT(flags) + || flags2 & DICT_TF2_USE_FILE_PER_TABLE); + + /* Look up the tablespace name in the data dictionary if this + is a shared tablespace. For file-per-table, the table_name + and the tablespace_name are the same. + Some hidden tables like FTS AUX tables may not be found in + the dictionary since they can always be found in the default + location. If so, then dict_space_get_name() will return NULL, + the space name must be the table_name, and the filepath can be + discovered in the default location.*/ + char* shared_space_name = dict_space_get_name(space_id, NULL); + space_name = shared_space_name == NULL + ? table_name.m_name + : shared_space_name; + + /* Now that we have the proper name for this tablespace, + whether it is a shared tablespace or a single table + tablespace, look to see if it is already in the tablespace + cache. */ + if (fil_space_for_table_exists_in_mem( + space_id, space_name, false, true, NULL, 0, NULL)) { + /* Recovery can open a datafile that does not + match SYS_DATAFILES. If they don't match, update + SYS_DATAFILES. 
*/ + char *dict_path = dict_get_first_path(space_id); + char *fil_path = fil_space_get_first_path(space_id); + if (dict_path && fil_path + && strcmp(dict_path, fil_path)) { + dict_update_filepath(space_id, fil_path); + } + ut_free(dict_path); + ut_free(fil_path); + ut_free(table_name.m_name); + ut_free(shared_space_name); + continue; } - switch (dict_check) { - case DICT_CHECK_ALL_LOADED: - /* All tablespaces should have been found in - fil_load_single_table_tablespaces(). */ - if (fil_space_for_table_exists_in_mem( - space_id, name, TRUE, !(is_temp || discarded), - false, NULL, 0) - && !(is_temp || discarded)) { - /* If user changes the path of .ibd files in - *.isl files before doing crash recovery , - then this leads to inconsistency in - SYS_DATAFILES system table because the - tables are loaded from the updated path - but the SYS_DATAFILES still points to the - old path.Therefore after crash recovery - update SYS_DATAFILES with the updated path.*/ - ut_ad(space_id); - ut_ad(recv_needed_recovery); - char *dict_path = dict_get_first_path(space_id, - name); - char *remote_path = fil_read_link_file(name); - if(dict_path && remote_path) { - if(strcmp(dict_path,remote_path)) { - dict_update_filepath(space_id, - remote_path); - } - } - if(dict_path) - mem_free(dict_path); - if(remote_path) - mem_free(remote_path); - } - break; + /* Set the expected filepath from the data dictionary. + If the file is found elsewhere (from an ISL or the default + location) or this path is the same file but looks different, + fil_ibd_open() will update the dictionary with what is + opened. */ + char* filepath = dict_get_first_path(space_id); - case DICT_CHECK_SOME_LOADED: - /* Some tablespaces may have been opened in - trx_resurrect_table_locks(). 
*/ - if (fil_space_for_table_exists_in_mem( - space_id, name, FALSE, FALSE, - false, NULL, 0)) { - break; - } - /* fall through */ - case DICT_CHECK_NONE_LOADED: - if (discarded) { - ib_logf(IB_LOG_LEVEL_INFO, - "DISCARD flag set for table '%s'," - " ignored.", - table_name); - break; - } + /* Check that the .ibd file exists. */ + bool is_temp = flags2 & DICT_TF2_TEMPORARY; + bool is_encrypted = flags2 & DICT_TF2_ENCRYPTION; + ulint fsp_flags = dict_tf_to_fsp_flags(flags, + is_temp, + is_encrypted); + validate = true; /* Encryption */ - /* It is a normal database startup: create the - space object and check that the .ibd file exists. - If the table uses a remote tablespace, look for the - space_id in SYS_DATAFILES to find the filepath */ + dberr_t err = fil_ibd_open( + validate, + !srv_read_only_mode && srv_log_file_size != 0, + FIL_TYPE_TABLESPACE, + space_id, + fsp_flags, + space_name, + filepath, + NULL); - /* Use the remote filepath if known. */ - char* filepath = NULL; - if (DICT_TF_HAS_DATA_DIR(flags)) { - filepath = dict_get_first_path( - space_id, name); - } - - /* We need to read page 0 to get (optional) IV - regardless if encryptions is turned on or not, - since if it's off we should decrypt a potentially - already encrypted table */ - bool read_page_0 = true; - - /* We set the 2nd param (fix_dict = true) - here because we already have an x-lock on - dict_operation_lock and dict_sys->mutex. Besides, - this is at startup and we are now single threaded. - If the filepath is not known, it will need to - be discovered. */ - dberr_t err = fil_open_single_table_tablespace( - read_page_0, srv_read_only_mode ? 
false : true, - space_id, dict_tf_to_fsp_flags(flags), - name, filepath, NULL); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace open failed for '%s', " - "ignored.", table_name); - } - - if (filepath) { - mem_free(filepath); - } - - break; + if (err != DB_SUCCESS) { + ib::warn() << "Ignoring tablespace " + << id_name_t(space_name) + << " because it could not be opened."; } - if (space_id > max_space_id) { - max_space_id = space_id; - } + max_space_id = ut_max(max_space_id, space_id); -next_tablespace: - mem_free(name); - mtr_start(&mtr); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + ut_free(table_name.m_name); + ut_free(shared_space_name); + ut_free(filepath); } - goto loop; + mtr_commit(&mtr); + + DBUG_RETURN(max_space_id); } +/** Check each tablespace found in the data dictionary. +Look at each general tablespace found in SYS_TABLESPACES. +Then look at each table defined in SYS_TABLES that has a space_id > 0 +to find all the file-per-table tablespaces. + +In a crash recovery we already have some tablespace objects created from +processing the REDO log. Any other tablespace in SYS_TABLESPACES not +previously used in recovery will be opened here. We will compare the +space_id information in the data dictionary to what we find in the +tablespace file. In addition, more validation will be done if recovery +was needed and force_recovery is not set. + +We also scan the biggest space id, and store it to fil_system. 
+@param[in] validate true if recovery was needed */ +void +dict_check_tablespaces_and_store_max_id( + bool validate) +{ + mtr_t mtr; + + DBUG_ENTER("dict_check_tablespaces_and_store_max_id"); + + rw_lock_x_lock(dict_operation_lock); + mutex_enter(&dict_sys->mutex); + + /* Initialize the max space_id from sys header */ + mtr_start(&mtr); + ulint max_space_id = mtr_read_ulint( + dict_hdr_get(&mtr) + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + mtr_commit(&mtr); + + fil_set_max_space_id_if_bigger(max_space_id); + + /* Open all general tablespaces found in SYS_TABLESPACES. */ + ulint max1 = dict_check_sys_tablespaces(validate); + + /* Open all tablespaces referenced in SYS_TABLES. + This will update SYS_TABLESPACES and SYS_DATAFILES if it + finds any file-per-table tablespaces not already there. */ + ulint max2 = dict_check_sys_tables(validate); + + /* Store the max space_id found */ + max_space_id = ut_max(max1, max2); + fil_set_max_space_id_if_bigger(max_space_id); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + + DBUG_VOID_RETURN; +} + +/** Error message for a delete-marked record in dict_load_column_low() */ +static const char* dict_load_column_del = "delete-marked record in SYS_COLUMN"; + /********************************************************************//** Loads a table column definition from a SYS_COLUMNS record to dict_table_t. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_column_low( /*=================*/ @@ -1216,7 +1559,10 @@ dict_load_column_low( or NULL if table != NULL */ table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ - const rec_t* rec) /*!< in: SYS_COLUMNS record */ + const rec_t* rec, /*!< in: SYS_COLUMNS record */ + ulint* nth_v_col) /*!< out: if not NULL, this + records the "n" of "nth" virtual + column */ { char* name; const byte* field; @@ -1225,11 +1571,12 @@ dict_load_column_low( ulint prtype; ulint col_len; ulint pos; + ulint num_base; ut_ad(table || column); if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_COLUMNS"); + return(dict_load_column_del); } if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) { @@ -1252,16 +1599,11 @@ err_len: field = rec_get_nth_field_old( rec, DICT_FLD__SYS_COLUMNS__POS, &len); if (len != 4) { - goto err_len; } pos = mach_read_from_4(field); - if (table && table->n_def != pos) { - return("SYS_COLUMNS.POS mismatch"); - } - rec_get_nth_field_offs_old( rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { @@ -1321,6 +1663,10 @@ err_len: } } + if (table && table->n_def != pos && !(prtype & DATA_VIRTUAL)) { + return("SYS_COLUMNS.POS mismatch"); + } + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_COLUMNS__LEN, &len); if (len != 4) { @@ -1332,18 +1678,124 @@ err_len: if (len != 4) { goto err_len; } + num_base = mach_read_from_4(field); - if (!column) { - dict_mem_table_add_col(table, heap, name, mtype, - prtype, col_len); + if (column == NULL) { + if (prtype & DATA_VIRTUAL) { +#ifdef UNIV_DEBUG + dict_v_col_t* vcol = +#endif + dict_mem_table_add_v_col( + table, heap, name, mtype, + prtype, col_len, + dict_get_v_col_mysql_pos(pos), num_base); + ut_ad(vcol->v_pos == dict_get_v_col_pos(pos)); + } else { + ut_ad(num_base == 0); + dict_mem_table_add_col(table, heap, name, mtype, + prtype, 
col_len); + } } else { dict_mem_fill_column_struct(column, pos, mtype, prtype, col_len); } + /* Report the virtual column number */ + if ((prtype & DATA_VIRTUAL) && nth_v_col != NULL) { + *nth_v_col = dict_get_v_col_pos(pos); + } + return(NULL); } +/** Error message for a delete-marked record in dict_load_virtual_low() */ +static const char* dict_load_virtual_del = "delete-marked record in SYS_VIRTUAL"; + +/** Loads a virtual column "mapping" (to base columns) information +from a SYS_VIRTUAL record +@param[in,out] table table +@param[in,out] heap memory heap +@param[in,out] column mapped base column's dict_column_t +@param[in,out] table_id table id +@param[in,out] pos virtual column position +@param[in,out] base_pos base column position +@param[in] rec SYS_VIRTUAL record +@return error message, or NULL on success */ +const char* +dict_load_virtual_low( + dict_table_t* table, + mem_heap_t* heap, + dict_col_t** column, + table_id_t* table_id, + ulint* pos, + ulint* base_pos, + const rec_t* rec) +{ + const byte* field; + ulint len; + ulint base; + + if (rec_get_deleted_flag(rec, 0)) { + return(dict_load_virtual_del); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_VIRTUAL) { + return("wrong number of columns in SYS_VIRTUAL record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_VIRTUAL__TABLE_ID, &len); + if (len != 8) { +err_len: + return("incorrect column length in SYS_VIRTUAL"); + } + + if (table_id != NULL) { + *table_id = mach_read_from_8(field); + } else if (table->id != mach_read_from_8(field)) { + return("SYS_VIRTUAL.TABLE_ID mismatch"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_VIRTUAL__POS, &len); + if (len != 4) { + goto err_len; + } + + if (pos != NULL) { + *pos = mach_read_from_4(field); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_VIRTUAL__BASE_POS, &len); + if (len != 4) { + goto err_len; + } + + base = mach_read_from_4(field); + + if (base_pos != NULL) { + *base_pos = base; + } + + 
rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + if (column != NULL) { + *column = dict_table_get_nth_col(table, base); + } + + return(NULL); +} /********************************************************************//** Loads definitions for table columns. */ static @@ -1363,8 +1815,9 @@ dict_load_columns( byte* buf; ulint i; mtr_t mtr; + ulint n_skipped = 0; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); mtr_start(&mtr); @@ -1388,26 +1841,37 @@ dict_load_columns( btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { + + ut_ad(table->n_t_cols == static_cast( + table->n_cols) + static_cast(table->n_v_cols)); + + for (i = 0; + i + DATA_N_SYS_COLS < table->n_t_cols + n_skipped; + i++) { const char* err_msg; const char* name = NULL; + ulint nth_v_col = ULINT_UNDEFINED; rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur)); err_msg = dict_load_column_low(table, heap, NULL, NULL, - &name, rec); + &name, rec, &nth_v_col); - if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); - ut_error; + if (err_msg == dict_load_column_del) { + n_skipped++; + goto next_rec; + } else if (err_msg) { + ib::fatal() << err_msg; } /* Note: Currently we have one DOC_ID column that is - shared by all FTS indexes on a table. */ + shared by all FTS indexes on a table. 
And only non-virtual + column can be used for FULLTEXT index */ if (innobase_strcasecmp(name, - FTS_DOC_ID_COL_NAME) == 0) { + FTS_DOC_ID_COL_NAME) == 0 + && nth_v_col == ULINT_UNDEFINED) { dict_col_t* col; /* As part of normal loading of tables the table->flag is not set for tables with FTS @@ -1424,7 +1888,7 @@ dict_load_columns( ut_a(table->fts->doc_col == ULINT_UNDEFINED); - col = dict_table_get_nth_col(table, i); + col = dict_table_get_nth_col(table, i - n_skipped); ut_ad(col->len == sizeof(doc_id_t)); @@ -1435,7 +1899,103 @@ dict_load_columns( table, DICT_TF2_FTS_ADD_DOC_ID); } - table->fts->doc_col = i; + table->fts->doc_col = i - n_skipped; + } +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); +} + +/** Loads SYS_VIRTUAL info for one virtual column +@param[in,out] table table +@param[in] nth_v_col virtual column sequence num +@param[in,out] v_col virtual column +@param[in,out] heap memory heap +*/ +static +void +dict_load_virtual_one_col( + dict_table_t* table, + ulint nth_v_col, + dict_v_col_t* v_col, + mem_heap_t* heap) +{ + dict_table_t* sys_virtual; + dict_index_t* sys_virtual_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + byte* buf; + ulint i = 0; + mtr_t mtr; + ulint skipped = 0; + + ut_ad(mutex_own(&dict_sys->mutex)); + + if (v_col->num_base == 0) { + return; + } + + mtr_start(&mtr); + + sys_virtual = dict_table_get_low("SYS_VIRTUAL"); + sys_virtual_index = UT_LIST_GET_FIRST(sys_virtual->indexes); + ut_ad(!dict_table_is_comp(sys_virtual)); + + ut_ad(name_of_col_is(sys_virtual, sys_virtual_index, + DICT_FLD__SYS_VIRTUAL__POS, "POS")); + + tuple = dtuple_create(heap, 2); + + /* table ID field */ + dfield = dtuple_get_nth_field(tuple, 0); + + buf = static_cast(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, table->id); + + dfield_set_data(dfield, buf, 8); + + /* virtual column pos field */ + dfield = dtuple_get_nth_field(tuple, 1); + + buf = 
static_cast(mem_heap_alloc(heap, 4)); + ulint vcol_pos = dict_create_v_col_pos(nth_v_col, v_col->m_col.ind); + mach_write_to_4(buf, vcol_pos); + + dfield_set_data(dfield, buf, 4); + + dict_index_copy_types(tuple, sys_virtual_index, 2); + + btr_pcur_open_on_user_rec(sys_virtual_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + for (i = 0; i < v_col->num_base + skipped; i++) { + const char* err_msg; + ulint pos; + + ut_ad(btr_pcur_is_on_user_rec(&pcur)); + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur)); + + err_msg = dict_load_virtual_low(table, heap, + &v_col->base_col[i - skipped], + NULL, + &pos, NULL, rec); + + if (err_msg) { + if (err_msg != dict_load_virtual_del) { + ib::fatal() << err_msg; + } else { + skipped++; + } + } else { + ut_ad(pos == vcol_pos); } btr_pcur_move_to_next_user_rec(&pcur, &mtr); @@ -1445,6 +2005,23 @@ dict_load_columns( mtr_commit(&mtr); } +/** Loads info from SYS_VIRTUAL for virtual columns. +@param[in,out] table table +@param[in] heap memory heap +*/ +static +void +dict_load_virtual( + dict_table_t* table, + mem_heap_t* heap) +{ + for (ulint i = 0; i < table->n_v_cols; i++) { + dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i); + + dict_load_virtual_one_col(table, i, v_col, heap); + } +} + /** Error message for a delete-marked record in dict_load_field_low() */ static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS"; @@ -1452,7 +2029,6 @@ static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS"; Loads an index field definition from a SYS_FIELDS record to dict_index_t. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_field_low( /*================*/ @@ -1592,7 +2168,7 @@ dict_load_fields( mtr_t mtr; dberr_t error; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); mtr_start(&mtr); @@ -1630,7 +2206,7 @@ dict_load_fields( goto next_rec; } else if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); + ib::error() << err_msg; error = DB_CORRUPTION; goto func_exit; } @@ -1656,7 +2232,6 @@ If allocate=TRUE, we will create a dict_index_t structure and fill it accordingly. If allocated=FALSE, the dict_index_t will be supplied by the caller and filled with information read from the record. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_index_low( /*================*/ @@ -1679,6 +2254,7 @@ dict_load_index_low( ulint n_fields; ulint type; ulint space; + ulint merge_threshold; if (allocate) { /* If allocate=TRUE, no dict_index_t will @@ -1690,7 +2266,27 @@ dict_load_index_low( return(dict_load_index_del); } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_INDEXES) { + if (rec_get_n_fields_old(rec) == DICT_NUM_FIELDS__SYS_INDEXES) { + /* MERGE_THRESHOLD exists */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len); + switch (len) { + case 4: + merge_threshold = mach_read_from_4(field); + break; + case UNIV_SQL_NULL: + merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; + break; + default: + return("incorrect MERGE_THRESHOLD length" + " in SYS_INDEXES"); + } + } else if (rec_get_n_fields_old(rec) + == DICT_NUM_FIELDS__SYS_INDEXES - 1) { + /* MERGE_THRESHOLD doesn't exist */ + + merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; + } else { return("wrong number of columns in SYS_INDEXES record"); } @@ -1781,6 +2377,7 @@ err_len: (*index)->id = id; (*index)->page = mach_read_from_4(field); ut_ad((*index)->page); + (*index)->merge_threshold = merge_threshold; return(NULL); } @@ -1810,7 +2407,7 @@ 
dict_load_indexes( mtr_t mtr; dberr_t error = DB_SUCCESS; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); mtr_start(&mtr); @@ -1845,11 +2442,10 @@ dict_load_indexes( for drop table */ if (dict_table_get_first_index(table) == NULL && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot load table %s " - "because it has no indexes in " - "InnoDB internal data dictionary.", - table->name); + ib::warn() << "Cannot load table " + << table->name + << " because it has no indexes in" + " InnoDB internal data dictionary."; error = DB_CORRUPTION; goto func_exit; } @@ -1860,15 +2456,20 @@ dict_load_indexes( rec = btr_pcur_get_rec(&pcur); if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) - && rec_get_n_fields_old(rec) - == DICT_NUM_FIELDS__SYS_INDEXES) { + && (rec_get_n_fields_old(rec) + == DICT_NUM_FIELDS__SYS_INDEXES + /* a record for older SYS_INDEXES table + (missing merge_threshold column) is acceptable. */ + || rec_get_n_fields_old(rec) + == DICT_NUM_FIELDS__SYS_INDEXES - 1)) { const byte* field; ulint len; field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__NAME, &len); if (len != UNIV_SQL_NULL - && char(*field) == char(TEMP_INDEX_PREFIX)) { + && static_cast(*field) + == static_cast(*TEMP_INDEX_PREFIX_STR)) { /* Skip indexes whose name starts with TEMP_INDEX_PREFIX, because they will be dropped during crash recovery. */ @@ -1876,8 +2477,8 @@ dict_load_indexes( } } - err_msg = dict_load_index_low(buf, table->name, heap, rec, - TRUE, &index); + err_msg = dict_load_index_low( + buf, table->name.m_name, heap, rec, TRUE, &index); ut_ad((index == NULL && err_msg != NULL) || (index != NULL && err_msg == NULL)); @@ -1887,13 +2488,15 @@ dict_load_indexes( if (dict_table_get_first_index(table) == NULL && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to load the " - "clustered index for table %s " - "because of the following error: %s. 
" - "Refusing to load the rest of the " - "indexes (if any) and the whole table " - "altogether.", table->name, err_msg); + + ib::warn() << "Failed to load the" + " clustered index for table " + << table->name + << " because of the following error: " + << err_msg << "." + " Refusing to load the rest of the" + " indexes (if any) and the whole table" + " altogether."; error = DB_CORRUPTION; goto func_exit; } @@ -1903,7 +2506,7 @@ dict_load_indexes( /* Skip delete-marked records. */ goto next_rec; } else if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); + ib::error() << err_msg; if (ignore_err & DICT_ERR_IGNORE_CORRUPT) { goto next_rec; } @@ -1915,10 +2518,10 @@ dict_load_indexes( /* Check whether the index is corrupted */ if (dict_index_is_corrupted(index)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs(" is corrupted\n", stderr); + + ib::error() << "Index " << index->name + << " of table " << table->name + << " is corrupted"; if (!srv_load_corrupted && !(ignore_err & DICT_ERR_IGNORE_CORRUPT) @@ -1934,15 +2537,14 @@ dict_load_indexes( DICT_ERR_IGNORE_CORRUPT 3) if the index corrupted is a secondary index */ - ut_print_timestamp(stderr); - fputs(" InnoDB: load corrupted index ", stderr); - dict_index_name_print(stderr, NULL, index); - putc('\n', stderr); + ib::info() << "Load corrupted index " + << index->name + << " of table " << table->name; } } if (index->type & DICT_FTS - && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { + && !dict_table_has_fts_index(table)) { /* This should have been created by now. */ ut_a(table->fts != NULL); DICT_TF2_FLAG_SET(table, DICT_TF2_FTS); @@ -1951,10 +2553,12 @@ dict_load_indexes( /* We check for unsupported types first, so that the subsequent checks are relevant for the supported types. 
*/ if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE - | DICT_CORRUPT | DICT_FTS)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown type %lu of index %s of table %s", - (ulong) index->type, index->name, table->name); + | DICT_CORRUPT | DICT_FTS + | DICT_SPATIAL | DICT_VIRTUAL)) { + + ib::error() << "Unknown type " << index->type + << " of index " << index->name + << " of table " << table->name; error = DB_UNSUPPORTED; dict_mem_index_free(index); @@ -1963,11 +2567,9 @@ dict_load_indexes( && !table->ibd_file_missing && (!(index->type & DICT_FTS))) { - fprintf(stderr, - "InnoDB: Error: trying to load index %s" - " for table %s\n" - "InnoDB: but the index tree has been freed!\n", - index->name, table->name); + ib::error() << "Trying to load index " << index->name + << " for table " << table->name + << ", but the index tree has been freed!"; if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) { /* If caller can tolerate this error, @@ -1978,12 +2580,11 @@ dict_load_indexes( dictionary cache for such metadata corruption, since we would always be able to set it when loading the dictionary cache */ - dict_set_corrupted_index_cache_only( - index, table); + index->table = table; + dict_set_corrupted_index_cache_only(index); - fprintf(stderr, - "InnoDB: Index is corrupt but forcing" - " load into data dictionary\n"); + ib::info() << "Index is corrupt but forcing" + " load into data dictionary"; } else { corrupted: dict_mem_index_free(index); @@ -1993,13 +2594,9 @@ corrupted: } else if (!dict_index_is_clust(index) && NULL == dict_table_get_first_index(table)) { - fputs("InnoDB: Error: trying to load index ", - stderr); - ut_print_name(stderr, NULL, FALSE, index->name); - fputs(" for table ", stderr); - ut_print_name(stderr, NULL, TRUE, table->name); - fputs("\nInnoDB: but the first index" - " is not clustered!\n", stderr); + ib::error() << "Trying to load index " << index->name + << " for table " << table->name + << ", but the first index is not clustered!"; goto corrupted; } else if 
(dict_is_sys_table(table->id) @@ -2030,8 +2627,16 @@ next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } + ut_ad(table->fts_doc_id_index == NULL); + + if (table->fts != NULL) { + table->fts_doc_id_index = dict_table_get_index_on_name( + table, FTS_DOC_ID_INDEX_NAME); + } + /* If the table contains FTS indexes, populate table->fts->indexes */ - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { + if (dict_table_has_fts_index(table)) { + ut_ad(table->fts_doc_id_index != NULL); /* table->fts->indexes should have been created. */ ut_a(table->fts->indexes != NULL); dict_table_get_all_fts_indexes(table, table->fts->indexes); @@ -2044,151 +2649,44 @@ func_exit: return(error); } -/********************************************************************//** -Loads a table definition from a SYS_TABLES record to dict_table_t. +/** Loads a table definition from a SYS_TABLES record to dict_table_t. Does not load any columns or indexes. +@param[in] name Table name +@param[in] rec SYS_TABLES record +@param[out,own] table table, or NULL @return error message, or NULL on success */ -UNIV_INTERN +static const char* dict_load_table_low( -/*================*/ - const char* name, /*!< in: table name */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table) /*!< out,own: table, or NULL */ + table_name_t& name, + const rec_t* rec, + dict_table_t** table) { - const byte* field; - ulint len; - ulint space; + table_id_t table_id; + ulint space_id; ulint n_cols; - ulint flags = 0; + ulint t_num; + ulint flags; ulint flags2; + ulint n_v_col; - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_TABLES"); + const char* error_text = dict_sys_tables_rec_check(rec); + if (error_text != NULL) { + return(error_text); } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { - return("wrong number of columns in SYS_TABLES record"); - } + dict_sys_tables_rec_read(rec, name, &table_id, &space_id, + &t_num, &flags, &flags2); - 
rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { -err_len: - return("incorrect column length in SYS_TABLES"); - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len); - if (len != 8) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - if (len != 4) { - goto err_len; - } - - n_cols = mach_read_from_4(field); - - rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len); - if (len != 4) { - goto err_len; - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__MIX_ID, &len); - if (len != 8) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); - if (len != 4) { - goto err_len; - } - - /* MIX_LEN may hold additional flags in post-antelope file formats. 
*/ - flags2 = mach_read_from_4(field); - - /* DICT_TF2_FTS will be set when indexes is being loaded */ - flags2 &= ~DICT_TF2_FTS; - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len); - if (len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__SPACE, &len); - if (len != 4) { - goto err_len; - } - - space = mach_read_from_4(field); - - /* Check if the tablespace exists and has the right name */ - flags = dict_sys_tables_get_flags(rec); - - if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_ad(len == 4); /* this was checked earlier */ - flags = mach_read_from_4(field); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary" - " has unknown type %lx.\n", - (ulong) flags); + if (flags == ULINT_UNDEFINED) { return("incorrect flags in SYS_TABLES"); } - /* The high-order bit of N_COLS is the "compact format" flag. - For tables in that format, MIX_LEN may hold additional flags. */ - if (n_cols & DICT_N_COLS_COMPACT) { - ut_ad(flags & DICT_TF_COMPACT); + dict_table_decode_n_col(t_num, &n_cols, &n_v_col); - if (flags2 & ~DICT_TF2_BIT_MASK) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary" - " has unknown flags %lx.\n", - (ulong) flags2); - - /* Clean it up and keep going */ - flags2 &= DICT_TF2_BIT_MASK; - } - } else { - /* Do not trust the MIX_LEN field when the - row format is Redundant. */ - flags2 = 0; - } - - /* See if the tablespace is available. 
*/ *table = dict_mem_table_create( - name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2); - - field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len); - ut_ad(len == 8); /* this was checked earlier */ - - (*table)->id = mach_read_from_8(field); - + name.m_name, space_id, n_cols + n_v_col, n_v_col, flags, flags2); + (*table)->id = table_id; (*table)->ibd_file_missing = FALSE; return(NULL); @@ -2200,47 +2698,44 @@ table->data_dir_path and replace the 'databasename/tablename.ibd' portion with 'tablename'. This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path. Make this data directory path only if it has not yet been saved. */ -UNIV_INTERN void dict_save_data_dir_path( /*====================*/ dict_table_t* table, /*!< in/out: table */ char* filepath) /*!< in: filepath of tablespace */ { - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); ut_a(DICT_TF_HAS_DATA_DIR(table->flags)); ut_a(!table->data_dir_path); ut_a(filepath); /* Be sure this filepath is not the default filepath. 
*/ - char* default_filepath = fil_make_ibd_name(table->name, false); - if (strcmp(filepath, default_filepath)) { - ulint pathlen = strlen(filepath); - ut_a(pathlen < OS_FILE_MAX_PATH); - ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd")); + char* default_filepath = fil_make_filepath( + NULL, table->name.m_name, IBD, false); + if (default_filepath) { + if (0 != strcmp(filepath, default_filepath)) { + ulint pathlen = strlen(filepath); + ut_a(pathlen < OS_FILE_MAX_PATH); + ut_a(0 == strcmp(filepath + pathlen - 4, DOT_IBD)); - table->data_dir_path = mem_heap_strdup(table->heap, filepath); - os_file_make_data_dir_path(table->data_dir_path); - } else { - /* This does not change SYS_DATAFILES or SYS_TABLES - or FSP_FLAGS on the header page of the tablespace, - but it makes dict_table_t consistent */ - table->flags &= ~DICT_TF_MASK_DATA_DIR; + table->data_dir_path = mem_heap_strdup( + table->heap, filepath); + os_file_make_data_dir_path(table->data_dir_path); + } + + ut_free(default_filepath); } - mem_free(default_filepath); } -/*****************************************************************//** -Make sure the data_file_name is saved in dict_table_t if needed. Try to -read it from the file dictionary first, then from SYS_DATAFILES. */ -UNIV_INTERN +/** Make sure the data_dir_path is saved in dict_table_t if DATA DIRECTORY +was used. Try to read it from the fil_system first, then from SYS_DATAFILES. 
+@param[in] table Table object +@param[in] dict_mutex_own true if dict_sys->mutex is owned already */ void dict_get_and_save_data_dir_path( -/*============================*/ - dict_table_t* table, /*!< in/out: table */ - bool dict_mutex_own) /*!< in: true if dict_sys->mutex - is owned already */ + dict_table_t* table, + bool dict_mutex_own) { bool is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY); @@ -2250,15 +2745,23 @@ dict_get_and_save_data_dir_path( if (!dict_mutex_own) { dict_mutex_enter_for_mysql(); } - if (!path) { - path = dict_get_first_path( - table->space, table->name); + + if (path == NULL) { + path = dict_get_first_path(table->space); } - if (path) { + if (path != NULL) { table->flags |= (1 << DICT_TF_POS_DATA_DIR); dict_save_data_dir_path(table, path); - mem_free(path); + ut_free(path); + } + + if (table->data_dir_path == NULL) { + /* Since we did not set the table data_dir_path, + unset the flag. This does not change SYS_DATAFILES + or SYS_TABLES or FSP_FLAGS on the header page of the + tablespace, but it makes dict_table_t consistent. */ + table->flags &= ~DICT_TF_MASK_DATA_DIR; } if (!dict_mutex_own) { @@ -2267,25 +2770,268 @@ dict_get_and_save_data_dir_path( } } -/********************************************************************//** -Loads a table definition and also all its index definitions, and also +/** Make sure the tablespace name is saved in dict_table_t if the table +uses a general tablespace. +Try to read it from the fil_system_t first, then from SYS_TABLESPACES. +@param[in] table Table object +@param[in] dict_mutex_own) true if dict_sys->mutex is owned already */ +void +dict_get_and_save_space_name( + dict_table_t* table, + bool dict_mutex_own) +{ + /* Do this only for general tablespaces. 
*/ + if (!DICT_TF_HAS_SHARED_SPACE(table->flags)) { + return; + } + + bool use_cache = true; + if (table->tablespace != NULL) { + + if (srv_sys_tablespaces_open + && dict_table_has_temp_general_tablespace_name( + table->tablespace)) { + /* We previous saved the temporary name, + get the real one now. */ + use_cache = false; + } else { + /* Keep and use this name */ + return; + } + } + + if (use_cache) { + fil_space_t* space = fil_space_acquire_silent(table->space); + + if (space != NULL) { + /* Use this name unless it is a temporary general + tablespace name and we can now replace it. */ + if (!srv_sys_tablespaces_open + || !dict_table_has_temp_general_tablespace_name( + space->name)) { + + /* Use this tablespace name */ + table->tablespace = mem_heap_strdup( + table->heap, space->name); + + fil_space_release(space); + return; + } + fil_space_release(space); + } + } + + /* Read it from the dictionary. */ + if (srv_sys_tablespaces_open) { + if (!dict_mutex_own) { + dict_mutex_enter_for_mysql(); + } + + table->tablespace = dict_space_get_name( + table->space, table->heap); + + if (!dict_mutex_own) { + dict_mutex_exit_for_mysql(); + } + } +} + +/** Loads a table definition and also all its index definitions, and also the cluster definition if the table is a member in a cluster. Also loads all foreign key constraints where the foreign key is in the table or where -a foreign key references columns in this table. Adds all these to the data -dictionary cache. +a foreign key references columns in this table. +@param[in] name Table name in the dbname/tablename format +@param[in] cached true=add to cache, false=do not +@param[in] ignore_err Error to be ignored when loading + table and its index definition +@return table, NULL if does not exist; if the table is stored in an +.ibd file, but the file does not exist, then we set the ibd_file_missing +flag in the table object we return. 
*/ +dict_table_t* +dict_load_table( + const char* name, + bool cached, + dict_err_ignore_t ignore_err) +{ + dict_names_t fk_list; + dict_table_t* result; + dict_names_t::iterator i; + table_name_t table_name; + + DBUG_ENTER("dict_load_table"); + DBUG_PRINT("dict_load_table", ("loading table: '%s'", name)); + + ut_ad(mutex_own(&dict_sys->mutex)); + + table_name.m_name = const_cast(name); + + result = dict_table_check_if_in_cache_low(name); + + if (!result) { + result = dict_load_table_one(table_name, cached, ignore_err, + fk_list); + while (!fk_list.empty()) { + table_name_t fk_table_name; + dict_table_t* fk_table; + + fk_table_name.m_name = + const_cast(fk_list.front()); + fk_table = dict_table_check_if_in_cache_low( + fk_table_name.m_name); + if (!fk_table) { + dict_load_table_one(fk_table_name, cached, + ignore_err, fk_list); + } + fk_list.pop_front(); + } + } + + DBUG_RETURN(result); +} + +/** Opens a tablespace for dict_load_table_one() +@param[in,out] table A table that refers to the tablespace to open +@param[in] heap A memory heap +@param[in] ignore_err Whether to ignore an error. */ +UNIV_INLINE +void +dict_load_tablespace( + dict_table_t* table, + mem_heap_t* heap, + dict_err_ignore_t ignore_err) +{ + /* The system tablespace is always available. */ + if (is_system_tablespace(table->space)) { + return; + } + + if (table->flags2 & DICT_TF2_DISCARDED) { + ib::warn() << "Tablespace for table " << table->name + << " is set as discarded."; + table->ibd_file_missing = TRUE; + return; + } + + if (dict_table_is_temporary(table)) { + /* Do not bother to retry opening temporary tables. */ + table->ibd_file_missing = TRUE; + return; + } + + /* A file-per-table table name is also the tablespace name. + A general tablespace name is not the same as the table name. + Use the general tablespace name if it can be read from the + dictionary, if not use 'innodb_general_##. 
*/ + char* shared_space_name = NULL; + char* space_name; + if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + if (srv_sys_tablespaces_open) { + shared_space_name = + dict_space_get_name(table->space, NULL); + + } else { + /* Make the temporary tablespace name. */ + shared_space_name = static_cast( + ut_malloc_nokey( + strlen(general_space_name) + 20)); + + sprintf(shared_space_name, "%s_" ULINTPF, + general_space_name, + static_cast(table->space)); + } + space_name = shared_space_name; + } else { + space_name = table->name.m_name; + } + + /* The tablespace may already be open. */ + if (fil_space_for_table_exists_in_mem( + table->space, space_name, false, + true, heap, table->id, table)) { + ut_free(shared_space_name); + return; + } + + if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { + ib::error() << "Failed to find tablespace for table " + << table->name << " in the cache. Attempting" + " to load the tablespace with space id " + << table->space; + } + + /* Use the remote filepath if needed. This parameter is optional + in the call to fil_ibd_open(). If not supplied, it will be built + from the space_name. */ + char* filepath = NULL; + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + /* This will set table->data_dir_path from either + fil_system or SYS_DATAFILES */ + dict_get_and_save_data_dir_path(table, true); + + if (table->data_dir_path) { + filepath = fil_make_filepath( + table->data_dir_path, + table->name.m_name, IBD, true); + } + + } else if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + /* Set table->tablespace from either + fil_system or SYS_TABLESPACES */ + dict_get_and_save_space_name(table, true); + + /* Set the filepath from either + fil_system or SYS_DATAFILES. */ + filepath = dict_get_first_path(table->space); + if (filepath == NULL) { + ib::warn() << "Could not find the filepath" + " for table " << table->name << + ", space ID " << table->space; + } + } + + /* Try to open the tablespace. 
We set the 2nd param (fix_dict) to + false because we do not have an x-lock on dict_operation_lock */ + ulint fsp_flags = dict_tf_to_fsp_flags(table->flags, + false, + dict_table_is_encrypted(table)); + dberr_t err = fil_ibd_open( + true, false, FIL_TYPE_TABLESPACE, table->space, + fsp_flags, space_name, filepath, table); + + if (err != DB_SUCCESS) { + /* We failed to find a sensible tablespace file */ + table->ibd_file_missing = TRUE; + } + + ut_free(shared_space_name); + ut_free(filepath); +} + +/** Loads a table definition and also all its index definitions. + +Loads those foreign key constraints whose referenced table is already in +dictionary cache. If a foreign key constraint is not loaded, then the +referenced table is pushed into the output stack (fk_tables), if it is not +NULL. These tables must be subsequently loaded so that all the foreign +key constraints are loaded into memory. + +@param[in] name Table name in the db/tablename format +@param[in] cached true=add to cache, false=do not +@param[in] ignore_err Error to be ignored when loading table + and its index definition +@param[out] fk_tables Related table names that must also be + loaded to ensure that all foreign key + constraints are loaded. 
@return table, NULL if does not exist; if the table is stored in an .ibd file, but the file does not exist, then we set the ibd_file_missing flag TRUE in the table object we return */ -UNIV_INTERN +static dict_table_t* -dict_load_table( -/*============*/ - const char* name, /*!< in: table name in the - databasename/tablename format */ - ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored when loading - table and its indexes' definition */ +dict_load_table_one( + table_name_t& name, + bool cached, + dict_err_ignore_t ignore_err, + dict_names_t& fk_tables) { dberr_t err; dict_table_t* table; @@ -2298,11 +3044,13 @@ dict_load_table( const rec_t* rec; const byte* field; ulint len; - char* filepath = NULL; const char* err_msg; mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + DBUG_ENTER("dict_load_table_one"); + DBUG_PRINT("dict_load_table_one", ("table: %s", name.m_name)); + + ut_ad(mutex_own(&dict_sys->mutex)); heap = mem_heap_create(32000); @@ -2325,7 +3073,7 @@ dict_load_table( tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - dfield_set_data(dfield, name, ut_strlen(name)); + dfield_set_data(dfield, name.m_name, ut_strlen(name.m_name)); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, @@ -2340,14 +3088,15 @@ err_exit: mtr_commit(&mtr); mem_heap_free(heap); - return(NULL); + DBUG_RETURN(NULL); } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLES__NAME, &len); /* Check if the table name in record is the searched one */ - if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { + if (len != ut_strlen(name.m_name) + || 0 != ut_memcmp(name.m_name, field, len)) { goto err_exit; } @@ -2356,79 +3105,19 @@ err_exit: if (err_msg) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", err_msg); + ib::error() << err_msg; goto err_exit; } - char table_name[MAX_FULL_NAME_LEN + 1]; - - 
innobase_format_name(table_name, sizeof(table_name), name, FALSE); - btr_pcur_close(&pcur); mtr_commit(&mtr); - if (table->space == 0) { - /* The system tablespace is always available. */ - } else if (table->flags2 & DICT_TF2_DISCARDED) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Table '%s' tablespace is set as discarded.", - table_name); - - table->ibd_file_missing = TRUE; - - } else if (!fil_space_for_table_exists_in_mem( - table->space, name, FALSE, FALSE, true, heap, - table->id)) { - - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { - /* Do not bother to retry opening temporary tables. */ - table->ibd_file_missing = TRUE; - - } else { - if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to find tablespace for " - "table '%s' in the cache. " - "Attempting to load the tablespace " - "with space id %lu.", - table_name, (ulong) table->space); - } - - /* Use the remote filepath if needed. */ - /* This needs to be added to the tablex1 - from SYS_DATAFILES */ - dict_get_and_save_data_dir_path(table, true); - - if (table->data_dir_path) { - filepath = os_file_make_remote_pathname( - table->data_dir_path, - table->name, "ibd"); - } - - /* Try to open the tablespace. 
We set the - 2nd param (fix_dict = false) here because we - do not have an x-lock on dict_operation_lock */ - err = fil_open_single_table_tablespace( - true, false, table->space, - dict_tf_to_fsp_flags(table->flags), - name, filepath, table); - - if (err != DB_SUCCESS) { - /* We failed to find a sensible - tablespace file */ - - table->ibd_file_missing = TRUE; - } - if (filepath) { - mem_free(filepath); - } - } - } + dict_load_tablespace(table, heap, ignore_err); dict_load_columns(table, heap); + dict_load_virtual(table, heap); + if (cached) { dict_table_add_to_cache(table, TRUE, heap); } else { @@ -2453,13 +3142,11 @@ err_exit: /* Refuse to load the table if the table has a corrupted cluster index */ if (!srv_load_corrupted) { - fprintf(stderr, "InnoDB: Error: Load table "); - ut_print_name(stderr, NULL, TRUE, table->name); - fprintf(stderr, " failed, the table has corrupted" - " clustered indexes. Turn on" - " 'innodb_force_load_corrupted'" - " to drop it\n"); + ib::error() << "Load table " << table->name + << " failed, the table has" + " corrupted clustered indexes. Turn on" + " 'innodb_force_load_corrupted' to drop it"; dict_table_remove_from_cache(table); table = NULL; goto func_exit; @@ -2473,6 +3160,32 @@ err_exit: } } + /* We don't trust the table->flags2(retrieved from SYS_TABLES.MIX_LEN + field) if the datafiles are from 3.23.52 version. To identify this + version, we do the below check and reset the flags. */ + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + && table->space == srv_sys_space.space_id() + && table->flags == 0) { + table->flags2 = 0; + } + + DBUG_EXECUTE_IF("ib_table_invalid_flags", + if(strcmp(table->name.m_name, "test/t1") == 0) { + table->flags2 = 255; + table->flags = 255; + }); + + if (!dict_tf2_is_valid(table->flags, table->flags2)) { + ib::error() << "Table " << table->name << " in InnoDB" + " data dictionary contains invalid flags." 
+ " SYS_TABLES.MIX_LEN=" << table->flags2; + table->flags2 &= ~(DICT_TF2_TEMPORARY|DICT_TF2_INTRINSIC); + dict_table_remove_from_cache(table); + table = NULL; + err = DB_FAIL; + goto func_exit; + } + /* Initialize table foreign_child value. Its value could be changed when dict_load_foreigns() is called below */ table->fk_max_recusive_level = 0; @@ -2484,19 +3197,20 @@ err_exit: if (!cached || table->ibd_file_missing) { /* Don't attempt to load the indexes from disk. */ } else if (err == DB_SUCCESS) { - err = dict_load_foreigns(table->name, NULL, true, true, - ignore_err); + err = dict_load_foreigns(table->name.m_name, NULL, + true, true, + ignore_err, fk_tables); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Load table '%s' failed, the table has missing " - "foreign key indexes. Turn off " - "'foreign_key_checks' and try again.", - table->name); + ib::warn() << "Load table " << table->name + << " failed, the table has missing" + " foreign key indexes. Turn off" + " 'foreign_key_checks' and try again."; dict_table_remove_from_cache(table); table = NULL; } else { + dict_mem_table_fill_foreign_vcol_set(table); table->fk_max_recusive_level = 0; } } else { @@ -2547,13 +3261,12 @@ func_exit: ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table)); - return(table); + DBUG_RETURN(table); } /***********************************************************************//** Loads a table object based on the table id. 
-@return table; NULL if table does not exist */ -UNIV_INTERN +@return table; NULL if table does not exist */ dict_table_t* dict_load_table_on_id( /*==================*/ @@ -2574,7 +3287,7 @@ dict_load_table_on_id( dict_table_t* table; mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); table = NULL; @@ -2634,10 +3347,9 @@ check_rec: field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLE_IDS__NAME, &len); /* Load the table definition to memory */ - table = dict_load_table( - mem_heap_strdupl( - heap, (char*) field, len), - TRUE, ignore_err); + char* table_name = mem_heap_strdupl( + heap, (char*) field, len); + table = dict_load_table(table_name, true, ignore_err); } } } @@ -2649,104 +3361,10 @@ check_rec: return(table); } -/***********************************************************************//** -Loads a table id based on the index id. -@return true if found */ -static -bool -dict_load_table_id_on_index_id( -/*==================*/ - index_id_t index_id, /*!< in: index id */ - table_id_t* table_id) /*!< out: table id */ -{ - /* check hard coded indexes */ - switch(index_id) { - case DICT_TABLES_ID: - case DICT_COLUMNS_ID: - case DICT_INDEXES_ID: - case DICT_FIELDS_ID: - *table_id = index_id; - return true; - case DICT_TABLE_IDS_ID: - /* The following is a secondary index on SYS_TABLES */ - *table_id = DICT_TABLES_ID; - return true; - } - - bool found = false; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. 
*/ - - mtr_start(&mtr); - - btr_pcur_t pcur; - const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES); - - while (rec) { - ulint len; - const byte* field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__ID, &len); - ut_ad(len == 8); - - /* Check if the index id is the one searched for */ - if (index_id == mach_read_from_8(field)) { - found = true; - /* Now we get the table id */ - const byte* field = rec_get_nth_field_old( - rec, - DICT_FLD__SYS_INDEXES__TABLE_ID, - &len); - *table_id = mach_read_from_8(field); - break; - } - mtr_commit(&mtr); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(found); -} - -UNIV_INTERN -dict_table_t* -dict_table_open_on_index_id( -/*==================*/ - index_id_t index_id, /*!< in: index id */ - bool dict_locked) /*!< in: dict locked */ -{ - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - table_id_t table_id; - dict_table_t * table = NULL; - if (dict_load_table_id_on_index_id(index_id, &table_id)) { - bool local_dict_locked = true; - table = dict_table_open_on_id(table_id, - local_dict_locked, - DICT_TABLE_OP_LOAD_TABLESPACE); - } - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - return table; -} - /********************************************************************//** This function is called when the database is booted. Loads system table index definitions except for the clustered index which is added to the dictionary cache at booting before calling this function. 
*/ -UNIV_INTERN void dict_load_sys_table( /*================*/ @@ -2754,7 +3372,7 @@ dict_load_sys_table( { mem_heap_t* heap; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); heap = mem_heap_create(1000); @@ -2791,7 +3409,7 @@ dict_load_foreign_cols( mtr_t mtr; size_t id_len; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); id_len = strlen(foreign->id); @@ -2848,20 +3466,21 @@ dict_load_foreign_cols( rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &ref_col_name_len); - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to load columns names for foreign " - "key '%s' because it was not found in " - "InnoDB internal table SYS_FOREIGN_COLS. The " - "closest entry we found is: " - "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', " - "REF_COL_NAME='%.*s')", - foreign->id, - (int) len, field, - mach_read_from_4(pos), - (int) for_col_name_len, for_col_name, - (int) ref_col_name_len, ref_col_name); + ib::fatal sout; - ut_error; + sout << "Unable to load column names for foreign" + " key '" << foreign->id + << "' because it was not found in" + " InnoDB internal table SYS_FOREIGN_COLS. The" + " closest entry we found is:" + " (ID='"; + sout.write(field, len); + sout << "', POS=" << mach_read_from_4(pos) + << ", FOR_COL_NAME='"; + sout.write(for_col_name, for_col_name_len); + sout << "', REF_COL_NAME='"; + sout.write(ref_col_name, ref_col_name_len); + sout << "')"; } field = rec_get_nth_field_old( @@ -2887,8 +3506,9 @@ dict_load_foreign_cols( } /***********************************************************************//** -Loads a foreign key constraint to the dictionary cache. -@return DB_SUCCESS or error code */ +Loads a foreign key constraint to the dictionary cache. If the referenced +table is not yet loaded, it is added in the output parameter (fk_tables). 
+@return DB_SUCCESS or error code */ static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) dberr_t dict_load_foreign( @@ -2906,8 +3526,15 @@ dict_load_foreign( bool check_charsets, /*!< in: whether to check charset compatibility */ - dict_err_ignore_t ignore_err) + dict_err_ignore_t ignore_err, /*!< in: error to be ignored */ + dict_names_t& fk_tables) + /*!< out: the foreign key constraint is added + to the dictionary cache only if the referenced + table is already in cache. Otherwise, the + foreign key constraint is not added to cache, + and the referenced table is added to this + stack. */ { dict_foreign_t* foreign; dict_table_t* sys_foreign; @@ -2925,7 +3552,11 @@ dict_load_foreign( dict_table_t* ref_table; size_t id_len; - ut_ad(mutex_own(&(dict_sys->mutex))); + DBUG_ENTER("dict_load_foreign"); + DBUG_PRINT("dict_load_foreign", + ("id: '%s', check_recursive: %d", id, check_recursive)); + + ut_ad(mutex_own(&dict_sys->mutex)); id_len = strlen(id); @@ -2952,16 +3583,15 @@ dict_load_foreign( || rec_get_deleted_flag(rec, 0)) { /* Not found */ - fprintf(stderr, - "InnoDB: Error: cannot load foreign constraint " - "%s: could not find the relevant record in " - "SYS_FOREIGN\n", id); + ib::error() << "Cannot load foreign constraint " << id + << ": could not find the relevant record in " + << "SYS_FOREIGN"; btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap2); - return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); @@ -2969,16 +3599,19 @@ dict_load_foreign( /* Check if the id in record is the searched one */ if (len != id_len || ut_memcmp(id, field, len) != 0) { - fprintf(stderr, - "InnoDB: Error: cannot load foreign constraint " - "%s: found %.*s instead in SYS_FOREIGN\n", - id, (int) len, field); + { + ib::error err; + err << "Cannot load foreign constraint " << id + << ": found "; + err.write(field, len); + err << " instead in SYS_FOREIGN"; + } btr_pcur_close(&pcur); mtr_commit(&mtr); 
mem_heap_free(heap2); - return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } /* Read the table names and the number of columns associated @@ -3008,6 +3641,8 @@ dict_load_foreign( foreign->heap, (char*) field, len); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); + const ulint foreign_table_name_len = len; + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); foreign->referenced_table_name = mem_heap_strdupl( @@ -3020,54 +3655,33 @@ dict_load_foreign( dict_load_foreign_cols(foreign); ref_table = dict_table_check_if_in_cache_low( - foreign->referenced_table_name_lookup); + foreign->referenced_table_name_lookup); + for_table = dict_table_check_if_in_cache_low( + foreign->foreign_table_name_lookup); - /* We could possibly wind up in a deep recursive calls if - we call dict_table_get_low() again here if there - is a chain of tables concatenated together with - foreign constraints. In such case, each table is - both a parent and child of the other tables, and - act as a "link" in such table chains. - To avoid such scenario, we would need to check the - number of ancesters the current table has. If that - exceeds DICT_FK_MAX_CHAIN_LEN, we will stop loading - the child table. - Foreign constraints are loaded in a Breath First fashion, - that is, the index on FOR_NAME is scanned first, and then - index on REF_NAME. So foreign constrains in which - current table is a child (foreign table) are loaded first, - and then those constraints where current table is a - parent (referenced) table. - Thus we could check the parent (ref_table) table's - reference count (fk_max_recusive_level) to know how deep the - recursive call is. If the parent table (ref_table) is already - loaded, and its fk_max_recusive_level is larger than - DICT_FK_MAX_CHAIN_LEN, we will stop the recursive loading - by skipping loading the child table. It will not affect foreign - constraint check for DMLs since child table will be loaded - at that time for the constraint check. 
*/ - if (!ref_table - || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) { + if (!for_table) { + /* To avoid recursively loading the tables related through + the foreign key constraints, the child table name is saved + here. The child table will be loaded later, along with its + foreign key constraint. */ - /* If the foreign table is not yet in the dictionary cache, we - have to load it so that we are able to make type comparisons - in the next function call. */ + lint old_size = mem_heap_get_size(ref_table->heap); - for_table = dict_table_get_low(foreign->foreign_table_name_lookup); + ut_a(ref_table != NULL); + fk_tables.push_back( + mem_heap_strdupl(ref_table->heap, + foreign->foreign_table_name_lookup, + foreign_table_name_len)); - if (for_table && ref_table && check_recursive) { - /* This is to record the longest chain of ancesters - this table has, if the parent has more ancesters - than this table has, record it after add 1 (for this - parent */ - if (ref_table->fk_max_recusive_level - >= for_table->fk_max_recusive_level) { - for_table->fk_max_recusive_level = - ref_table->fk_max_recusive_level + 1; - } - } + lint new_size = mem_heap_get_size(ref_table->heap); + dict_sys->size += new_size - old_size; + + dict_foreign_remove_from_cache(foreign); + DBUG_RETURN(DB_SUCCESS); } + ut_a(for_table || ref_table); + /* Note that there may already be a foreign constraint object in the dictionary cache for this constraint: then the following call only sets the pointers in it to point to the appropriate table @@ -3076,18 +3690,21 @@ dict_load_foreign( a new foreign key constraint but loading one from the data dictionary. 
*/ - return(dict_foreign_add_to_cache(foreign, col_names, check_charsets, - ignore_err)); + DBUG_RETURN(dict_foreign_add_to_cache(foreign, col_names, + check_charsets, + ignore_err)); } /***********************************************************************//** Loads foreign key constraints where the table is either the foreign key holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache. -@return DB_SUCCESS or error code */ -UNIV_INTERN +constraints to the data dictionary. + +The foreign key constraint is loaded only if the referenced table is also +in the dictionary cache. If the referenced table is not in dictionary +cache, then it is added to the output parameter (fk_tables). + +@return DB_SUCCESS or error code */ dberr_t dict_load_foreigns( const char* table_name, /*!< in: table name */ @@ -3098,8 +3715,12 @@ dict_load_foreigns( chained by FK */ bool check_charsets, /*!< in: whether to check charset compatibility */ - dict_err_ignore_t ignore_err) /*!< in: error to be ignored */ -/*===============*/ + dict_err_ignore_t ignore_err, /*!< in: error to be ignored */ + dict_names_t& fk_tables) + /*!< out: stack of table + names which must be loaded + subsequently to load all the + foreign key constraints. 
*/ { ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1) / sizeof(ulint)]; @@ -3114,18 +3735,17 @@ dict_load_foreigns( dberr_t err; mtr_t mtr; - ut_ad(mutex_own(&(dict_sys->mutex))); + DBUG_ENTER("dict_load_foreigns"); + + ut_ad(mutex_own(&dict_sys->mutex)); sys_foreign = dict_table_get_low("SYS_FOREIGN"); if (sys_foreign == NULL) { /* No foreign keys defined yet in this database */ - fprintf(stderr, - "InnoDB: Error: no foreign key system tables" - " in the database\n"); - - return(DB_ERROR); + ib::info() << "No foreign key system tables in the database"; + DBUG_RETURN(DB_ERROR); } ut_ad(!dict_table_is_comp(sys_foreign)); @@ -3139,7 +3759,7 @@ dict_load_foreigns( ut_ad(!dict_index_is_clust(sec_index)); start_load: - tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1); + tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, table_name, ut_strlen(table_name)); @@ -3211,12 +3831,13 @@ loop: /* Load the foreign constraint definition to the dictionary cache */ err = dict_load_foreign(fk_id, col_names, - check_recursive, check_charsets, ignore_err); + check_recursive, check_charsets, ignore_err, + fk_tables); if (err != DB_SUCCESS) { btr_pcur_close(&pcur); - return(err); + DBUG_RETURN(err); } mtr_start(&mtr); @@ -3245,5 +3866,98 @@ load_next_index: goto start_load; } - return(DB_SUCCESS); + DBUG_RETURN(DB_SUCCESS); +} + +/***********************************************************************//** +Loads a table id based on the index id. 
+@return true if found */ +static +bool +dict_load_table_id_on_index_id( +/*===========================*/ + index_id_t index_id, /*!< in: index id */ + table_id_t* table_id) /*!< out: table id */ +{ + /* check hard coded indexes */ + switch(index_id) { + case DICT_TABLES_ID: + case DICT_COLUMNS_ID: + case DICT_INDEXES_ID: + case DICT_FIELDS_ID: + *table_id = index_id; + return true; + case DICT_TABLE_IDS_ID: + /* The following is a secondary index on SYS_TABLES */ + *table_id = DICT_TABLES_ID; + return true; + } + + bool found = false; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. */ + + mtr_start(&mtr); + + btr_pcur_t pcur; + const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES); + + while (rec) { + ulint len; + const byte* field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__ID, &len); + ut_ad(len == 8); + + /* Check if the index id is the one searched for */ + if (index_id == mach_read_from_8(field)) { + found = true; + /* Now we get the table id */ + const byte* field = rec_get_nth_field_old( + rec, + DICT_FLD__SYS_INDEXES__TABLE_ID, + &len); + *table_id = mach_read_from_8(field); + break; + } + mtr_commit(&mtr); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(found); +} + +UNIV_INTERN +dict_table_t* +dict_table_open_on_index_id( +/*========================*/ + index_id_t index_id, /*!< in: index id */ + bool dict_locked) /*!< in: dict locked */ +{ + if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + } + + ut_ad(mutex_own(&dict_sys->mutex)); + table_id_t table_id; + dict_table_t * table = NULL; + if (dict_load_table_id_on_index_id(index_id, &table_id)) { + bool local_dict_locked = true; + table = dict_table_open_on_id(table_id, + local_dict_locked, + DICT_TABLE_OP_LOAD_TABLESPACE); + } + + if 
(!dict_locked) { + mutex_exit(&dict_sys->mutex); + } + return table; } diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index f8ea0005665..b0d679d4619 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -24,6 +24,11 @@ Data dictionary memory object creation Created 1/8/1996 Heikki Tuuri ***********************************************************************/ +#ifndef UNIV_HOTBACKUP +#include "ha_prototypes.h" +#include +#endif /* !UNIV_HOTBACKUP */ + #include "dict0mem.h" #ifdef UNIV_NONINL @@ -36,25 +41,17 @@ Created 1/8/1996 Heikki Tuuri #include "dict0dict.h" #include "fts0priv.h" #include "ut0crc32.h" + #ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" /* innobase_casedn_str(), - innobase_get_lower_case_table_names */ -# include "mysql_com.h" /* NAME_LEN */ # include "lock0lock.h" #endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_BLOB_DEBUG -# include "ut0rbt.h" -#endif /* UNIV_BLOB_DEBUG */ + +#include "sync0sync.h" #include #define DICT_HEAP_SIZE 100 /*!< initial memory heap size when creating a table or index object */ -#ifdef UNIV_PFS_MUTEX -/* Key to register autoinc_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - /** System databases */ static const char* innobase_system_databases[] = { "mysql/", @@ -64,20 +61,55 @@ static const char* innobase_system_databases[] = { }; /** An interger randomly initialized at startup used to make a temporary -table name as unique as possible. */ +table name as unuique as possible. */ static ib_uint32_t dict_temp_file_num; +/** Display an identifier. 
+@param[in,out] s output stream +@param[in] id_name SQL identifier (other than table name) +@return the output stream */ +std::ostream& +operator<<( + std::ostream& s, + const id_name_t& id_name) +{ + const char q = '`'; + const char* c = id_name; + s << q; + for (; *c != 0; c++) { + if (*c == q) { + s << *c; + } + s << *c; + } + s << q; + return(s); +} + +/** Display a table name. +@param[in,out] s output stream +@param[in] table_name table name +@return the output stream */ +std::ostream& +operator<<( + std::ostream& s, + const table_name_t& table_name) +{ + return(s << ut_get_name(NULL, table_name.m_name)); +} + /**********************************************************************//** Creates a table memory object. -@return own: table object */ -UNIV_INTERN +@return own: table object */ dict_table_t* dict_mem_table_create( /*==================*/ const char* name, /*!< in: table name */ ulint space, /*!< in: space where the clustered index of the table is placed */ - ulint n_cols, /*!< in: number of columns */ + ulint n_cols, /*!< in: total number of columns including + virtual and non-virtual columns */ + ulint n_v_cols,/*!< in: number of virtual columns */ ulint flags, /*!< in: table flags */ ulint flags2) /*!< in: table flags2 */ { @@ -85,30 +117,36 @@ dict_mem_table_create( mem_heap_t* heap; ut_ad(name); - ut_a(dict_tf_is_valid(flags)); - ut_a(!(flags2 & ~DICT_TF2_BIT_MASK)); + ut_a(dict_tf2_is_valid(flags, flags2)); + ut_a(!(flags2 & DICT_TF2_UNUSED_BIT_MASK)); heap = mem_heap_create(DICT_HEAP_SIZE); table = static_cast( - mem_heap_zalloc(heap, sizeof(dict_table_t))); + mem_heap_zalloc(heap, sizeof(*table))); + + lock_table_lock_list_init(&table->locks); + + UT_LIST_INIT(table->indexes, &dict_index_t::indexes); table->heap = heap; + ut_d(table->magic_n = DICT_TABLE_MAGIC_N); + table->flags = (unsigned int) flags; table->flags2 = (unsigned int) flags2; - table->name = static_cast(ut_malloc(strlen(name) + 1)); - memcpy(table->name, name, strlen(name) + 1); - 
table->is_system_db = dict_mem_table_is_system(table->name); + table->name.m_name = mem_strdup(name); + table->is_system_db = dict_mem_table_is_system(table->name.m_name); table->space = (unsigned int) space; - table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); + table->n_t_cols = (unsigned int) (n_cols + + dict_table_get_n_sys_cols(table)); + table->n_v_cols = (unsigned int) (n_v_cols); + table->n_cols = table->n_t_cols - table->n_v_cols; table->cols = static_cast( - mem_heap_alloc(heap, - (n_cols + DATA_N_SYS_COLS) - * sizeof(dict_col_t))); - - ut_d(table->magic_n = DICT_TABLE_MAGIC_N); + mem_heap_alloc(heap, table->n_cols * sizeof(dict_col_t))); + table->v_cols = static_cast( + mem_heap_alloc(heap, n_v_cols * sizeof(*table->v_cols))); /* true means that the stats latch will be enabled - dict_table_stats_lock() will not be noop. */ @@ -118,9 +156,12 @@ dict_mem_table_create( table->autoinc_lock = static_cast( mem_heap_alloc(heap, lock_get_size())); + /* lazy creation of table autoinc latch */ dict_table_autoinc_create_lazy(table); table->autoinc = 0; + table->sess_row_id = 0; + table->sess_trx_id = 0; /* The number of transactions that are either waiting on the AUTOINC lock or have been granted the lock. 
*/ @@ -138,45 +179,18 @@ dict_mem_table_create( } #endif /* !UNIV_HOTBACKUP */ + if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + dict_get_and_save_space_name(table, true); + } + new(&table->foreign_set) dict_foreign_set(); new(&table->referenced_set) dict_foreign_set(); return(table); } -/****************************************************************//** -Determines if a table belongs to a system database -@return */ -UNIV_INTERN -bool -dict_mem_table_is_system( -/*================*/ - char *name) /*!< in: table name */ -{ - ut_ad(name); - - /* table has the following format: database/table - and some system table are of the form SYS_* */ - if (strchr(name, '/')) { - int table_len = strlen(name); - const char *system_db; - int i = 0; - while ((system_db = innobase_system_databases[i++]) - && (system_db != NullS)) { - int len = strlen(system_db); - if (table_len > len && !strncmp(name, system_db, len)) { - return true; - } - } - return false; - } else { - return true; - } -} - /****************************************************************//** Free a table memory object. 
*/ -UNIV_INTERN void dict_mem_table_free( /*================*/ @@ -186,13 +200,11 @@ dict_mem_table_free( ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); ut_d(table->cached = FALSE); - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { + if (dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { if (table->fts) { - if (table->cached) { - fts_optimize_remove_table(table); - } + fts_optimize_remove_table(table); fts_free(table); } @@ -201,18 +213,34 @@ dict_mem_table_free( dict_table_autoinc_destroy(table); #endif /* UNIV_HOTBACKUP */ + dict_mem_table_free_foreign_vcol_set(table); dict_table_stats_latch_destroy(table); table->foreign_set.~dict_foreign_set(); table->referenced_set.~dict_foreign_set(); - ut_free(table->name); + ut_free(table->name.m_name); + table->name.m_name = NULL; + + /* Clean up virtual index info structures that are registered + with virtual columns */ + for (ulint i = 0; i < table->n_v_def; i++) { + dict_v_col_t* vcol + = dict_table_get_nth_v_col(table, i); + + UT_DELETE(vcol->v_indexes); + } + + if (table->s_cols != NULL) { + UT_DELETE(table->s_cols); + } + mem_heap_free(table->heap); } /****************************************************************//** Append 'name' to 'col_names'. @see dict_table_t::col_names -@return new column names array */ +@return new column names array */ static const char* dict_add_col_name( @@ -260,7 +288,6 @@ dict_add_col_name( /**********************************************************************//** Adds a column definition to a table. 
*/ -UNIV_INTERN void dict_mem_table_add_col( /*===================*/ @@ -278,13 +305,17 @@ dict_mem_table_add_col( ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); ut_ad(!heap == !name); + ut_ad(!(prtype & DATA_VIRTUAL)); + i = table->n_def++; + table->n_t_def++; + if (name) { - if (UNIV_UNLIKELY(table->n_def == table->n_cols)) { + if (table->n_def == table->n_cols) { heap = table->heap; } - if (UNIV_LIKELY(i != 0) && UNIV_UNLIKELY(table->col_names == NULL)) { + if (i && !table->col_names) { /* All preceding column names are empty. */ char* s = static_cast( mem_heap_zalloc(heap, table->n_def)); @@ -301,6 +332,115 @@ dict_mem_table_add_col( dict_mem_fill_column_struct(col, i, mtype, prtype, len); } +/** Adds a virtual column definition to a table. +@param[in,out] table table +@param[in,out] heap temporary memory heap, or NULL. It is + used to store name when we have not finished + adding all columns. When all columns are + added, the whole name will copy to memory from + table->heap +@param[in] name column name +@param[in] mtype main datatype +@param[in] prtype precise type +@param[in] len length +@param[in] pos position in a table +@param[in] num_base number of base columns +@return the virtual column definition */ +dict_v_col_t* +dict_mem_table_add_v_col( + dict_table_t* table, + mem_heap_t* heap, + const char* name, + ulint mtype, + ulint prtype, + ulint len, + ulint pos, + ulint num_base) +{ + dict_v_col_t* v_col; + ulint i; + + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!heap == !name); + + ut_ad(prtype & DATA_VIRTUAL); + + i = table->n_v_def++; + + table->n_t_def++; + + if (name != NULL) { + if (table->n_v_def == table->n_v_cols) { + heap = table->heap; + } + + if (i && !table->v_col_names) { + /* All preceding column names are empty. 
*/ + char* s = static_cast( + mem_heap_zalloc(heap, table->n_v_def)); + + table->v_col_names = s; + } + + table->v_col_names = dict_add_col_name(table->v_col_names, + i, name, heap); + } + + v_col = dict_table_get_nth_v_col(table, i); + + dict_mem_fill_column_struct(&v_col->m_col, pos, mtype, prtype, len); + v_col->v_pos = i; + + if (num_base != 0) { + v_col->base_col = static_cast(mem_heap_zalloc( + table->heap, num_base * sizeof( + *v_col->base_col))); + } else { + v_col->base_col = NULL; + } + + v_col->num_base = num_base; + + /* Initialize the index list for virtual columns */ + v_col->v_indexes = UT_NEW_NOKEY(dict_v_idx_list()); + + return(v_col); +} + +/** Adds a stored column definition to a table. +@param[in] table table +@param[in] num_base number of base columns. */ +void +dict_mem_table_add_s_col( + dict_table_t* table, + ulint num_base) +{ + ulint i = table->n_def - 1; + dict_col_t* col = dict_table_get_nth_col(table, i); + dict_s_col_t s_col; + + ut_ad(col != NULL); + + if (table->s_cols == NULL) { + table->s_cols = UT_NEW_NOKEY(dict_s_col_list()); + } + + s_col.m_col = col; + s_col.s_pos = i + table->n_v_def; + + if (num_base != 0) { + s_col.base_col = static_cast(mem_heap_zalloc( + table->heap, num_base * sizeof(dict_col_t*))); + } else { + s_col.base_col = NULL; + } + + s_col.num_base = num_base; + table->s_cols->push_back(s_col); +} + + /**********************************************************************//** Renames a column of a table in the data dictionary cache. */ static MY_ATTRIBUTE((nonnull)) @@ -310,17 +450,22 @@ dict_mem_table_col_rename_low( dict_table_t* table, /*!< in/out: table */ unsigned i, /*!< in: column offset corresponding to s */ const char* to, /*!< in: new column name */ - const char* s) /*!< in: pointer to table->col_names */ + const char* s, /*!< in: pointer to table->col_names */ + bool is_virtual) + /*!< in: if this is a virtual column */ { + char* t_col_names = const_cast( + is_virtual ? 
table->v_col_names : table->col_names); + ulint n_col = is_virtual ? table->n_v_def : table->n_def; + size_t from_len = strlen(s), to_len = strlen(to); - ut_ad(i < table->n_def); + ut_ad(i < table->n_def || is_virtual); + ut_ad(i < table->n_v_def || !is_virtual); + ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); - char from[NAME_LEN]; - strncpy(from, s, NAME_LEN); - if (from_len == to_len) { /* The easy case: simply replace the column name in table->col_names. */ @@ -329,13 +474,13 @@ dict_mem_table_col_rename_low( /* We need to adjust all affected index->field pointers, as in dict_index_add_col(). First, copy table->col_names. */ - ulint prefix_len = s - table->col_names; + ulint prefix_len = s - t_col_names; - for (; i < table->n_def; i++) { + for (; i < n_col; i++) { s += strlen(s) + 1; } - ulint full_len = s - table->col_names; + ulint full_len = s - t_col_names; char* col_names; if (to_len > from_len) { @@ -344,14 +489,14 @@ dict_mem_table_col_rename_low( table->heap, full_len + to_len - from_len)); - memcpy(col_names, table->col_names, prefix_len); + memcpy(col_names, t_col_names, prefix_len); } else { - col_names = const_cast(table->col_names); + col_names = const_cast(t_col_names); } memcpy(col_names + prefix_len, to, to_len); memmove(col_names + prefix_len + to_len, - table->col_names + (prefix_len + from_len), + t_col_names + (prefix_len + from_len), full_len - (prefix_len + from_len)); /* Replace the field names in every index. 
*/ @@ -364,8 +509,16 @@ dict_mem_table_col_rename_low( dict_field_t* field = dict_index_get_nth_field( index, i); + + /* if is_virtual and that in field->col does + not match, continue */ + if ((!is_virtual) != + (!dict_col_is_virtual(field->col))) { + continue; + } + ulint name_ofs - = field->name - table->col_names; + = field->name - t_col_names; if (name_ofs <= prefix_len) { field->name = col_names + name_ofs; } else { @@ -376,7 +529,16 @@ dict_mem_table_col_rename_low( } } - table->col_names = col_names; + if (is_virtual) { + table->v_col_names = col_names; + } else { + table->col_names = col_names; + } + } + + /* Virtual columns are not allowed for foreign key */ + if (is_virtual) { + return; } dict_foreign_t* foreign; @@ -388,54 +550,14 @@ dict_mem_table_col_rename_low( foreign = *it; - if (foreign->foreign_index == NULL) { - /* We may go here when we set foreign_key_checks to 0, - and then try to rename a column and modify the - corresponding foreign key constraint. The index - would have been dropped, we have to find an equivalent - one */ - for (unsigned f = 0; f < foreign->n_fields; f++) { - if (strcmp(foreign->foreign_col_names[f], from) - == 0) { - - char** rc = const_cast( - foreign->foreign_col_names - + f); - - if (to_len <= strlen(*rc)) { - memcpy(*rc, to, to_len + 1); - } else { - *rc = static_cast( - mem_heap_dup( - foreign->heap, - to, - to_len + 1)); - } - } - } - - dict_index_t* new_index = dict_foreign_find_index( - foreign->foreign_table, NULL, - foreign->foreign_col_names, - foreign->n_fields, NULL, true, false, - NULL, NULL, NULL); - /* There must be an equivalent index in this case. */ - ut_ad(new_index != NULL); - - foreign->foreign_index = new_index; - - } else { - - for (unsigned f = 0; f < foreign->n_fields; f++) { - /* These can point straight to - table->col_names, because the foreign key - constraints will be freed at the same time - when the table object is freed. 
*/ - foreign->foreign_col_names[f] - = dict_index_get_nth_field( - foreign->foreign_index, - f)->name; - } + for (unsigned f = 0; f < foreign->n_fields; f++) { + /* These can point straight to + table->col_names, because the foreign key + constraints will be freed at the same time + when the table object is freed. */ + foreign->foreign_col_names[f] + = dict_index_get_nth_field( + foreign->foreign_index, f)->name; } } @@ -445,8 +567,6 @@ dict_mem_table_col_rename_low( foreign = *it; - ut_ad(foreign->referenced_index != NULL); - for (unsigned f = 0; f < foreign->n_fields; f++) { /* foreign->referenced_col_names[] need to be copies, because the constraint may become @@ -478,20 +598,22 @@ dict_mem_table_col_rename_low( /**********************************************************************//** Renames a column of a table in the data dictionary cache. */ -UNIV_INTERN void dict_mem_table_col_rename( /*======================*/ dict_table_t* table, /*!< in/out: table */ - unsigned nth_col,/*!< in: column index */ + ulint nth_col,/*!< in: column index */ const char* from, /*!< in: old column name */ - const char* to) /*!< in: new column name */ + const char* to, /*!< in: new column name */ + bool is_virtual) + /*!< in: if this is a virtual column */ { - const char* s = table->col_names; + const char* s = is_virtual ? table->v_col_names : table->col_names; - ut_ad(nth_col < table->n_def); + ut_ad((!is_virtual && nth_col < table->n_def) + || (is_virtual && nth_col < table->n_v_def)); - for (unsigned i = 0; i < nth_col; i++) { + for (ulint i = 0; i < nth_col; i++) { size_t len = strlen(s); ut_ad(len > 0); s += len + 1; @@ -501,13 +623,13 @@ dict_mem_table_col_rename( Proceed with the renaming anyway. 
*/ ut_ad(!strcmp(from, s)); - dict_mem_table_col_rename_low(table, nth_col, to, s); + dict_mem_table_col_rename_low(table, static_cast(nth_col), + to, s, is_virtual); } /**********************************************************************//** This function populates a dict_col_t memory structure with supplied information. */ -UNIV_INTERN void dict_mem_fill_column_struct( /*========================*/ @@ -537,8 +659,7 @@ dict_mem_fill_column_struct( /**********************************************************************//** Creates an index memory object. -@return own: index object */ -UNIV_INTERN +@return own: index object */ dict_index_t* dict_mem_index_create( /*==================*/ @@ -565,20 +686,32 @@ dict_mem_index_create( space, type, n_fields); dict_index_zip_pad_mutex_create_lazy(index); + + if (type & DICT_SPATIAL) { + mutex_create(LATCH_ID_RTR_SSN_MUTEX, &index->rtr_ssn.mutex); + index->rtr_track = static_cast( + mem_heap_alloc( + heap, + sizeof(*index->rtr_track))); + mutex_create(LATCH_ID_RTR_ACTIVE_MUTEX, + &index->rtr_track->rtr_active_mutex); + index->rtr_track->rtr_active = UT_NEW_NOKEY(rtr_info_active()); + } + return(index); } #ifndef UNIV_HOTBACKUP /**********************************************************************//** Creates and initializes a foreign constraint memory object. 
-@return own: foreign constraint struct */ -UNIV_INTERN +@return own: foreign constraint struct */ dict_foreign_t* dict_mem_foreign_create(void) /*=========================*/ { dict_foreign_t* foreign; mem_heap_t* heap; + DBUG_ENTER("dict_mem_foreign_create"); heap = mem_heap_create(100); @@ -587,7 +720,11 @@ dict_mem_foreign_create(void) foreign->heap = heap; - return(foreign); + foreign->v_cols = NULL; + + DBUG_PRINT("dict_mem_foreign_create", ("heap: %p", heap)); + + DBUG_RETURN(foreign); } /**********************************************************************//** @@ -595,7 +732,6 @@ Sets the foreign_table_name_lookup pointer based on the value of lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup will point to foreign_table_name. If 2, then another string is allocated from foreign->heap and set to lower case. */ -UNIV_INTERN void dict_mem_foreign_table_name_lookup_set( /*===================================*/ @@ -626,7 +762,6 @@ Sets the referenced_table_name_lookup pointer based on the value of lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup will point to referenced_table_name. If 2, then another string is allocated from foreign->heap and set to lower case. */ -UNIV_INTERN void dict_mem_referenced_table_name_lookup_set( /*======================================*/ @@ -651,13 +786,187 @@ dict_mem_referenced_table_name_lookup_set( = foreign->referenced_table_name; } } + +/** Fill the virtual column set with virtual column information +present in the given virtual index. +@param[in] index virtual index +@param[out] v_cols virtual column set. 
*/ +static +void +dict_mem_fill_vcol_has_index( + const dict_index_t* index, + dict_vcol_set** v_cols) +{ + for (ulint i = 0; i < index->table->n_v_cols; i++) { + dict_v_col_t* v_col = dict_table_get_nth_v_col( + index->table, i); + if (!v_col->m_col.ord_part) { + continue; + } + + dict_v_idx_list::iterator it; + for (it = v_col->v_indexes->begin(); + it != v_col->v_indexes->end(); ++it) { + dict_v_idx_t v_idx = *it; + + if (v_idx.index != index) { + continue; + } + + if (*v_cols == NULL) { + *v_cols = UT_NEW_NOKEY(dict_vcol_set()); + } + + (*v_cols)->insert(v_col); + } + } +} + +/** Fill the virtual column set with the virtual column of the index +if the index contains given column name. +@param[in] col_name column name +@param[in] table innodb table object +@param[out] v_cols set of virtual column information. */ +static +void +dict_mem_fill_vcol_from_v_indexes( + const char* col_name, + const dict_table_t* table, + dict_vcol_set** v_cols) +{ + /* virtual column can't be Primary Key, so start with + secondary index */ + for (dict_index_t* index = dict_table_get_next_index( + dict_table_get_first_index(table)); + index; + index = dict_table_get_next_index(index)) { + + if (!dict_index_has_virtual(index)) { + continue; + } + + for (ulint i = 0; i < index->n_fields; i++) { + dict_field_t* field = + dict_index_get_nth_field(index, i); + + if (strcmp(field->name, col_name) == 0) { + dict_mem_fill_vcol_has_index( + index, v_cols); + } + } + } +} + +/** Fill the virtual column set with virtual columns which have base columns +as the given col_name +@param[in] col_name column name +@param[in] table table object +@param[out] v_cols set of virtual columns. 
*/ +static +void +dict_mem_fill_vcol_set_for_base_col( + const char* col_name, + const dict_table_t* table, + dict_vcol_set** v_cols) +{ + for (ulint i = 0; i < table->n_v_cols; i++) { + dict_v_col_t* v_col = dict_table_get_nth_v_col(table, i); + + if (!v_col->m_col.ord_part) { + continue; + } + + for (ulint j = 0; j < v_col->num_base; j++) { + if (strcmp(col_name, dict_table_get_col_name( + table, + v_col->base_col[j]->ind)) == 0) { + + if (*v_cols == NULL) { + *v_cols = UT_NEW_NOKEY(dict_vcol_set()); + } + + (*v_cols)->insert(v_col); + } + } + } +} + +/** Fills the dependent virtual columns in a set. +Reason for being dependent are +1) FK can be present on base column of virtual columns +2) FK can be present on column which is a part of virtual index +@param[in,out] foreign foreign key information. */ +void +dict_mem_foreign_fill_vcol_set( + dict_foreign_t* foreign) +{ + ulint type = foreign->type; + + if (type == 0) { + return; + } + + for (ulint i = 0; i < foreign->n_fields; i++) { + /** FK can be present on base columns + of virtual columns. */ + dict_mem_fill_vcol_set_for_base_col( + foreign->foreign_col_names[i], + foreign->foreign_table, + &foreign->v_cols); + + /** FK can be present on the columns + which can be a part of virtual index. */ + dict_mem_fill_vcol_from_v_indexes( + foreign->foreign_col_names[i], + foreign->foreign_table, + &foreign->v_cols); + } +} + +/** Fill virtual columns set in each fk constraint present in the table. +@param[in,out] table innodb table object. */ +void +dict_mem_table_fill_foreign_vcol_set( + dict_table_t* table) +{ + dict_foreign_set fk_set = table->foreign_set; + dict_foreign_t* foreign; + + dict_foreign_set::iterator it; + for (it = fk_set.begin(); it != fk_set.end(); ++it) { + foreign = *it; + + dict_mem_foreign_fill_vcol_set(foreign); + } +} + +/** Free the vcol_set from all foreign key constraint on the table. +@param[in,out] table innodb table object. 
*/ +void +dict_mem_table_free_foreign_vcol_set( + dict_table_t* table) +{ + dict_foreign_set fk_set = table->foreign_set; + dict_foreign_t* foreign; + + dict_foreign_set::iterator it; + for (it = fk_set.begin(); it != fk_set.end(); ++it) { + + foreign = *it; + + if (foreign->v_cols != NULL) { + UT_DELETE(foreign->v_cols); + foreign->v_cols = NULL; + } + } +} + #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Adds a field definition to an index. NOTE: does not take a copy of the column name if the field is a column. The memory occupied by the column name may be released only after publishing the index. */ -UNIV_INTERN void dict_mem_index_add_field( /*=====================*/ @@ -682,7 +991,6 @@ dict_mem_index_add_field( /**********************************************************************//** Frees an index memory object. */ -UNIV_INTERN void dict_mem_index_free( /*================*/ @@ -690,15 +998,25 @@ dict_mem_index_free( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); -#ifdef UNIV_BLOB_DEBUG - if (index->blobs) { - mutex_free(&index->blobs_mutex); - rbt_free(index->blobs); - } -#endif /* UNIV_BLOB_DEBUG */ dict_index_zip_pad_mutex_destroy(index); + if (dict_index_is_spatial(index)) { + rtr_info_active::iterator it; + rtr_info_t* rtr_info; + + for (it = index->rtr_track->rtr_active->begin(); + it != index->rtr_track->rtr_active->end(); ++it) { + rtr_info = *it; + + rtr_info->index = NULL; + } + + mutex_destroy(&index->rtr_ssn.mutex); + mutex_destroy(&index->rtr_track->rtr_active_mutex); + UT_DELETE(index->rtr_track->rtr_active); + } + mem_heap_free(index->heap); } @@ -714,7 +1032,6 @@ reasonably unique temporary file name. 
@param[in] dbtab Table name in the form database/table name @param[in] id Table id @return A unique temporary tablename suitable for InnoDB use */ -UNIV_INTERN char* dict_mem_create_temporary_tablename( mem_heap_t* heap, @@ -723,18 +1040,14 @@ dict_mem_create_temporary_tablename( { size_t size; char* name; - const char* dbend = strchr(dbtab, '/'); + const char* dbend = strchr(dbtab, '/'); ut_ad(dbend); - size_t dblen = dbend - dbtab + 1; + size_t dblen = dbend - dbtab + 1; -#ifdef HAVE_ATOMIC_BUILTINS - /* Increment a randomly initialized number for each temp file. */ + /* Increment a randomly initialized number for each temp file. */ os_atomic_increment_uint32(&dict_temp_file_num, 1); -#else /* HAVE_ATOMIC_BUILTINS */ - dict_temp_file_num++; -#endif /* HAVE_ATOMIC_BUILTINS */ - size = tmp_file_prefix_length + 3 + 20 + 1 + 10 + dblen; + size = dblen + (sizeof(TEMP_FILE_PREFIX) + 3 + 20 + 1 + 10); name = static_cast(mem_heap_alloc(heap, size)); memcpy(name, dbtab, dblen); ut_snprintf(name + dblen, size - dblen, @@ -745,15 +1058,13 @@ dict_mem_create_temporary_tablename( } /** Initialize dict memory variables */ - void dict_mem_init(void) { /* Initialize a randomly distributed temporary file number */ - ib_uint32_t now = static_cast(ut_time()); + ib_uint32_t now = static_cast(ut_time()); - const byte* buf = reinterpret_cast(&now); - ut_ad(ut_crc32 != NULL); + const byte* buf = reinterpret_cast(&now); dict_temp_file_num = ut_crc32(buf, sizeof(now)); @@ -819,3 +1130,32 @@ operator<< (std::ostream& out, const dict_foreign_set& fk_set) return(out); } +/****************************************************************//** +Determines if a table belongs to a system database +@return */ +bool +dict_mem_table_is_system( +/*================*/ + char *name) /*!< in: table name */ +{ + ut_ad(name); + + /* table has the following format: database/table + and some system table are of the form SYS_* */ + if (strchr(name, '/')) { + int table_len = strlen(name); + const char 
*system_db; + int i = 0; + while ((system_db = innobase_system_databases[i++]) + && (system_db != NullS)) { + int len = strlen(system_db); + if (table_len > len && !strncmp(name, system_db, len)) { + return true; + } + } + return false; + } else { + return true; + } +} + diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 5c283f693d5..ff5162a68c4 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -27,24 +27,17 @@ Created Jan 06, 2010 Vasil Dimov #include "univ.i" -#include "btr0btr.h" /* btr_get_size() */ -#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */ -#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */ -#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */ +#include "ut0ut.h" +#include "ut0rnd.h" +#include "dyn0buf.h" +#include "row0sel.h" +#include "trx0trx.h" +#include "pars0pars.h" #include "dict0stats.h" -#include "data0type.h" /* dtype_t */ -#include "db0err.h" /* dberr_t */ -#include "page0page.h" /* page_align() */ -#include "pars0pars.h" /* pars_info_create() */ -#include "pars0types.h" /* pars_info_t */ -#include "que0que.h" /* que_eval_sql() */ -#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */ -#include "row0sel.h" /* sel_node_t */ -#include "row0types.h" /* sel_node_t */ -#include "trx0trx.h" /* trx_create() */ -#include "trx0roll.h" /* trx_rollback_to_savepoint() */ -#include "ut0rnd.h" /* ut_rnd_interval() */ -#include "ut0ut.h" /* ut_format_name(), ut_time() */ +#include "ha_prototypes.h" +#include "ut0new.h" +#include +#include "btr0btr.h" #include #include @@ -144,18 +137,15 @@ of keys. For example if a btree level is: index: 0,1,2,3,4,5,6,7,8,9,10,11,12 data: b,b,b,b,b,b,g,g,j,j,j, x, y then we would store 5,7,10,11,12 in the array. */ -typedef std::vector boundaries_t; +typedef std::vector > boundaries_t; -/* This is used to arrange the index based on the index name. 
-@return true if index_name1 is smaller than index_name2. */ -struct index_cmp -{ - bool operator()(const char* index_name1, const char* index_name2) const { - return(strcmp(index_name1, index_name2) < 0); - } -}; +/** Allocator type used for index_map_t. */ +typedef ut_allocator > + index_map_t_allocator; -typedef std::map index_map_t; +/** Auxiliary map used for sorting indexes by name in dict_stats_save(). */ +typedef std::map index_map_t; /*********************************************************************//** Checks whether an index should be ignored in stats manipulations: @@ -171,8 +161,9 @@ dict_stats_should_ignore_index( { return((index->type & DICT_FTS) || dict_index_is_corrupted(index) + || dict_index_is_spatial(index) || index->to_be_dropped - || *index->name == TEMP_INDEX_PREFIX); + || !index->is_committed()); } /*********************************************************************//** @@ -252,7 +243,7 @@ dict_stats_persistent_storage_check( dberr_t ret; if (!caller_has_dict_sys_mutex) { - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); } ut_ad(mutex_own(&dict_sys->mutex)); @@ -267,12 +258,11 @@ dict_stats_persistent_storage_check( } if (!caller_has_dict_sys_mutex) { - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); } if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: %s\n", errstr); + ib::error() << errstr; return(false); } else if (ret == DB_STATS_DO_NOT_EXIST) { return false; @@ -300,9 +290,8 @@ dict_stats_exec_sql( { dberr_t err; bool trx_started = false; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); if (!dict_stats_persistent_storage_check(true)) { @@ -312,8 +301,13 @@ dict_stats_exec_sql( if (trx == NULL) { trx = trx_allocate_for_background(); - trx_start_if_not_started(trx); trx_started 
= true; + + if (srv_read_only_mode) { + trx_start_internal_read_only(trx); + } else { + trx_start_internal(trx); + } } err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */ @@ -388,7 +382,7 @@ dict_stats_table_clone_create( heap_size = 0; heap_size += sizeof(dict_table_t); - heap_size += strlen(table->name) + 1; + heap_size += strlen(table->name.m_name) + 1; for (index = dict_table_get_first_index(table); index != NULL; @@ -398,7 +392,7 @@ dict_stats_table_clone_create( continue; } - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); ulint n_uniq = dict_index_get_n_unique(index); @@ -428,8 +422,7 @@ dict_stats_table_clone_create( t->heap = heap; - UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1); - t->name = (char*) mem_heap_strdup(heap, table->name); + t->name.m_name = mem_heap_strdup(heap, table->name.m_name); t->corrupted = table->corrupted; @@ -438,7 +431,7 @@ dict_stats_table_clone_create( dict_table_stats_lock()/unlock() routines will do nothing. 
*/ dict_table_stats_latch_create(t, false); - UT_LIST_INIT(t->indexes); + UT_LIST_INIT(t->indexes, &dict_index_t::indexes); for (index = dict_table_get_first_index(table); index != NULL; @@ -448,7 +441,7 @@ dict_stats_table_clone_create( continue; } - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); dict_index_t* idx; @@ -457,10 +450,9 @@ dict_stats_table_clone_create( UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id)); idx->id = index->id; - UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1); - idx->name = (char*) mem_heap_strdup(heap, index->name); + idx->name = mem_heap_strdup(heap, index->name); - idx->table_name = t->name; + idx->table_name = t->name.m_name; idx->table = t; @@ -469,6 +461,7 @@ dict_stats_table_clone_create( idx->to_be_dropped = 0; idx->online_status = ONLINE_INDEX_COMPLETE; + idx->set_committed(true); idx->n_uniq = index->n_uniq; @@ -476,13 +469,12 @@ dict_stats_table_clone_create( heap, idx->n_uniq * sizeof(idx->fields[0])); for (ulint i = 0; i < idx->n_uniq; i++) { - UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1); - idx->fields[i].name = (char*) mem_heap_strdup( + idx->fields[i].name = mem_heap_strdup( heap, index->fields[i].name); } /* hook idx into t->indexes */ - UT_LIST_ADD_LAST(indexes, t->indexes, idx); + UT_LIST_ADD_LAST(t->indexes, idx); idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc( heap, @@ -533,7 +525,7 @@ dict_stats_empty_index( /*!< in: whether to empty defrag stats */ { ut_ad(!(index->type & DICT_FTS)); - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); ulint n_uniq = index->n_uniq; @@ -607,7 +599,7 @@ dict_stats_empty_table( continue; } - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); dict_stats_empty_index(index, empty_defrag_stats); } @@ -730,7 +722,7 @@ dict_stats_copy( continue; } - ut_ad(!dict_index_is_univ(dst_idx)); + ut_ad(!dict_index_is_ibuf(dst_idx)); if (!INDEX_EQ(src_idx, dst_idx)) 
{ for (src_idx = dict_table_get_first_index(src); @@ -787,8 +779,7 @@ dict_stats_copy( dst->stat_initialized = TRUE; } -/*********************************************************************//** -Duplicate the stats of a table and its indexes. +/** Duplicate the stats of a table and its indexes. This function creates a dummy dict_table_t object and copies the input table's stats into it. The returned table object is not in the dictionary cache and cannot be accessed by any other threads. In addition to the @@ -810,12 +801,12 @@ dict_index_t::stat_defrag_n_pages_freed dict_index_t::stat_defrag_n_page_split The returned object should be freed with dict_stats_snapshot_free() when no longer needed. +@param[in] table table whose stats to copy @return incomplete table object */ static dict_table_t* dict_stats_snapshot_create( -/*=======================*/ - dict_table_t* table) /*!< in: table whose stats to copy */ + dict_table_t* table) { mutex_enter(&dict_sys->mutex); @@ -883,7 +874,10 @@ dict_stats_update_transient_for_index( } else { mtr_t mtr; ulint size; + mtr_start(&mtr); + dict_disable_redo_if_temporary(index->table, &mtr); + mtr_s_lock(dict_index_get_lock(index), &mtr); size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); @@ -908,6 +902,9 @@ dict_stats_update_transient_for_index( index->stat_n_leaf_pages = size; + /* We don't handle the return value since it will be false + only when some thread is dropping the table and we don't + have to empty the statistics of the to be dropped index */ btr_estimate_number_of_different_key_vals(index); } } @@ -918,7 +915,6 @@ is relatively quick and is used to calculate transient statistics that are not saved on disk. This was the only way to calculate statistics before the Persistent Statistics feature was introduced. */ -UNIV_INTERN void dict_stats_update_transient( /*========================*/ @@ -934,25 +930,22 @@ dict_stats_update_transient( if (dict_table_is_discarded(table)) { /* Nothing to do. 
*/ - dict_stats_empty_table(table, false); + dict_stats_empty_table(table, true); return; } else if (index == NULL) { /* Table definition is corrupt */ - char buf[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: table %s has no indexes. " - "Cannot calculate statistics.\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf))); - dict_stats_empty_table(table, false); + ib::warn() << "Table " << table->name + << " has no indexes. Cannot calculate statistics."; + dict_stats_empty_table(table, true); return; } for (; index != NULL; index = dict_table_get_next_index(index)) { - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); - if (index->type & DICT_FTS) { + if (index->type & DICT_FTS || dict_index_is_spatial(index)) { continue; } @@ -1046,7 +1039,7 @@ dict_stats_analyze_index_level( index->table->name, index->name, level); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); + MTR_MEMO_SX_LOCK)); n_uniq = dict_index_get_n_unique(index); @@ -1080,7 +1073,7 @@ dict_stats_analyze_index_level( on the desired level. 
*/ btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, + true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED, &pcur, true, level, mtr); btr_pcur_move_to_next_on_page(&pcur); @@ -1119,8 +1112,6 @@ dict_stats_analyze_index_level( btr_pcur_is_on_user_rec(&pcur); btr_pcur_move_to_next_user_rec(&pcur, mtr)) { - ulint matched_fields = 0; - ulint matched_bytes = 0; bool rec_is_last_on_page; rec = btr_pcur_get_rec(&pcur); @@ -1180,6 +1171,8 @@ dict_stats_analyze_index_level( (*total_recs)++; if (prev_rec != NULL) { + ulint matched_fields; + prev_rec_offsets = rec_get_offsets( prev_rec, index, prev_rec_offsets, n_uniq, &heap); @@ -1190,8 +1183,7 @@ dict_stats_analyze_index_level( prev_rec_offsets, index, FALSE, - &matched_fields, - &matched_bytes); + &matched_fields); for (i = matched_fields; i < n_uniq; i++) { @@ -1320,12 +1312,7 @@ dict_stats_analyze_index_level( btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr); btr_pcur_close(&pcur); - - if (prev_rec_buf != NULL) { - - mem_free(prev_rec_buf); - } - + ut_free(prev_rec_buf); mem_heap_free(heap); } @@ -1368,7 +1355,7 @@ dict_stats_scan_page( const rec_t** out_rec, ulint* offsets1, ulint* offsets2, - dict_index_t* index, + const dict_index_t* index, const page_t* page, ulint n_prefix, page_scan_method_t scan_method, @@ -1420,8 +1407,7 @@ dict_stats_scan_page( while (!page_rec_is_supremum(next_rec)) { - ulint matched_fields = 0; - ulint matched_bytes = 0; + ulint matched_fields; offsets_next_rec = rec_get_offsets(next_rec, index, offsets_next_rec, @@ -1432,8 +1418,7 @@ dict_stats_scan_page( the first n_prefix fields */ cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, - index, FALSE, &matched_fields, - &matched_bytes); + index, FALSE, &matched_fields); if (matched_fields < n_prefix) { /* rec != next_rec, => rec is non-boring */ @@ -1441,7 +1426,7 @@ dict_stats_scan_page( (*n_diff)++; if (scan_method == QUIT_ON_FIRST_NON_BORING) { - goto func_exit; + 
break; } } @@ -1454,7 +1439,7 @@ dict_stats_scan_page( place where offsets_rec was pointing before because we have just 2 placeholders where data is actually stored: - offsets_onstack1 and offsets_onstack2 and we + offsets1 and offsets2 and we are using them in circular fashion (offsets[_next]_rec are just pointers to those placeholders). */ @@ -1472,7 +1457,6 @@ dict_stats_scan_page( next_rec = get_next(next_rec); } -func_exit: /* offsets1,offsets2 should have been big enough */ ut_a(heap == NULL); *out_rec = rec; @@ -1498,10 +1482,7 @@ dict_stats_analyze_index_below_cur( ib_uint64_t* n_external_pages) { dict_index_t* index; - ulint space; - ulint zip_size; buf_block_t* block; - ulint page_no; const page_t* page; mem_heap_t* heap; const rec_t* rec; @@ -1534,15 +1515,15 @@ dict_stats_analyze_index_below_cur( rec_offs_set_n_alloc(offsets1, size); rec_offs_set_n_alloc(offsets2, size); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - rec = btr_cur_get_rec(cur); offsets_rec = rec_get_offsets(rec, index, offsets1, ULINT_UNDEFINED, &heap); - page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); + page_id_t page_id(dict_index_get_space(index), + btr_node_ptr_get_child_page_no( + rec, offsets_rec)); + const page_size_t page_size(dict_table_page_size(index->table)); /* assume no external pages by default - in case we quit from this function without analyzing any leaf pages */ @@ -1553,9 +1534,11 @@ dict_stats_analyze_index_below_cur( /* descend to the leaf level on the B-tree */ for (;;) { - block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, + dberr_t err = DB_SUCCESS; + + block = buf_page_get_gen(page_id, page_size, RW_S_LATCH, NULL /* no guessed block */, - BUF_GET, __FILE__, __LINE__, &mtr); + BUF_GET, __FILE__, __LINE__, &mtr, &err); page = buf_block_get_frame(block); @@ -1599,7 +1582,8 @@ dict_stats_analyze_index_below_cur( /* we have a non-boring record in rec, descend below it */ - page_no = 
btr_node_ptr_get_child_page_no(rec, offsets_rec); + page_id.set_page_no( + btr_node_ptr_get_child_page_no(rec, offsets_rec)); } /* make sure we got a leaf page as a result from the above loop */ @@ -1693,20 +1677,20 @@ dict_stats_analyze_index_for_n_prefix( ib_uint64_t i; #if 0 - DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, " - "n_diff_on_level=" UINT64PF ")\n", + DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu," + " n_diff_on_level=" UINT64PF ")\n", __func__, index->table->name, index->name, level, n_prefix, n_diff_data->n_diff_on_level); #endif ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); + MTR_MEMO_SX_LOCK)); /* Position pcur on the leftmost record on the leftmost page on the desired level. */ btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, + true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED, &pcur, true, n_diff_data->level, mtr); btr_pcur_move_to_next_on_page(&pcur); @@ -1833,7 +1817,7 @@ dict_stats_analyze_index_for_n_prefix( &n_external_pages); /* We adjust n_diff_on_leaf_page here to avoid counting - one record twice - once as the last on some page and once + one value twice - once as the last on some page and once as the first on another page. 
Consider the following example: Leaf level: page: (2,2,2,2,3,3) @@ -1881,7 +1865,7 @@ dict_stats_index_set_n_diff( ut_ad(data->n_leaf_pages_to_analyze > 0); ut_ad(data->n_recs_on_level > 0); - ulint n_ordinary_leaf_pages; + ib_uint64_t n_ordinary_leaf_pages; if (data->level == 1) { /* If we know the number of records on level 1, then @@ -1953,10 +1937,15 @@ dict_stats_analyze_index( ulint size; DBUG_ENTER("dict_stats_analyze_index"); - DBUG_PRINT("info", ("index: %s, online status: %d", index->name, + DBUG_PRINT("info", ("index: %s, online status: %d", index->name(), dict_index_get_online_status(index))); - DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name); + /* Disable update statistic for Rtree */ + if (dict_index_is_spatial(index)) { + DBUG_VOID_RETURN; + } + + DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name()); dict_stats_empty_index(index, false); @@ -1987,7 +1976,7 @@ dict_stats_analyze_index( mtr_start(&mtr); - mtr_s_lock(dict_index_get_lock(index), &mtr); + mtr_sx_lock(dict_index_get_lock(index), &mtr); root_level = btr_height_get(index, &mtr); @@ -2006,11 +1995,11 @@ dict_stats_analyze_index( || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) { if (root_level == 0) { - DEBUG_PRINTF(" %s(): just one page, " - "doing full scan\n", __func__); + DEBUG_PRINTF(" %s(): just one page," + " doing full scan\n", __func__); } else { - DEBUG_PRINTF(" %s(): too many pages requested for " - "sampling, doing full scan\n", __func__); + DEBUG_PRINTF(" %s(): too many pages requested for" + " sampling, doing full scan\n", __func__); } /* do full scan of level 0; save results directly @@ -2036,16 +2025,18 @@ dict_stats_analyze_index( /* For each level that is being scanned in the btree, this contains the number of different key values for all possible n-column prefixes. 
*/ - ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq]; + ib_uint64_t* n_diff_on_level = UT_NEW_ARRAY( + ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level); /* For each level that is being scanned in the btree, this contains the index of the last record from each group of equal records (when comparing only the first n columns, n=1..n_uniq). */ - boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq]; + boundaries_t* n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t, + n_uniq); /* For each n-column prefix this array contains the input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]. */ - n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq]; + n_diff_data_t* n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq); /* total_recs is also used to estimate the number of pages on one level below, so at the start we have 1 page (the root) */ @@ -2066,15 +2057,15 @@ dict_stats_analyze_index( for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) { - DEBUG_PRINTF(" %s(): searching level with >=%llu " - "distinct records, n_prefix=%lu\n", + DEBUG_PRINTF(" %s(): searching level with >=%llu" + " distinct records, n_prefix=%lu\n", __func__, N_DIFF_REQUIRED(index), n_prefix); /* Commit the mtr to release the tree S lock to allow other threads to do some work too. */ mtr_commit(&mtr); mtr_start(&mtr); - mtr_s_lock(dict_index_get_lock(index), &mtr); + mtr_sx_lock(dict_index_get_lock(index), &mtr); if (root_level != btr_height_get(index, &mtr)) { /* Just quit if the tree has changed beyond recognition here. The old stats from previous @@ -2213,9 +2204,9 @@ found_level: mtr_commit(&mtr); - delete[] n_diff_boundaries; + UT_DELETE_ARRAY(n_diff_boundaries); - delete[] n_diff_on_level; + UT_DELETE_ARRAY(n_diff_on_level); /* n_prefix == 0 means that the above loop did not end up prematurely due to tree being changed and so n_diff_data[] is set up. 
*/ @@ -2223,7 +2214,7 @@ found_level: dict_stats_index_set_n_diff(n_diff_data, index); } - delete[] n_diff_data; + UT_DELETE_ARRAY(n_diff_data); dict_stats_assert_initialized_index(index); DBUG_VOID_RETURN; @@ -2261,7 +2252,7 @@ dict_stats_update_persistent( return(DB_CORRUPTION); } - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); dict_stats_analyze_index(index); @@ -2279,9 +2270,9 @@ dict_stats_update_persistent( index != NULL; index = dict_table_get_next_index(index)) { - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); - if (index->type & DICT_FTS) { + if (index->type & DICT_FTS || dict_index_is_spatial(index)) { continue; } @@ -2325,7 +2316,6 @@ storage. allocate and free the trx object. If it is not NULL then it will be rolled back only in the case of error, but not freed. @return DB_SUCCESS or error code */ -static dberr_t dict_stats_save_index_stat( dict_index_t* index, @@ -2336,23 +2326,20 @@ dict_stats_save_index_stat( const char* stat_description, trx_t* trx) { - pars_info_t* pinfo; dberr_t ret; + pars_info_t* pinfo; char db_utf8[MAX_DB_UTF8_LEN]; char table_utf8[MAX_TABLE_UTF8_LEN]; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); - dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8), + dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); pinfo = pars_info_create(); pars_info_add_str_literal(pinfo, "database_name", db_utf8); pars_info_add_str_literal(pinfo, "table_name", table_utf8); - UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name)); pars_info_add_str_literal(pinfo, "index_name", index->name); UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4); pars_info_add_int4_literal(pinfo, "last_update", last_update); @@ -2400,17 +2387,11 @@ dict_stats_save_index_stat( if (ret != DB_SUCCESS) { if 
(innodb_index_stats_not_found == false && index->stats_error_printed == false) { - char buf_table[MAX_FULL_NAME_LEN]; - char buf_index[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot save index statistics for table " - "%s, index %s, stat name \"%s\": %s\n", - ut_format_name(index->table->name, TRUE, - buf_table, sizeof(buf_table)), - ut_format_name(index->name, FALSE, - buf_index, sizeof(buf_index)), - stat_name, ut_strerr(ret)); + ib::error() << "Cannot save index statistics for table " + << index->table->name + << ", index " << index->name + << ", stat name \"" << stat_name << "\": " + << ut_strerr(ret); index->stats_error_printed = true; } } @@ -2419,15 +2400,14 @@ dict_stats_save_index_stat( } /** Save the table's statistics into the persistent statistics storage. -@param[in] table_orig table whose stats to save -@param[in] only_for_index if this is non-NULL, then stats for indexes -that are not equal to it will not be saved, if NULL, then all -indexes' stats are saved +@param[in] table_orig table whose stats to save +@param[in] only_for_index if this is non-NULL, then stats for indexes +that are not equal to it will not be saved, if NULL, then all indexes' stats +are saved @return DB_SUCCESS or error code */ static dberr_t dict_stats_save( -/*============*/ dict_table_t* table_orig, const index_id_t* only_for_index) { @@ -2440,10 +2420,10 @@ dict_stats_save( table = dict_stats_snapshot_create(table_orig); - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); /* MySQL's timestamp is 4 byte, so we use @@ -2485,16 +2465,11 @@ dict_stats_save( "END;", NULL); if (ret != DB_SUCCESS) { - char buf[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot save table statistics for table " - "%s: 
%s\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf)), - ut_strerr(ret)); + ib::error() << "Cannot save table statistics for table " + << table->name << ": " << ut_strerr(ret); mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); dict_stats_snapshot_free(table); @@ -2502,10 +2477,17 @@ dict_stats_save( } trx_t* trx = trx_allocate_for_background(); - trx_start_if_not_started(trx); + + if (srv_read_only_mode) { + trx_start_internal_read_only(trx); + } else { + trx_start_internal(trx); + } dict_index_t* index; - index_map_t indexes; + index_map_t indexes( + (ut_strcmp_functor()), + index_map_t_allocator(mem_key_dict_stats_index_map_t)); /* Below we do all the modifications in innodb_index_stats in a single transaction for performance reasons. Modifying more than one row in a @@ -2540,7 +2522,7 @@ dict_stats_save( continue; } - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); for (ulint i = 0; i < index->n_uniq; i++) { @@ -2551,10 +2533,10 @@ dict_stats_save( ut_snprintf(stat_name, sizeof(stat_name), "n_diff_pfx%02lu", i + 1); - /* craft a string that contains the columns names */ + /* craft a string that contains the column names */ ut_snprintf(stat_description, sizeof(stat_description), - "%s", index->fields[0].name); + "%s", index->fields[0].name()); for (j = 1; j <= i; j++) { size_t len; @@ -2562,7 +2544,7 @@ dict_stats_save( ut_snprintf(stat_description + len, sizeof(stat_description) - len, - ",%s", index->fields[j].name); + ",%s", index->fields[j].name()); } ret = dict_stats_save_index_stat( @@ -2601,7 +2583,7 @@ end: trx_free_for_background(trx); mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); dict_stats_snapshot_free(table); @@ -2760,7 +2742,8 @@ dict_stats_fetch_index_stats_step( index != NULL; index = dict_table_get_next_index(index)) { - if (strlen(index->name) == len + if (index->is_committed() + && 
strlen(index->name) == len && memcmp(index->name, data, len) == 0) { /* the corresponding index was found */ break; @@ -2886,24 +2869,19 @@ dict_stats_fetch_index_stats_step( char db_utf8[MAX_DB_UTF8_LEN]; char table_utf8[MAX_TABLE_UTF8_LEN]; - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + dict_fs2utf8(table->name.m_name, + db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Ignoring strange row from " - "%s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s' AND " - "stat_name = '%.*s'; because stat_name " - "is malformed\n", - INDEX_STATS_NAME_PRINT, - db_utf8, - table_utf8, - index->name, - (int) stat_name_len, - stat_name); + ib::info out; + out << "Ignoring strange row from " + << INDEX_STATS_NAME_PRINT << " WHERE" + " database_name = '" << db_utf8 + << "' AND table_name = '" << table_utf8 + << "' AND index_name = '" << index->name() + << "' AND stat_name = '"; + out.write(stat_name, stat_name_len); + out << "'; because stat_name is malformed"; return(TRUE); } /* else */ @@ -2919,26 +2897,21 @@ dict_stats_fetch_index_stats_step( char db_utf8[MAX_DB_UTF8_LEN]; char table_utf8[MAX_TABLE_UTF8_LEN]; - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + dict_fs2utf8(table->name.m_name, + db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Ignoring strange row from " - "%s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s' AND " - "stat_name = '%.*s'; because stat_name is " - "out of range, the index has %lu unique " - "columns\n", - INDEX_STATS_NAME_PRINT, - db_utf8, - table_utf8, - index->name, - (int) stat_name_len, - stat_name, - n_uniq); + ib::info out; + out << "Ignoring strange row from " + << INDEX_STATS_NAME_PRINT << " WHERE" + " database_name = '" << db_utf8 + << "' AND table_name = '" << table_utf8 + << "' AND index_name = '" << index->name() + 
<< "' AND stat_name = '"; + out.write(stat_name, stat_name_len); + out << "'; because stat_name is out of range, the index" + " has " << n_uniq << " unique columns"; + return(TRUE); } /* else */ @@ -2997,9 +2970,13 @@ dict_stats_fetch_from_ps( trx->isolation_level = TRX_ISO_READ_UNCOMMITTED; - trx_start_if_not_started(trx); + if (srv_read_only_mode) { + trx_start_internal_read_only(trx); + } else { + trx_start_internal(trx); + } - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); pinfo = pars_info_create(); @@ -3106,7 +3083,6 @@ dict_stats_empty_defrag_modified_counter( /*********************************************************************//** Fetches or calculates new estimates for index statistics. */ -UNIV_INTERN void dict_stats_update_for_index( /*========================*/ @@ -3131,18 +3107,13 @@ dict_stats_update_for_index( index->stats_error_printed == false) { /* Fall back to transient stats since the persistent storage is not present or is corrupted */ - char buf_table[MAX_FULL_NAME_LEN]; - char buf_index[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Recalculation of persistent statistics " - "requested for table %s index %s but the required " - "persistent statistics storage is not present or is " - "corrupted. Using transient stats instead.\n", - ut_format_name(index->table->name, TRUE, - buf_table, sizeof(buf_table)), - ut_format_name(index->name, FALSE, - buf_index, sizeof(buf_index))); + + ib::info() << "Recalculation of persistent statistics" + " requested for table " << index->table->name + << " index " << index->name + << " but the required" + " persistent statistics storage is not present or is" + " corrupted. Using transient stats instead."; index->stats_error_printed = false; } } @@ -3158,7 +3129,6 @@ dict_stats_update_for_index( Calculates new estimates for table and index statistics. 
The statistics are used in query optimization. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_update( /*==============*/ @@ -3169,17 +3139,15 @@ dict_stats_update( the persistent statistics storage */ { - char buf[MAX_FULL_NAME_LEN]; - ut_ad(!mutex_own(&dict_sys->mutex)); if (table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: cannot calculate statistics for table %s " - "because the .ibd file is missing. For help, please " - "refer to " REFMAN "innodb-troubleshooting.html\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf))); + + ib::warn() << "Cannot calculate statistics for table " + << table->name + << " because the .ibd file is missing. " + << TROUBLESHOOTING_MSG; + dict_stats_empty_table(table, true); return(DB_TABLESPACE_DELETED); } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { @@ -3205,7 +3173,7 @@ dict_stats_update( /* InnoDB internal tables (e.g. SYS_TABLES) cannot have persistent stats enabled */ - ut_a(strchr(table->name, '/') != NULL); + ut_a(strchr(table->name.m_name, '/') != NULL); /* check if the persistent statistics storage exists before calling the potentially slow function @@ -3231,13 +3199,12 @@ dict_stats_update( if (innodb_table_stats_not_found == false && table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Recalculation of persistent statistics " - "requested for table %s but the required persistent " - "statistics storage is not present or is corrupted. " - "Using transient stats instead.\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf))); + ib::warn() << "Recalculation of persistent statistics" + " requested for table " + << table->name + << " but the required persistent" + " statistics storage is not present or is corrupted." + " Using transient stats instead."; table->stats_error_printed = true; } @@ -3277,7 +3244,7 @@ dict_stats_update( /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have persistent stats enabled */ - ut_a(strchr(table->name, '/') != NULL); + ut_a(strchr(table->name.m_name, '/') != NULL); if (!dict_stats_persistent_storage_check(false)) { /* persistent statistics storage does not exist @@ -3285,18 +3252,15 @@ dict_stats_update( if (innodb_table_stats_not_found == false && table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Fetch of persistent " - "statistics requested for table %s but the " - "required system tables %s and %s are not " - "present or have unexpected structure. " - "Using transient stats instead.\n", - ut_format_name(table->name, TRUE, - buf, sizeof(buf)), - TABLE_STATS_NAME_PRINT, - INDEX_STATS_NAME_PRINT); - table->stats_error_printed = true; + ib::error() << "Fetch of persistent statistics" + " requested for table " + << table->name + << " but the required system tables " + << TABLE_STATS_NAME_PRINT + << " and " << INDEX_STATS_NAME_PRINT + << " are not present or have unexpected" + " structure. Using transient stats instead."; + table->stats_error_printed = true; } goto transient; @@ -3348,20 +3312,18 @@ dict_stats_update( DICT_STATS_RECALC_PERSISTENT)); } - ut_format_name(table->name, TRUE, buf, sizeof(buf)); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Trying to use table %s which has " - "persistent statistics enabled, but auto " - "recalculation turned off and the statistics " - "do not exist in %s and %s. Please either run " - "\"ANALYZE TABLE %s;\" manually or enable the " - "auto recalculation with " - "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". " - "InnoDB will now use transient statistics for " - "%s.\n", - buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf, - buf, buf); + ib::info() << "Trying to use table " << table->name + << " which has persistent statistics enabled," + " but auto recalculation turned off and the" + " statistics do not exist in " + TABLE_STATS_NAME_PRINT + " and " INDEX_STATS_NAME_PRINT + ". 
Please either run \"ANALYZE TABLE " + << table->name << ";\" manually or enable the" + " auto recalculation with \"ALTER TABLE " + << table->name << " STATS_AUTO_RECALC=1;\"." + " InnoDB will now use transient statistics for " + << table->name << "."; goto transient; default: @@ -3370,16 +3332,12 @@ dict_stats_update( if (innodb_table_stats_not_found == false && table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error fetching persistent statistics " - "for table %s from %s and %s: %s. " - "Using transient stats method instead.\n", - ut_format_name(table->name, TRUE, buf, - sizeof(buf)), - TABLE_STATS_NAME, - INDEX_STATS_NAME, - ut_strerr(err)); + ib::error() << "Error fetching persistent statistics" + " for table " + << table->name + << " from " TABLE_STATS_NAME_PRINT " and " + INDEX_STATS_NAME_PRINT ": " << ut_strerr(err) + << ". Using transient stats method instead."; } goto transient; @@ -3410,7 +3368,6 @@ marko: If ibuf merges are not disabled, we need to scan the *.ibd files. But we shouldn't open *.ibd files before we have rolled back dict transactions and opened the SYS_* records for the *.ibd files. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_drop_index( /*==================*/ @@ -3445,7 +3402,7 @@ dict_stats_drop_index( pars_info_add_str_literal(pinfo, "index_name", iname); - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); ret = dict_stats_exec_sql( @@ -3459,7 +3416,7 @@ dict_stats_drop_index( "END;\n", NULL); mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); if (ret == DB_STATS_DO_NOT_EXIST) { ret = DB_SUCCESS; @@ -3467,12 +3424,12 @@ dict_stats_drop_index( if (ret != DB_SUCCESS) { ut_snprintf(errstr, errstr_sz, - "Unable to delete statistics for index %s " - "from %s%s: %s. 
They can be deleted later using " - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s';", + "Unable to delete statistics for index %s" + " from %s%s: %s. They can be deleted later using" + " DELETE FROM %s WHERE" + " database_name = '%s' AND" + " table_name = '%s' AND" + " index_name = '%s';", iname, INDEX_STATS_NAME_PRINT, (ret == DB_LOCK_WAIT_TIMEOUT @@ -3507,9 +3464,7 @@ dict_stats_delete_from_table_stats( pars_info_t* pinfo; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); pinfo = pars_info_create(); @@ -3545,9 +3500,7 @@ dict_stats_delete_from_index_stats( pars_info_t* pinfo; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); pinfo = pars_info_create(); @@ -3572,7 +3525,6 @@ Removes the statistics for a table and all of its indexes from the persistent statistics storage if it exists and if there is data stored for the table. This function creates its own transaction and commits it. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_drop_table( /*==================*/ @@ -3585,9 +3537,7 @@ dict_stats_drop_table( char table_utf8[MAX_TABLE_UTF8_LEN]; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); /* skip tables that do not contain a database name @@ -3620,16 +3570,16 @@ dict_stats_drop_table( if (ret != DB_SUCCESS) { ut_snprintf(errstr, errstr_sz, - "Unable to delete statistics for table %s.%s: %s. " - "They can be deleted later using " + "Unable to delete statistics for table %s.%s: %s." 
+ " They can be deleted later using" - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s'; " + " DELETE FROM %s WHERE" + " database_name = '%s' AND" + " table_name = '%s';" - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s';", + " DELETE FROM %s WHERE" + " database_name = '%s' AND" + " table_name = '%s';", db_utf8, table_utf8, ut_strerr(ret), @@ -3653,8 +3603,8 @@ Creates its own transaction and commits it. @return DB_SUCCESS or error code */ UNIV_INLINE dberr_t -dict_stats_rename_in_table_stats( -/*=============================*/ +dict_stats_rename_table_in_table_stats( +/*===================================*/ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */ @@ -3663,9 +3613,7 @@ dict_stats_rename_in_table_stats( pars_info_t* pinfo; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); pinfo = pars_info_create(); @@ -3677,7 +3625,7 @@ dict_stats_rename_in_table_stats( ret = dict_stats_exec_sql( pinfo, - "PROCEDURE RENAME_IN_TABLE_STATS () IS\n" + "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n" "BEGIN\n" "UPDATE \"" TABLE_STATS_NAME "\" SET\n" "database_name = :new_dbname_utf8,\n" @@ -3699,8 +3647,8 @@ Creates its own transaction and commits it. @return DB_SUCCESS or error code */ UNIV_INLINE dberr_t -dict_stats_rename_in_index_stats( -/*=============================*/ +dict_stats_rename_table_in_index_stats( +/*===================================*/ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ const char* new_dbname_utf8,/*!< in: database name, e.g. 
'newdb' */ @@ -3709,9 +3657,7 @@ dict_stats_rename_in_index_stats( pars_info_t* pinfo; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(mutex_own(&dict_sys->mutex)); pinfo = pars_info_create(); @@ -3723,7 +3669,7 @@ dict_stats_rename_in_index_stats( ret = dict_stats_exec_sql( pinfo, - "PROCEDURE RENAME_IN_INDEX_STATS () IS\n" + "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n" "BEGIN\n" "UPDATE \"" INDEX_STATS_NAME "\" SET\n" "database_name = :new_dbname_utf8,\n" @@ -3740,7 +3686,6 @@ dict_stats_rename_in_index_stats( Renames a table in InnoDB persistent stats storage. This function creates its own transaction and commits it. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_rename_table( /*====================*/ @@ -3756,9 +3701,7 @@ dict_stats_rename_table( char new_table_utf8[MAX_TABLE_UTF8_LEN]; dberr_t ret; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X)); ut_ad(!mutex_own(&dict_sys->mutex)); /* skip innodb_table_stats and innodb_index_stats themselves */ @@ -3776,14 +3719,14 @@ dict_stats_rename_table( dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8), new_table_utf8, sizeof(new_table_utf8)); - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); ulint n_attempts = 0; do { n_attempts++; - ret = dict_stats_rename_in_table_stats( + ret = dict_stats_rename_table_in_table_stats( old_db_utf8, old_table_utf8, new_db_utf8, new_table_utf8); @@ -3798,9 +3741,9 @@ dict_stats_rename_table( if (ret != DB_SUCCESS) { mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); os_thread_sleep(200000 /* 0.2 sec */); - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); 
mutex_enter(&dict_sys->mutex); } } while ((ret == DB_DEADLOCK @@ -3810,16 +3753,16 @@ dict_stats_rename_table( if (ret != DB_SUCCESS) { ut_snprintf(errstr, errstr_sz, - "Unable to rename statistics from " - "%s.%s to %s.%s in %s: %s. " - "They can be renamed later using " + "Unable to rename statistics from" + " %s.%s to %s.%s in %s: %s." + " They can be renamed later using" - "UPDATE %s SET " - "database_name = '%s', " - "table_name = '%s' " - "WHERE " - "database_name = '%s' AND " - "table_name = '%s';", + " UPDATE %s SET" + " database_name = '%s'," + " table_name = '%s'" + " WHERE" + " database_name = '%s' AND" + " table_name = '%s';", old_db_utf8, old_table_utf8, new_db_utf8, new_table_utf8, @@ -3830,7 +3773,7 @@ dict_stats_rename_table( new_db_utf8, new_table_utf8, old_db_utf8, old_table_utf8); mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); return(ret); } /* else */ @@ -3839,7 +3782,7 @@ dict_stats_rename_table( do { n_attempts++; - ret = dict_stats_rename_in_index_stats( + ret = dict_stats_rename_table_in_index_stats( old_db_utf8, old_table_utf8, new_db_utf8, new_table_utf8); @@ -3854,9 +3797,9 @@ dict_stats_rename_table( if (ret != DB_SUCCESS) { mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); os_thread_sleep(200000 /* 0.2 sec */); - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); } } while ((ret == DB_DEADLOCK @@ -3865,20 +3808,20 @@ dict_stats_rename_table( && n_attempts < 5); mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); if (ret != DB_SUCCESS) { ut_snprintf(errstr, errstr_sz, - "Unable to rename statistics from " - "%s.%s to %s.%s in %s: %s. " - "They can be renamed later using " + "Unable to rename statistics from" + " %s.%s to %s.%s in %s: %s." 
+ " They can be renamed later using" - "UPDATE %s SET " - "database_name = '%s', " - "table_name = '%s' " - "WHERE " - "database_name = '%s' AND " - "table_name = '%s';", + " UPDATE %s SET" + " database_name = '%s'," + " table_name = '%s'" + " WHERE" + " database_name = '%s' AND" + " table_name = '%s';", old_db_utf8, old_table_utf8, new_db_utf8, new_table_utf8, @@ -3894,118 +3837,64 @@ dict_stats_rename_table( } /*********************************************************************//** -Save defragmentation result. -@return DB_SUCCESS or error code */ -UNIV_INTERN +Renames an index in InnoDB persistent stats storage. +This function creates its own transaction and commits it. +@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned +if the persistent stats do not exist. */ dberr_t -dict_stats_save_defrag_summary( - dict_index_t* index) /*!< in: index */ +dict_stats_rename_index( +/*====================*/ + const dict_table_t* table, /*!< in: table whose index + is renamed */ + const char* old_index_name, /*!< in: old index name */ + const char* new_index_name) /*!< in: new index name */ { - dberr_t ret; - lint now = (lint) ut_time(); - if (dict_index_is_univ(index)) { - return DB_SUCCESS; - } - rw_lock_x_lock(&dict_operation_lock); + rw_lock_x_lock(dict_operation_lock); mutex_enter(&dict_sys->mutex); - ret = dict_stats_save_index_stat(index, now, "n_pages_freed", - index->stat_defrag_n_pages_freed, - NULL, - "Number of pages freed during" - " last defragmentation run.", - NULL); - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - return (ret); -} + if (!dict_stats_persistent_storage_check(true)) { + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(dict_operation_lock); + return(DB_STATS_DO_NOT_EXIST); + } + + char dbname_utf8[MAX_DB_UTF8_LEN]; + char tablename_utf8[MAX_TABLE_UTF8_LEN]; + + dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8), + tablename_utf8, sizeof(tablename_utf8)); + + pars_info_t* pinfo; + + 
pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8); + pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8); + pars_info_add_str_literal(pinfo, "new_index_name", new_index_name); + pars_info_add_str_literal(pinfo, "old_index_name", old_index_name); -/*********************************************************************//** -Save defragmentation stats for a given index. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_save_defrag_stats( - dict_index_t* index) /*!< in: index */ -{ dberr_t ret; - if (index->table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot save defragment stats because " - ".ibd file is missing.\n"); - return (DB_TABLESPACE_DELETED); - } - if (dict_index_is_corrupted(index)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot save defragment stats because " - "index is corrupted.\n"); - return(DB_CORRUPTION); - } + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n" + "BEGIN\n" + "UPDATE \"" INDEX_STATS_NAME "\" SET\n" + "index_name = :new_index_name\n" + "WHERE\n" + "database_name = :dbname_utf8 AND\n" + "table_name = :tablename_utf8 AND\n" + "index_name = :old_index_name;\n" + "END;\n", NULL); - if (dict_index_is_univ(index)) { - return DB_SUCCESS; - } - - lint now = (lint) ut_time(); - mtr_t mtr; - ulint n_leaf_pages; - ulint n_leaf_reserved; - mtr_start(&mtr); - mtr_s_lock(dict_index_get_lock(index), &mtr); - n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, - &n_leaf_pages, &mtr); - mtr_commit(&mtr); - - if (n_leaf_reserved == ULINT_UNDEFINED) { - // The index name is different during fast index creation, - // so the stats won't be associated with the right index - // for later use. We just return without saving. 
- return DB_SUCCESS; - } - - rw_lock_x_lock(&dict_operation_lock); - - mutex_enter(&dict_sys->mutex); - ret = dict_stats_save_index_stat(index, now, "n_page_split", - index->stat_defrag_n_page_split, - NULL, - "Number of new page splits on leaves" - " since last defragmentation.", - NULL); - if (ret != DB_SUCCESS) { - goto end; - } - - ret = dict_stats_save_index_stat( - index, now, "n_leaf_pages_defrag", - n_leaf_pages, - NULL, - "Number of leaf pages when this stat is saved to disk", - NULL); - if (ret != DB_SUCCESS) { - goto end; - } - - ret = dict_stats_save_index_stat( - index, now, "n_leaf_pages_reserved", - n_leaf_reserved, - NULL, - "Number of pages reserved for this index leaves when this stat " - "is saved to disk", - NULL); - -end: mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); + rw_lock_x_unlock(dict_operation_lock); - return (ret); + return(ret); } /* tests @{ */ -#ifdef UNIV_COMPILE_TEST_FUNCS +#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS /* The following unit tests test some of the functions in this file individually, such testing cannot be performed by the mysql-test framework @@ -4049,7 +3938,7 @@ test_dict_table_schema_check() /* prevent any data dictionary modifications while we are checking the tables' structure */ - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); /* check that a valid table is reported as valid */ schema.n_cols = 7; @@ -4066,11 +3955,11 @@ test_dict_table_schema_check() schema.columns[1].len = 8; if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) != DB_SUCCESS) { - printf("OK: test.tcheck.c02 has different length and is " - "reported as corrupted\n"); + printf("OK: test.tcheck.c02 has different length and is" + " reported as corrupted\n"); } else { - printf("OK: test.tcheck.c02 has different length but is " - "reported as ok\n"); + printf("OK: test.tcheck.c02 has different length but is" + " reported as ok\n"); goto test_dict_table_schema_check_end; } schema.columns[1].len = 4; @@ 
-4080,11 +3969,11 @@ test_dict_table_schema_check() schema.columns[1].prtype_mask |= DATA_NOT_NULL; if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) != DB_SUCCESS) { - printf("OK: test.tcheck.c02 does not have NOT NULL while " - "it should and is reported as corrupted\n"); + printf("OK: test.tcheck.c02 does not have NOT NULL while" + " it should and is reported as corrupted\n"); } else { - printf("ERROR: test.tcheck.c02 does not have NOT NULL while " - "it should and is not reported as corrupted\n"); + printf("ERROR: test.tcheck.c02 does not have NOT NULL while" + " it should and is not reported as corrupted\n"); goto test_dict_table_schema_check_end; } schema.columns[1].prtype_mask &= ~DATA_NOT_NULL; @@ -4093,23 +3982,23 @@ test_dict_table_schema_check() schema.n_cols = 6; if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) == DB_SUCCESS) { - printf("ERROR: test.tcheck has more columns but is not " - "reported as corrupted\n"); + printf("ERROR: test.tcheck has more columns but is not" + " reported as corrupted\n"); goto test_dict_table_schema_check_end; } else { - printf("OK: test.tcheck has more columns and is " - "reported as corrupted\n"); + printf("OK: test.tcheck has more columns and is" + " reported as corrupted\n"); } /* check a table that has some columns missing */ schema.n_cols = 8; if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) != DB_SUCCESS) { - printf("OK: test.tcheck has missing columns and is " - "reported as corrupted\n"); + printf("OK: test.tcheck has missing columns and is" + " reported as corrupted\n"); } else { - printf("ERROR: test.tcheck has missing columns but is " - "reported as ok\n"); + printf("ERROR: test.tcheck has missing columns but is" + " reported as ok\n"); goto test_dict_table_schema_check_end; } @@ -4125,7 +4014,7 @@ test_dict_table_schema_check() test_dict_table_schema_check_end: - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); } /* @} */ @@ -4177,13 +4066,13 @@ 
test_dict_stats_save() dberr_t ret; /* craft a dummy dict_table_t */ - table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); + table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); table.stat_n_rows = TEST_N_ROWS; table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE; table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES; - UT_LIST_INIT(table.indexes); - UT_LIST_ADD_LAST(indexes, table.indexes, &index1); - UT_LIST_ADD_LAST(indexes, table.indexes, &index2); + UT_LIST_INIT(table.indexes, &dict_index_t::indexes); + UT_LIST_ADD_LAST(table.indexes, &index1); + UT_LIST_ADD_LAST(table.indexes, &index2); ut_d(table.magic_n = DICT_TABLE_MAGIC_N); ut_d(index1.magic_n = DICT_INDEX_MAGIC_N); @@ -4227,8 +4116,8 @@ test_dict_stats_save() ut_a(ret == DB_SUCCESS); - printf("\nOK: stats saved successfully, now go ahead and read " - "what's inside %s and %s:\n\n", + printf("\nOK: stats saved successfully, now go ahead and read" + " what's inside %s and %s:\n\n", TABLE_STATS_NAME_PRINT, INDEX_STATS_NAME_PRINT); @@ -4329,10 +4218,10 @@ test_dict_stats_fetch_from_ps() dberr_t ret; /* craft a dummy dict_table_t */ - table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); - UT_LIST_INIT(table.indexes); - UT_LIST_ADD_LAST(indexes, table.indexes, &index1); - UT_LIST_ADD_LAST(indexes, table.indexes, &index2); + table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); + UT_LIST_INIT(table.indexes, &dict_index_t::indexes); + UT_LIST_ADD_LAST(table.indexes, &index1); + UT_LIST_ADD_LAST(table.indexes, &index2); ut_d(table.magic_n = DICT_TABLE_MAGIC_N); index1.name = TEST_IDX1_NAME; @@ -4390,7 +4279,7 @@ test_dict_stats_all() } /* @} */ -#endif /* UNIV_COMPILE_TEST_FUNCS */ +#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */ /* @} */ #endif /* UNIV_HOTBACKUP */ diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 7aefa6a1d4d..dbc8e90bed6 100644 --- a/storage/innobase/dict/dict0stats_bg.cc 
+++ b/storage/innobase/dict/dict0stats_bg.cc @@ -23,11 +23,13 @@ Code used for background table and index stats gathering. Created Apr 25, 2012 Vasil Dimov *******************************************************/ -#include "row0mysql.h" -#include "srv0start.h" #include "dict0dict.h" #include "dict0stats.h" #include "dict0stats_bg.h" +#include "dict0defrag_bg.h" +#include "row0mysql.h" +#include "srv0start.h" +#include "ut0new.h" #ifdef UNIV_NONINL # include "dict0stats_bg.ic" @@ -41,47 +43,65 @@ Created Apr 25, 2012 Vasil Dimov #define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE) /** Event to wake up the stats thread */ -UNIV_INTERN os_event_t dict_stats_event = NULL; +os_event_t dict_stats_event = NULL; + +/** Variable to initiate shutdown the dict stats thread. Note we don't +use 'srv_shutdown_state' because we want to shutdown dict stats thread +before purge thread. */ +bool dict_stats_start_shutdown = false; + +/** Event to wait for shutdown of the dict stats thread */ +os_event_t dict_stats_shutdown_event = NULL; + +#ifdef UNIV_DEBUG +/** Used by SET GLOBAL innodb_dict_stats_disabled_debug = 1; */ +my_bool innodb_dict_stats_disabled_debug; + +static os_event_t dict_stats_disabled_event; +#endif /* UNIV_DEBUG */ /** This mutex protects the "recalc_pool" variable. 
*/ static ib_mutex_t recalc_pool_mutex; -static ib_mutex_t defrag_pool_mutex; -#ifdef HAVE_PSI_INTERFACE -static mysql_pfs_key_t recalc_pool_mutex_key; -static mysql_pfs_key_t defrag_pool_mutex_key; -#endif /* HAVE_PSI_INTERFACE */ /** The number of tables that can be added to "recalc_pool" before it is enlarged */ -static const ulint RECALC_POOL_INITIAL_SLOTS = 128; +static const ulint RECALC_POOL_INITIAL_SLOTS = 128; + +/** Allocator type, used by std::vector */ +typedef ut_allocator + recalc_pool_allocator_t; /** The multitude of tables whose stats are to be automatically recalculated - an STL vector */ -typedef std::vector recalc_pool_t; -static recalc_pool_t recalc_pool; +typedef std::vector + recalc_pool_t; -typedef recalc_pool_t::iterator recalc_pool_iterator_t; +/** Iterator type for iterating over the elements of objects of type +recalc_pool_t. */ +typedef recalc_pool_t::iterator + recalc_pool_iterator_t; + +/** Pool where we store information on which tables are to be processed +by background statistics gathering. */ +static recalc_pool_t* recalc_pool; -/** Indices whose defrag stats need to be saved to persistent storage.*/ -struct defrag_pool_item_t { - table_id_t table_id; - index_id_t index_id; -}; -typedef std::vector defrag_pool_t; -static defrag_pool_t defrag_pool; -typedef defrag_pool_t::iterator defrag_pool_iterator_t; /*****************************************************************//** Initialize the recalc pool, called once during thread initialization. 
*/ static void -dict_stats_pool_init() +dict_stats_recalc_pool_init() /*=========================*/ { ut_ad(!srv_read_only_mode); + /* JAN: TODO: MySQL 5.7 PSI + const PSI_memory_key key = mem_key_dict_stats_bg_recalc_pool_t; - recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS); - defrag_pool.reserve(RECALC_POOL_INITIAL_SLOTS); + recalc_pool = UT_NEW(recalc_pool_t(recalc_pool_allocator_t(key)), key); + + recalc_pool->reserve(RECALC_POOL_INITIAL_SLOTS); + */ + recalc_pool = new std::vector(); } /*****************************************************************//** @@ -89,27 +109,14 @@ Free the resources occupied by the recalc pool, called once during thread de-initialization. */ static void -dict_stats_pool_deinit() -/*====================*/ +dict_stats_recalc_pool_deinit() +/*===========================*/ { ut_ad(!srv_read_only_mode); - recalc_pool.clear(); - defrag_pool.clear(); - /* - recalc_pool may still have its buffer allocated. It will free it when - its destructor is called. - The problem is, memory leak detector is run before the recalc_pool's - destructor is invoked, and will report recalc_pool's buffer as leaked - memory. To avoid that, we force recalc_pool to surrender its buffer - to empty_pool object, which will free it when leaving this function: - */ - recalc_pool_t recalc_empty_pool; - defrag_pool_t defrag_empty_pool; - memset(&recalc_empty_pool, 0, sizeof(recalc_pool_t)); - memset(&defrag_empty_pool, 0, sizeof(defrag_pool_t)); - recalc_pool.swap(recalc_empty_pool); - defrag_pool.swap(defrag_empty_pool); + recalc_pool->clear(); + + UT_DELETE(recalc_pool); } /*****************************************************************//** @@ -118,7 +125,6 @@ background stats gathering thread. Only the table id is added to the list, so the table can be closed after being enqueued and it will be opened when needed. If the table does not exist later (has been DROPped), then it will be removed from the pool and skipped. 
*/ -UNIV_INTERN void dict_stats_recalc_pool_add( /*=======================*/ @@ -129,8 +135,8 @@ dict_stats_recalc_pool_add( mutex_enter(&recalc_pool_mutex); /* quit if already in the list */ - for (recalc_pool_iterator_t iter = recalc_pool.begin(); - iter != recalc_pool.end(); + for (recalc_pool_iterator_t iter = recalc_pool->begin(); + iter != recalc_pool->end(); ++iter) { if (*iter == table->id) { @@ -139,7 +145,7 @@ dict_stats_recalc_pool_add( } } - recalc_pool.push_back(table->id); + recalc_pool->push_back(table->id); mutex_exit(&recalc_pool_mutex); @@ -161,14 +167,14 @@ dict_stats_recalc_pool_get( mutex_enter(&recalc_pool_mutex); - if (recalc_pool.empty()) { + if (recalc_pool->empty()) { mutex_exit(&recalc_pool_mutex); return(false); } - *id = recalc_pool[0]; + *id = recalc_pool->at(0); - recalc_pool.erase(recalc_pool.begin()); + recalc_pool->erase(recalc_pool->begin()); mutex_exit(&recalc_pool_mutex); @@ -178,7 +184,6 @@ dict_stats_recalc_pool_get( /*****************************************************************//** Delete a given table from the auto recalc pool. dict_stats_recalc_pool_del() */ -UNIV_INTERN void dict_stats_recalc_pool_del( /*=======================*/ @@ -191,13 +196,13 @@ dict_stats_recalc_pool_del( ut_ad(table->id > 0); - for (recalc_pool_iterator_t iter = recalc_pool.begin(); - iter != recalc_pool.end(); + for (recalc_pool_iterator_t iter = recalc_pool->begin(); + iter != recalc_pool->end(); ++iter) { if (*iter == table->id) { /* erase() invalidates the iterator */ - recalc_pool.erase(iter); + recalc_pool->erase(iter); break; } } @@ -205,111 +210,6 @@ dict_stats_recalc_pool_del( mutex_exit(&recalc_pool_mutex); } -/*****************************************************************//** -Add an index in a table to the defrag pool, which is processed by the -background stats gathering thread. Only the table id and index id are -added to the list, so the table can be closed after being enqueued and -it will be opened when needed. 
If the table or index does not exist later -(has been DROPped), then it will be removed from the pool and skipped. */ -UNIV_INTERN -void -dict_stats_defrag_pool_add( -/*=======================*/ - const dict_index_t* index) /*!< in: table to add */ -{ - defrag_pool_item_t item; - - ut_ad(!srv_read_only_mode); - - mutex_enter(&defrag_pool_mutex); - - /* quit if already in the list */ - for (defrag_pool_iterator_t iter = defrag_pool.begin(); - iter != defrag_pool.end(); - ++iter) { - if ((*iter).table_id == index->table->id - && (*iter).index_id == index->id) { - mutex_exit(&defrag_pool_mutex); - return; - } - } - - item.table_id = index->table->id; - item.index_id = index->id; - defrag_pool.push_back(item); - - mutex_exit(&defrag_pool_mutex); - - os_event_set(dict_stats_event); -} - -/*****************************************************************//** -Get an index from the auto defrag pool. The returned index id is removed -from the pool. -@return true if the pool was non-empty and "id" was set, false otherwise */ -static -bool -dict_stats_defrag_pool_get( -/*=======================*/ - table_id_t* table_id, /*!< out: table id, or unmodified if - list is empty */ - index_id_t* index_id) /*!< out: index id, or unmodified if - list is empty */ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&defrag_pool_mutex); - - if (defrag_pool.empty()) { - mutex_exit(&defrag_pool_mutex); - return(false); - } - - defrag_pool_item_t& item = defrag_pool.back(); - *table_id = item.table_id; - *index_id = item.index_id; - - defrag_pool.pop_back(); - - mutex_exit(&defrag_pool_mutex); - - return(true); -} - -/*****************************************************************//** -Delete a given index from the auto defrag pool. 
*/ -UNIV_INTERN -void -dict_stats_defrag_pool_del( -/*=======================*/ - const dict_table_t* table, /*!mutex)); - - mutex_enter(&defrag_pool_mutex); - - defrag_pool_iterator_t iter = defrag_pool.begin(); - while (iter != defrag_pool.end()) { - if ((table && (*iter).table_id == table->id) - || (index - && (*iter).table_id == index->table->id - && (*iter).index_id == index->id)) { - /* erase() invalidates the iterator */ - iter = defrag_pool.erase(iter); - if (index) - break; - } else { - iter++; - } - } - - mutex_exit(&defrag_pool_mutex); -} - /*****************************************************************//** Wait until background stats thread has stopped using the specified table. The caller must have locked the data dictionary using @@ -319,7 +219,6 @@ The background stats thread is guaranteed not to start using the specified table after this function returns and before the caller unlocks the data dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag under dict_sys->mutex. */ -UNIV_INTERN void dict_stats_wait_bg_to_stop_using_table( /*===================================*/ @@ -335,14 +234,16 @@ dict_stats_wait_bg_to_stop_using_table( /*****************************************************************//** Initialize global variables needed for the operation of dict_stats_thread() Must be called before dict_stats_thread() is started. 
*/ -UNIV_INTERN void dict_stats_thread_init() /*====================*/ { ut_a(!srv_read_only_mode); - dict_stats_event = os_event_create(); + dict_stats_event = os_event_create(0); + dict_stats_shutdown_event = os_event_create(0); + + ut_d(dict_stats_disabled_event = os_event_create(0)); /* The recalc_pool_mutex is acquired from: 1) the background stats gathering thread before any other latch @@ -357,19 +258,17 @@ dict_stats_thread_init() and dict_operation_lock (SYNC_DICT_OPERATION) have been locked (thus a level mutex); - table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS; + table->stats_bg_flag = BG_STAT_NONE; dict_table_close(table, TRUE, FALSE); mutex_exit(&dict_sys->mutex); } -/*****************************************************************//** -Get the first index that has been added for updating persistent defrag -stats and eventually save its stats. */ -static +#ifdef UNIV_DEBUG +/** Disables dict stats thread. It's used by: + SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0). +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ void -dict_stats_process_entry_from_defrag_pool() -/*=======================================*/ +dict_stats_disabled_debug_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) { - table_id_t table_id; - index_id_t index_id; + /* This method is protected by mutex, as every SET GLOBAL .. 
*/ + ut_ad(dict_stats_disabled_event != NULL); - ut_ad(!srv_read_only_mode); + const bool disable = *static_cast(save); - /* pop the first index from the auto defrag pool */ - if (!dict_stats_defrag_pool_get(&table_id, &index_id)) { - /* no index in defrag pool */ - return; + const int64_t sig_count = os_event_reset(dict_stats_disabled_event); + + innodb_dict_stats_disabled_debug = disable; + + if (disable) { + os_event_set(dict_stats_event); + os_event_wait_low(dict_stats_disabled_event, sig_count); } - - dict_table_t* table; - - mutex_enter(&dict_sys->mutex); - - /* If the table is no longer cached, we've already lost the in - memory stats so there's nothing really to write to disk. */ - table = dict_table_open_on_id(table_id, TRUE, - DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); - - if (table == NULL) { - mutex_exit(&dict_sys->mutex); - return; - } - - /* Check whether table is corrupted */ - if (table->corrupted) { - dict_table_close(table, TRUE, FALSE); - mutex_exit(&dict_sys->mutex); - return; - } - mutex_exit(&dict_sys->mutex); - - dict_index_t* index = dict_table_find_index_on_id(table, index_id); - - if (index == NULL) { - return; - } - - /* Check whether index is corrupted */ - if (dict_index_is_corrupted(index)) { - dict_table_close(table, FALSE, FALSE); - return; - } - - dict_stats_save_defrag_stats(index); - dict_table_close(table, FALSE, FALSE); } +#endif /* UNIV_DEBUG */ + /*****************************************************************//** This is the thread for background stats gathering. It pops tables, from the auto recalc list and proceeds them, eventually recalculating their statistics. 
@return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t DECLARE_THREAD(dict_stats_thread)( /*==============================*/ @@ -532,9 +410,15 @@ DECLARE_THREAD(dict_stats_thread)( { ut_a(!srv_read_only_mode); +#ifdef UNIV_PFS_THREAD + /* JAN: TODO: MySQL 5.7 PSI + pfs_register_thread(dict_stats_thread_key); + */ +#endif /* UNIV_PFS_THREAD */ + srv_dict_stats_thread_active = TRUE; - while (!SHUTTING_DOWN()) { + while (!dict_stats_start_shutdown) { /* Wake up periodically even if not signaled. This is because we may lose an event - if the below call to @@ -544,23 +428,44 @@ DECLARE_THREAD(dict_stats_thread)( os_event_wait_time( dict_stats_event, MIN_RECALC_INTERVAL * 1000000); - if (SHUTTING_DOWN()) { +#ifdef UNIV_DEBUG + while (innodb_dict_stats_disabled_debug) { + os_event_set(dict_stats_disabled_event); + if (dict_stats_start_shutdown) { + break; + } + os_event_wait_time( + dict_stats_event, 100000); + } +#endif /* UNIV_DEBUG */ + + if (dict_stats_start_shutdown) { break; } dict_stats_process_entry_from_recalc_pool(); - - while (defrag_pool.size()) - dict_stats_process_entry_from_defrag_pool(); + dict_defrag_process_entries_from_defrag_pool(); os_event_reset(dict_stats_event); } srv_dict_stats_thread_active = FALSE; + os_event_set(dict_stats_shutdown_event); + my_thread_end(); + /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit instead of return(). */ - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } + +/** Shutdown the dict stats thread. 
*/ +void +dict_stats_shutdown() +{ + dict_stats_start_shutdown = true; + os_event_set(dict_stats_event); + os_event_wait(dict_stats_shutdown_event); +} diff --git a/storage/innobase/dyn/dyn0dyn.cc b/storage/innobase/dyn/dyn0dyn.cc deleted file mode 100644 index 3ef5297a7c9..00000000000 --- a/storage/innobase/dyn/dyn0dyn.cc +++ /dev/null @@ -1,66 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dyn/dyn0dyn.cc -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#include "dyn0dyn.h" -#ifdef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -/************************************************************//** -Adds a new block to a dyn array. 
-@return created block */ -UNIV_INTERN -dyn_block_t* -dyn_array_add_block( -/*================*/ - dyn_array_t* arr) /*!< in/out: dyn array */ -{ - mem_heap_t* heap; - dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - UT_LIST_INIT(arr->base); - UT_LIST_ADD_FIRST(list, arr->base, arr); - - arr->heap = mem_heap_create(sizeof(dyn_block_t)); - } - - block = dyn_array_get_last_block(arr); - block->used = block->used | DYN_BLOCK_FULL_FLAG; - - heap = arr->heap; - - block = static_cast( - mem_heap_alloc(heap, sizeof(dyn_block_t))); - - block->used = 0; - - UT_LIST_ADD_LAST(list, arr->base, block); - - return(block); -} diff --git a/storage/innobase/eval/eval0eval.cc b/storage/innobase/eval/eval0eval.cc index ccc54781102..a525cb604ea 100644 --- a/storage/innobase/eval/eval0eval.cc +++ b/storage/innobase/eval/eval0eval.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -60,8 +60,7 @@ NOTE that this memory must be explicitly freed when the query graph is freed. If the node already has an allocated buffer, that buffer is freed here. NOTE that this is the only function where dynamic memory should be allocated for a query node val field. 
-@return pointer to allocated buffer */ -UNIV_INTERN +@return pointer to allocated buffer */ byte* eval_node_alloc_val_buf( /*====================*/ @@ -80,14 +79,14 @@ eval_node_alloc_val_buf( data = static_cast(dfield_get_data(dfield)); - if (data && data != &eval_dummy) { - mem_free(data); + if (data != &eval_dummy) { + ut_free(data); } if (size == 0) { data = &eval_dummy; } else { - data = static_cast(mem_alloc(size)); + data = static_cast(ut_malloc_nokey(size)); } que_node_set_val_buf_size(node, size); @@ -101,7 +100,6 @@ eval_node_alloc_val_buf( Free the buffer from global dynamic memory for a value of a que_node, if it has been allocated in the above function. The freeing for pushed column values is done in sel_col_prefetch_buf_free. */ -UNIV_INTERN void eval_node_free_val_buf( /*===================*/ @@ -120,7 +118,7 @@ eval_node_free_val_buf( if (que_node_get_val_buf_size(node) > 0) { ut_a(data); - mem_free(data); + ut_free(data); } } @@ -135,12 +133,9 @@ eval_cmp_like( que_node_t* arg2) /* !< in: right operand */ { ib_like_t op; - int res; que_node_t* arg3; que_node_t* arg4; - dfield_t* dfield; - dtype_t* dtype; - ibool val = TRUE; + const dfield_t* dfield; arg3 = que_node_get_like_node(arg2); @@ -148,51 +143,23 @@ eval_cmp_like( ut_a(arg3); dfield = que_node_get_val(arg3); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_INT); - op = static_cast(mach_read_from_4(static_cast(dfield_get_data(dfield)))); + ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT); + op = static_cast( + mach_read_from_4(static_cast( + dfield_get_data(dfield)))); switch (op) { - case IB_LIKE_PREFIX: - + case IB_LIKE_PREFIX: arg4 = que_node_get_next(arg3); - res = cmp_dfield_dfield_like_prefix( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_SUFFIX: - - arg4 = que_node_get_next(arg3); - res = cmp_dfield_dfield_like_suffix( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_SUBSTR: - - arg4 = 
que_node_get_next(arg3); - res = cmp_dfield_dfield_like_substr( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_EXACT: - res = cmp_dfield_dfield( - que_node_get_val(arg1), - que_node_get_val(arg2)); - break; - - default: - ut_error; + return(!cmp_dfield_dfield_like_prefix(que_node_get_val(arg1), + que_node_get_val(arg4))); + case IB_LIKE_EXACT: + return(!cmp_dfield_dfield(que_node_get_val(arg1), + que_node_get_val(arg2))); } - if (res != 0) { - val = FALSE; - } - - return(val); + ut_error; + return(FALSE); } /********************************************************************* @@ -206,53 +173,47 @@ eval_cmp( que_node_t* arg1; que_node_t* arg2; int res; - int func; - ibool val = TRUE; + ibool val = FALSE; /* remove warning */ ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC); arg1 = cmp_node->args; arg2 = que_node_get_next(arg1); - func = cmp_node->func; - - if (func == PARS_LIKE_TOKEN_EXACT - || func == PARS_LIKE_TOKEN_PREFIX - || func == PARS_LIKE_TOKEN_SUFFIX - || func == PARS_LIKE_TOKEN_SUBSTR) { - - val = eval_cmp_like(arg1, arg2); - } else { + switch (cmp_node->func) { + case '<': + case '=': + case '>': + case PARS_LE_TOKEN: + case PARS_NE_TOKEN: + case PARS_GE_TOKEN: res = cmp_dfield_dfield( que_node_get_val(arg1), que_node_get_val(arg2)); - if (func == '=') { - if (res != 0) { - val = FALSE; - } - } else if (func == '<') { - if (res != -1) { - val = FALSE; - } - } else if (func == PARS_LE_TOKEN) { - if (res == 1) { - val = FALSE; - } - } else if (func == PARS_NE_TOKEN) { - if (res == 0) { - val = FALSE; - } - } else if (func == PARS_GE_TOKEN) { - if (res == -1) { - val = FALSE; - } - } else { - ut_ad(func == '>'); - - if (res != 1) { - val = FALSE; - } + switch (cmp_node->func) { + case '<': + val = (res < 0); + break; + case '=': + val = (res == 0); + break; + case '>': + val = (res > 0); + break; + case PARS_LE_TOKEN: + val = (res <= 0); + break; + case PARS_NE_TOKEN: + val = (res != 0); + break; + case PARS_GE_TOKEN: + 
val = (res >= 0); + break; } + break; + default: + val = eval_cmp_like(arg1, arg2); + break; } eval_node_set_ibool_val(cmp_node, val); @@ -870,7 +831,6 @@ eval_predefined( /*****************************************************************//** Evaluates a function node. */ -UNIV_INTERN void eval_func( /*======*/ diff --git a/storage/innobase/eval/eval0proc.cc b/storage/innobase/eval/eval0proc.cc index e6f3a32cd48..cdd6fdc2a0a 100644 --- a/storage/innobase/eval/eval0proc.cc +++ b/storage/innobase/eval/eval0proc.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1998, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,8 +31,7 @@ Created 1/20/1998 Heikki Tuuri /**********************************************************************//** Performs an execution step of an if-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* if_step( /*====*/ @@ -108,8 +107,7 @@ if_step( /**********************************************************************//** Performs an execution step of a while-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* while_step( /*=======*/ @@ -144,8 +142,7 @@ while_step( /**********************************************************************//** Performs an execution step of an assignment statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* assign_step( /*========*/ @@ -171,8 +168,7 @@ assign_step( /**********************************************************************//** Performs an execution step of a for-loop node. 
-@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* for_step( /*=====*/ @@ -233,8 +229,7 @@ for_step( /**********************************************************************//** Performs an execution step of an exit statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* exit_step( /*======*/ @@ -265,8 +260,7 @@ exit_step( /**********************************************************************//** Performs an execution step of a return-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* return_step( /*========*/ diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 2db3063d6b5..9062bf1586b 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -122,13 +122,11 @@ void fil_space_crypt_init() /*==================*/ { - mutex_create(fil_crypt_key_mutex_key, - &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_FIL_CRYPT_MUTEX, &fil_crypt_key_mutex); - fil_crypt_throttle_sleep_event = os_event_create(); + fil_crypt_throttle_sleep_event = os_event_create(0); - mutex_create(fil_crypt_stat_mutex_key, - &crypt_stat_mutex, SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_FIL_CRYPT_STAT_MUTEX, &crypt_stat_mutex); memset(&crypt_stat, 0, sizeof(crypt_stat)); } @@ -139,7 +137,7 @@ void fil_space_crypt_cleanup() /*=====================*/ { - os_event_free(fil_crypt_throttle_sleep_event); + os_event_destroy(fil_crypt_throttle_sleep_event); } /****************************************************************** @@ -204,8 +202,7 @@ fil_space_create_crypt_data( crypt_data->min_key_version = encryption_key_get_latest_version(key_id); } - mutex_create(fil_crypt_data_mutex_key, - &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_FIL_CRYPT_DATA_MUTEX, &crypt_data->mutex); 
crypt_data->locker = crypt_data_scheme_locker; my_random_bytes(crypt_data->iv, sizeof(crypt_data->iv)); crypt_data->encryption = encrypt_mode; @@ -258,20 +255,6 @@ fil_space_read_crypt_data( } if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { -#ifdef UNIV_DEBUG - ib_logf(IB_LOG_LEVEL_WARN, - "Found potentially bogus bytes on " - "page 0 offset %lu for space %lu : " - "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. " - "Assuming space is not encrypted!.", - offset, space, - page[offset + 0], - page[offset + 1], - page[offset + 2], - page[offset + 3], - page[offset + 4], - page[offset + 5]); -#endif /* Crypt data is not stored. */ return NULL; } @@ -280,18 +263,17 @@ fil_space_read_crypt_data( if (! (type == CRYPT_SCHEME_UNENCRYPTED || type == CRYPT_SCHEME_1)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Found non sensible crypt scheme: %lu for space %lu " - " offset: %lu bytes: " - "[ %.2x %.2x %.2x %.2x %.2x %.2x ].", - type, space, offset, - page[offset + 0 + MAGIC_SZ], - page[offset + 1 + MAGIC_SZ], - page[offset + 2 + MAGIC_SZ], - page[offset + 3 + MAGIC_SZ], - page[offset + 4 + MAGIC_SZ], - page[offset + 5 + MAGIC_SZ]); + ib::error() << "Found non sensible crypt scheme: " + << type << " for space: " + << space << " offset: " + << offset << " bytes: [" + << page[offset + 0 + MAGIC_SZ] + << page[offset + 1 + MAGIC_SZ] + << page[offset + 2 + MAGIC_SZ] + << page[offset + 3 + MAGIC_SZ] + << page[offset + 4 + MAGIC_SZ] + << page[offset + 5 + MAGIC_SZ] + << "]."; ut_error; } @@ -299,17 +281,18 @@ fil_space_read_crypt_data( ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); if (! 
(iv_length == sizeof(crypt_data->iv))) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Found non sensible iv length: %lu for space %lu " - " offset: %lu type: %lu bytes: " - "[ %.2x %.2x %.2x %.2x %.2x %.2x ].", - iv_length, space, offset, type, - page[offset + 0 + MAGIC_SZ], - page[offset + 1 + MAGIC_SZ], - page[offset + 2 + MAGIC_SZ], - page[offset + 3 + MAGIC_SZ], - page[offset + 4 + MAGIC_SZ], - page[offset + 5 + MAGIC_SZ]); + ib::error() << "Found non sensible iv length: " + << iv_length << " for space: " + << space << " offset: " + << offset << " type: " + << type << " bytes: [" + << page[offset + 0 + MAGIC_SZ] + << page[offset + 1 + MAGIC_SZ] + << page[offset + 2 + MAGIC_SZ] + << page[offset + 3 + MAGIC_SZ] + << page[offset + 4 + MAGIC_SZ] + << page[offset + 5 + MAGIC_SZ] + << "]."; ut_error; } @@ -331,8 +314,7 @@ fil_space_read_crypt_data( crypt_data->key_id = key_id; crypt_data->page0_offset = offset; crypt_data->encryption = encryption; - mutex_create(fil_crypt_data_mutex_key, - &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_FIL_CRYPT_DATA_MUTEX, &crypt_data->mutex); crypt_data->locker = crypt_data_scheme_locker; crypt_data->inited = true; memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length); @@ -352,13 +334,17 @@ fil_space_destroy_crypt_data( /* Make sure that this thread owns the crypt_data and make it unawailable, this does not fully avoid the race between drop table and crypt thread */ + mutex_enter(&fil_crypt_threads_mutex); mutex_enter(&(*crypt_data)->mutex); (*crypt_data)->inited = false; mutex_exit(&(*crypt_data)->mutex); + /* JAN: TODO: mutex_free(& (*crypt_data)->mutex); memset(*crypt_data, 0, sizeof(fil_space_crypt_t)); free(*crypt_data); (*crypt_data) = NULL; + */ + mutex_exit(&fil_crypt_threads_mutex); } } @@ -550,17 +536,16 @@ fil_encrypt_buf( ulint offset, /*!< in: Page offset */ lsn_t lsn, /*!< in: lsn */ byte* src_frame, /*!< in: Source page to be encrypted */ - ulint zip_size, /*!< in: compressed size if - row format 
compressed */ + const page_size_t& page_size, /*!< in: page size */ byte* dst_frame) /*!< in: outbut buffer */ { - ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + ulint size = page_size.physical(); uint key_version = fil_crypt_get_latest_key_version(crypt_data); if (key_version == ENCRYPTION_KEY_VERSION_INVALID) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unknown key id %u. Can't continue!\n", - crypt_data->key_id); + ib::error() << "Unknown key id: " + << crypt_data->key_id + << " Can't continue!"; ut_error; } @@ -580,7 +565,7 @@ fil_encrypt_buf( /* Calculate the start offset in a page */ ulint unencrypted_bytes = header_len + FIL_PAGE_DATA_END; - ulint srclen = page_size - unencrypted_bytes; + ulint srclen = size - unencrypted_bytes; const byte* src = src_frame + header_len; byte* dst = dst_frame + header_len; uint32 dstlen = 0; @@ -594,12 +579,10 @@ fil_encrypt_buf( space, offset, lsn); if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to encrypt data-block " - " src: %p srclen: %ld buf: %p buflen: %d." - " return-code: %d. Can't continue!\n", - src, (long)srclen, - dst, dstlen, rc); + ib::error() << "Unable to encrypt data-block " + << " src: " << src << " srclen: " << srclen + << " buf: " << dst << " buflen: " << dstlen + << " return-code: "<< rc << " Can't continue!"; ut_error; } @@ -609,18 +592,18 @@ fil_encrypt_buf( to sector boundary is written. 
*/ if (!page_compressed) { /* FIL page trailer is also not encrypted */ - memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, - src_frame + page_size - FIL_PAGE_DATA_END, + memcpy(dst_frame + page_size.physical() - FIL_PAGE_DATA_END, + src_frame + page_size.physical() - FIL_PAGE_DATA_END, FIL_PAGE_DATA_END); } else { /* Clean up rest of buffer */ - memset(dst_frame+header_len+srclen, 0, page_size - (header_len+srclen)); + memset(dst_frame+header_len+srclen, 0, page_size.physical() - (header_len+srclen)); } /* handle post encryption checksum */ ib_uint32_t checksum = 0; - checksum = fil_crypt_calculate_checksum(zip_size, dst_frame); + checksum = fil_crypt_calculate_checksum(page_size, dst_frame); // store the post-encryption checksum after the key-version mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum); @@ -640,8 +623,7 @@ fil_space_encrypt( ulint offset, /*!< in: Page offset */ lsn_t lsn, /*!< in: lsn */ byte* src_frame, /*!< in: Source page to be encrypted */ - ulint zip_size, /*!< in: compressed size if - row_format compressed */ + const page_size_t& page_size, /*!< in: page size */ byte* dst_frame) /*!< in: outbut buffer */ { fil_space_crypt_t* crypt_data = NULL; @@ -664,7 +646,60 @@ fil_space_encrypt( ut_a(crypt_data != NULL && crypt_data->encryption != FIL_SPACE_ENCRYPTION_OFF); - byte* tmp = fil_encrypt_buf(crypt_data, space, offset, lsn, src_frame, zip_size, dst_frame); + byte* tmp = fil_encrypt_buf(crypt_data, space, offset, lsn, src_frame, page_size, dst_frame); + +#ifdef UNIV_DEBUG + if (tmp) { + /* Verify that encrypted buffer is not corrupted */ + byte* tmp_mem = (byte *)malloc(UNIV_PAGE_SIZE); + dberr_t err = DB_SUCCESS; + byte* src = src_frame; + bool page_compressed_encrypted = (mach_read_from_2(tmp+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); + byte* comp_mem = NULL; + byte* uncomp_mem = NULL; + + if (page_compressed_encrypted) { + comp_mem = (byte *)malloc(UNIV_PAGE_SIZE); + uncomp_mem = (byte 
*)malloc(UNIV_PAGE_SIZE); + memcpy(comp_mem, src_frame, UNIV_PAGE_SIZE); + fil_decompress_page(uncomp_mem, comp_mem, page_size.physical(), NULL); + src = uncomp_mem; + } + + bool corrupted1 = buf_page_is_corrupted(true, src, page_size, fsp_is_checksum_disabled(space)); + bool ok = fil_space_decrypt(crypt_data, tmp_mem, page_size, tmp, &err); + + /* Need to decompress the page if it was also compressed */ + if (page_compressed_encrypted) { + memcpy(comp_mem, tmp_mem, UNIV_PAGE_SIZE); + fil_decompress_page(tmp_mem, comp_mem, page_size.physical(), NULL); + } + + bool corrupted = buf_page_is_corrupted(true, tmp_mem, page_size, fsp_is_checksum_disabled(space)); + bool different = memcmp(src, tmp_mem, page_size.physical()); + + if (!ok || corrupted || corrupted1 || err != DB_SUCCESS || different) { + fprintf(stderr, "JAN: ok %d corrupted %d corrupted1 %d err %d different %d\n", ok , corrupted, corrupted1, err, different); + fprintf(stderr, "JAN1: src_frame\n"); + buf_page_print(src_frame, page_size, BUF_PAGE_PRINT_NO_CRASH); + fprintf(stderr, "JAN2: encrypted_frame\n"); + buf_page_print(tmp, page_size, BUF_PAGE_PRINT_NO_CRASH); + fprintf(stderr, "JAN1: decrypted_frame\n"); + buf_page_print(tmp_mem, page_size, BUF_PAGE_PRINT_NO_CRASH); + ut_error; + } + + free(tmp_mem); + + if (comp_mem) { + free(comp_mem); + } + + if (uncomp_mem) { + free(uncomp_mem); + } + } +#endif /* UNIV_DEBUG */ return tmp; } @@ -704,7 +739,7 @@ fil_space_decrypt( /*==============*/ fil_space_crypt_t* crypt_data, /*!< in: crypt data */ byte* tmp_frame, /*!< in: temporary buffer */ - ulint page_size, /*!< in: page size */ + const page_size_t& page_size, /*!< in: page size */ byte* src_frame, /*!< in: out: page buffer */ dberr_t* err) /*!< in: out: DB_SUCCESS or error code */ @@ -730,13 +765,14 @@ fil_space_decrypt( data file (ibdata*, not *.ibd), if not clear it. 
*/ #ifdef UNIV_DEBUG - ib_logf(IB_LOG_LEVEL_WARN, - "Page on space %lu offset %lu has key_version %u" - " when it shoud be undefined.", - space, offset, key_version); + ib::warn() + << "Page on space "<< space << " offset " << offset + << " has key_version " << key_version + << " when it shoud be undefined."; #endif mach_write_to_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0); } + return false; } @@ -756,7 +792,7 @@ fil_space_decrypt( const byte* src = src_frame + header_len; byte* dst = tmp_frame + header_len; uint32 dstlen = 0; - ulint srclen = page_size - (header_len + FIL_PAGE_DATA_END); + ulint srclen = page_size.physical() - (header_len + FIL_PAGE_DATA_END); if (page_compressed) { srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA); @@ -773,12 +809,11 @@ fil_space_decrypt( return false; } - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to decrypt data-block " - " src: %p srclen: %ld buf: %p buflen: %d." - " return-code: %d. Can't continue!\n", - src, (long)srclen, - dst, dstlen, rc); + ib::error() << "Unable to decrypt data-block " + << " src: " << src << "srclen: " + << srclen << " buf: " << dst << "buflen: " + << dstlen << " return-code: " << rc + << " Can't continue!"; ut_error; } @@ -788,8 +823,8 @@ fil_space_decrypt( to sector boundary is written. 
*/ if (!page_compressed) { /* Copy FIL trailer */ - memcpy(tmp_frame + page_size - FIL_PAGE_DATA_END, - src_frame + page_size - FIL_PAGE_DATA_END, + memcpy(tmp_frame + page_size.physical() - FIL_PAGE_DATA_END, + src_frame + page_size.physical() - FIL_PAGE_DATA_END, FIL_PAGE_DATA_END); // clear key-version & crypt-checksum from dst @@ -811,7 +846,7 @@ fil_space_decrypt( /*==============*/ ulint space, /*!< in: Fil space id */ byte* tmp_frame, /*!< in: temporary buffer */ - ulint page_size, /*!< in: page size */ + const page_size_t& page_size, /*!< in: page size */ byte* src_frame) /*!< in/out: page buffer */ { dberr_t err = DB_SUCCESS; @@ -828,7 +863,7 @@ fil_space_decrypt( if (encrypted) { /* Copy the decrypted page back to page buffer, not really any other options. */ - memcpy(src_frame, tmp_frame, page_size); + memcpy(src_frame, tmp_frame, page_size.physical()); } res = src_frame; @@ -845,14 +880,14 @@ UNIV_INTERN ulint fil_crypt_calculate_checksum( /*=========================*/ - ulint zip_size, /*!< in: zip_size or 0 */ - byte* dst_frame) /*!< in: page where to calculate */ + const page_size_t& page_size, /*!< in: page size */ + byte* dst_frame) /*!< in: page where to calculate */ { ib_uint32_t checksum = 0; srv_checksum_algorithm_t algorithm = static_cast(srv_checksum_algorithm); - if (zip_size == 0) { + if (!page_size.is_compressed()) { switch (algorithm) { case SRV_CHECKSUM_ALGORITHM_CRC32: case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: @@ -871,7 +906,7 @@ fil_crypt_calculate_checksum( * if new enum is added and not handled here */ } } else { - checksum = page_zip_calc_checksum(dst_frame, zip_size, + checksum = page_zip_calc_checksum(dst_frame, page_size.physical(), algorithm); } @@ -887,9 +922,8 @@ UNIV_INTERN bool fil_space_verify_crypt_checksum( /*============================*/ - const byte* src_frame, /*!< in: page the verify */ - ulint zip_size) /*!< in: compressed size if - row_format compressed */ + const byte* src_frame, /*!< in: page the verify */ + 
const page_size_t& page_size) /*!< in: page size */ { // key version uint key_version = mach_read_from_4( @@ -924,7 +958,7 @@ fil_space_verify_crypt_checksum( srv_checksum_algorithm_t save_checksum_algorithm = (srv_checksum_algorithm_t)srv_checksum_algorithm; - if (zip_size == 0 && + if (!page_size.is_compressed() && (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) { /* handle ALGORITHM_INNODB specially, @@ -938,7 +972,7 @@ fil_space_verify_crypt_checksum( } /* verify checksums */ - ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size); + ibool corrupted = buf_page_is_corrupted(false, src_frame, page_size, false); /** restore frame & algorithm */ srv_checksum_algorithm = save_checksum_algorithm; @@ -951,11 +985,7 @@ fil_space_verify_crypt_checksum( UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, checksum_field2); - if (!corrupted) { - return true; // page was encrypted and checksum matched - } else { - return false; // page was encrypted but checksum didn't match - } + return (!corrupted); } /***********************************************************************/ @@ -986,9 +1016,9 @@ fil_crypt_get_key_state( new_state->rotate_key_age = srv_fil_crypt_rotate_key_age; if (new_state->key_version == ENCRYPTION_KEY_VERSION_INVALID) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Used key_id %u can't be found from key file.", - new_state->key_id); + ib::error() << "Used key_id " + << new_state->key_id + << " can't be found from key file."; } ut_a(new_state->key_version != ENCRYPTION_KEY_VERSION_INVALID); @@ -1133,26 +1163,30 @@ fil_crypt_start_encrypting_space( /* 2 - get page 0 */ ulint offset = 0; - ulint zip_size = fil_space_get_zip_size(space); - buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + const page_id_t page_id(space, offset); + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(space, &tsfound); + dberr_t err = DB_SUCCESS; + buf_block_t* block = 
buf_page_get_gen(page_id, page_size, RW_X_LATCH, NULL, BUF_GET, __FILE__, __LINE__, - &mtr); + &mtr, &err); if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL) { + fil_space_found_by_id(space) == NULL || + err != DB_SUCCESS) { mtr_commit(&mtr); break; } /* 3 - compute location to store crypt data */ byte* frame = buf_block_get_frame(block); - ulint maxsize; + ulint maxsize = 0; ut_ad(crypt_data); crypt_data->page0_offset = - fsp_header_get_crypt_offset(zip_size, &maxsize); + fsp_header_get_crypt_offset(page_size, &maxsize); /* 4 - write crypt data to page 0 */ fil_space_write_crypt_data_low(crypt_data, @@ -1169,7 +1203,7 @@ fil_crypt_start_encrypting_space( } /* record lsn of update */ - lsn_t end_lsn = mtr.end_lsn; + lsn_t end_lsn = mtr.commit_lsn(); /* 4 - sync tablespace before publishing crypt data */ @@ -1181,7 +1215,7 @@ fil_crypt_start_encrypting_space( ulint n_pages = 0; ulint sum_pages = 0; do { - success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); sum_pages += n_pages; } while (!success && @@ -1279,7 +1313,7 @@ fil_crypt_space_needs_rotation( /* Make sure that tablespace is found and it is normal tablespace */ if (fil_space_found_by_id(space) == NULL || - fil_space_get_type(space) != FIL_TABLESPACE) { + fil_space_get_type(space) != FIL_TYPE_TABLESPACE) { return false; } @@ -1702,11 +1736,15 @@ static bool fil_crypt_is_page_uninitialized( /*============================*/ - const byte *frame, /*!< in: Page */ - uint zip_size) /*!< in: compressed size if - row_format compressed */ + const byte* frame, /*!< in: Page */ + const page_size_t& page_size) /*!< in: page size */ { - if (zip_size) { + if (fil_page_get_type(frame) == FIL_PAGE_TYPE_ALLOCATED) { + /* empty pages aren't encrypted */ + return true; + } + + if (page_size.is_compressed()) { ulint stored_checksum = mach_read_from_4( frame + FIL_PAGE_SPACE_OR_CHKSUM); /* empty 
pages aren't encrypted */ @@ -1714,7 +1752,7 @@ fil_crypt_is_page_uninitialized( return true; } } else { - ulint size = UNIV_PAGE_SIZE; + ulint size = page_size.logical(); ulint checksum_field1 = mach_read_from_4( frame + FIL_PAGE_SPACE_OR_CHKSUM); ulint checksum_field2 = mach_read_from_4( @@ -1728,8 +1766,8 @@ fil_crypt_is_page_uninitialized( return false; } -#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \ - fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \ +#define fil_crypt_get_page_throttle(state,space,page_size,offset,mtr,sleeptime_ms) \ + fil_crypt_get_page_throttle_func(state, space, page_size, offset, mtr, \ sleeptime_ms, __FILE__, __LINE__) /*********************************************************************** @@ -1741,17 +1779,20 @@ fil_crypt_get_page_throttle_func( /*=============================*/ rotate_thread_t* state, /*!< in/out: Key rotation state */ ulint space, /*!< in: FIL space id */ - uint zip_size, /*!< in: compressed size if - row_format compressed */ + const page_size_t& page_size, /*!< in: page size */ ulint offset, /*!< in: page offsett */ mtr_t* mtr, /*!< in/out: minitransaction */ ulint* sleeptime_ms, /*!< out: sleep time */ const char* file, /*!< in: file name */ ulint line) /*!< in: file line */ { - buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH, - true, - file, line, mtr); + const page_id_t& page_id = page_id_t(space, offset); + dberr_t err = DB_SUCCESS; + buf_block_t* block = NULL; + + // JAN: TODO: + // buf_block_t* block = buf_page_try_get_func(page_id, file, line, mtr); + if (block != NULL) { /* page was in buffer pool */ state->crypt_stat.pages_read_from_cache++; @@ -1768,12 +1809,12 @@ fil_crypt_get_page_throttle_func( state->crypt_stat.pages_read_from_disk++; - ullint start = ut_time_us(NULL); - block = buf_page_get_gen(space, zip_size, offset, + uintmax_t start = ut_time_us(NULL); + block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, NULL, 
BUF_GET_POSSIBLY_FREED, - file, line, mtr); - ullint end = ut_time_us(NULL); + file, line, mtr, &err); + uintmax_t end = ut_time_us(NULL); if (end < start) { end = start; // safety... @@ -1812,8 +1853,7 @@ btr_scrub_get_block_and_allocation_status( /*======================================*/ rotate_thread_t* state, /*!< in/out: Key rotation state */ ulint space, /*!< in: FIL space id */ - uint zip_size, /*!< in: compressed size if - row_format compressed */ + const page_size_t& page_size, /*!< in: page size */ ulint offset, /*!< in: page offsett */ mtr_t* mtr, /*!< in/out: minitransaction */ @@ -1832,7 +1872,7 @@ btr_scrub_get_block_and_allocation_status( /* this is easy case, we lock fil_space_latch first and then block */ block = fil_crypt_get_page_throttle(state, - space, zip_size, + space, page_size, offset, mtr, sleeptime_ms); mtr_commit(&local_mtr); @@ -1849,7 +1889,7 @@ btr_scrub_get_block_and_allocation_status( */ block = fil_crypt_get_page_throttle(state, - space, zip_size, + space, page_size, offset, mtr, sleeptime_ms); } @@ -1869,7 +1909,8 @@ fil_crypt_rotate_page( { ulint space = state->space; ulint offset = state->offset; - const uint zip_size = fil_space_get_zip_size(space); + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(space, &tsfound); ulint sleeptime_ms = 0; /* check if tablespace is closing before reading page */ @@ -1885,7 +1926,7 @@ fil_crypt_rotate_page( mtr_t mtr; mtr_start(&mtr); buf_block_t* block = fil_crypt_get_page_throttle(state, - space, zip_size, + space, page_size, offset, &mtr, &sleeptime_ms); @@ -1902,7 +1943,7 @@ fil_crypt_rotate_page( fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); if (kv == 0 && - fil_crypt_is_page_uninitialized(frame, zip_size)) { + fil_crypt_is_page_uninitialized(frame, page_size)) { ; } else if (fil_crypt_needs_rotation( crypt_data->encryption, @@ -1943,7 +1984,7 @@ fil_crypt_rotate_page( } mtr_commit(&mtr); - lsn_t end_lsn = mtr.end_lsn; + lsn_t end_lsn = 
mtr.commit_lsn(); if (needs_scrubbing == BTR_SCRUB_PAGE) { mtr_start(&mtr); @@ -1951,8 +1992,9 @@ fil_crypt_rotate_page( * refetch page and allocation status */ btr_scrub_page_allocation_status_t allocated; + block = btr_scrub_get_block_and_allocation_status( - state, space, zip_size, offset, &mtr, + state, space, page_size, offset, &mtr, &allocated, &sleeptime_ms); @@ -1966,7 +2008,7 @@ fil_crypt_rotate_page( /* we need to refetch it once more now that we have * index locked */ block = btr_scrub_get_block_and_allocation_status( - state, space, zip_size, offset, &mtr, + state, space, page_size, offset, &mtr, &allocated, &sleeptime_ms); @@ -2073,15 +2115,15 @@ fil_crypt_flush_space( bool success = false; ulint n_pages = 0; ulint sum_pages = 0; - ullint start = ut_time_us(NULL); + uintmax_t start = ut_time_us(NULL); do { - success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); sum_pages += n_pages; } while (!success && !fil_crypt_is_closing(space)); - ullint end = ut_time_us(NULL); + uintmax_t end = ut_time_us(NULL); if (sum_pages && end > start) { state->cnt_waited += sum_pages; @@ -2101,18 +2143,27 @@ fil_crypt_flush_space( mtr_t mtr; mtr_start(&mtr); ulint offset = 0; // page 0 - const uint zip_size = fil_space_get_zip_size(space); - buf_block_t* block = buf_page_get_gen(space, zip_size, offset, - RW_X_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, &mtr); - byte* frame = buf_block_get_frame(block); - ulint maxsize; - crypt_data->page0_offset = - fsp_header_get_crypt_offset(zip_size, &maxsize); + const page_id_t page_id(space, offset); + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(space, &tsfound); + dberr_t err = DB_SUCCESS; + + buf_block_t* block = buf_page_get_gen(page_id, page_size, + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, &mtr, &err); + + if (block && err == DB_SUCCESS) { + byte* frame = buf_block_get_frame(block); + 
ulint maxsize=0; + + crypt_data->page0_offset = + fsp_header_get_crypt_offset(page_size, &maxsize); + + fil_space_write_crypt_data(space, frame, + crypt_data->page0_offset, + ULINT_MAX, &mtr); + } - fil_space_write_crypt_data(space, frame, - crypt_data->page0_offset, - ULINT_MAX, &mtr); mtr_commit(&mtr); } } @@ -2302,7 +2353,7 @@ DECLARE_THREAD(fil_crypt_thread)( /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } @@ -2325,9 +2376,10 @@ fil_crypt_set_thread_cnt( for (uint i = 0; i < add; i++) { os_thread_id_t rotation_thread_id; os_thread_create(fil_crypt_thread, NULL, &rotation_thread_id); - ib_logf(IB_LOG_LEVEL_INFO, - "Creating #%d thread id %lu total threads %u.", - i+1, os_thread_pf(rotation_thread_id), new_cnt); + ib::info() << "Creating " + << i+1 << " encryption thread id " + << os_thread_pf(rotation_thread_id) + << " total threads " << new_cnt << "."; } } else if (new_cnt < srv_n_fil_crypt_threads) { srv_n_fil_crypt_threads = new_cnt; @@ -2383,12 +2435,11 @@ void fil_crypt_threads_init() /*====================*/ { - ut_ad(mutex_own(&fil_system->mutex)); if (!fil_crypt_threads_inited) { - fil_crypt_event = os_event_create(); - fil_crypt_threads_event = os_event_create(); - mutex_create(fil_crypt_threads_mutex_key, - &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK); + fil_crypt_event = os_event_create(0); + fil_crypt_threads_event = os_event_create(0); + mutex_create(LATCH_ID_FIL_CRYPT_THREADS_MUTEX, + &fil_crypt_threads_mutex); uint cnt = srv_n_fil_crypt_threads; srv_n_fil_crypt_threads = 0; @@ -2415,8 +2466,8 @@ void fil_crypt_threads_cleanup() /*=======================*/ { - os_event_free(fil_crypt_event); - os_event_free(fil_crypt_threads_event); + os_event_destroy(fil_crypt_event); + os_event_destroy(fil_crypt_threads_event); fil_crypt_threads_inited = false; } @@ -2494,9 +2545,10 @@ 
fil_space_crypt_close_tablespace( uint now = time(0); if (now >= last + 30) { - ib_logf(IB_LOG_LEVEL_WARN, - "Waited %u seconds to drop space: %lu.", - now - start, space); + ib::warn() << "Waited " + << now - start + << " seconds to drop space: " + << space << "."; last = now; } } @@ -2581,8 +2633,10 @@ fil_space_get_scrub_status( memset(status, 0, sizeof(*status)); if (crypt_data != NULL) { + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(id, &tsfound); status->space = id; - status->compressed = fil_space_get_zip_size(id) > 0; + status->compressed = page_size.is_compressed(); mutex_enter(&crypt_data->mutex); status->last_scrub_completed = crypt_data->rotate_state.scrubbing.last_scrub_completed; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index b92ac02da10..55c51f1be05 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -24,50 +24,55 @@ The tablespace memory cache Created 10/25/1995 Heikki Tuuri *******************************************************/ -#include "fil0fil.h" +#include "ha_prototypes.h" #include "fil0pagecompress.h" #include "fsp0pagecompress.h" #include "fil0crypt.h" -#include -#include - -#include "mem0mem.h" -#include "hash0hash.h" -#include "os0file.h" -#include "mach0data.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "log0recv.h" -#include "fsp0fsp.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "dict0dict.h" -#include "page0page.h" -#include "page0zip.h" -#include "trx0sys.h" -#include "row0mysql.h" -#include "os0file.h" #ifndef UNIV_HOTBACKUP +#include "btr0btr.h" +#include "buf0buf.h" +#include "dict0boot.h" +#include "dict0dict.h" +#include "fsp0file.h" +#include "fsp0file.h" +#include "fsp0fsp.h" +#include "fsp0space.h" +#include "fsp0sysspace.h" +#include "hash0hash.h" +#include "log0recv.h" +#include "mach0data.h" +#include "mem0mem.h" +#include "mtr0log.h" +#include "os0file.h" 
+#include "page0zip.h" +#include "row0mysql.h" +#include "row0trunc.h" +#include "srv0start.h" +#include "trx0purge.h" +#include "ut0new.h" # include "buf0lru.h" # include "ibuf0ibuf.h" +# include "os0event.h" # include "sync0sync.h" -# include "os0sync.h" -#else /* !UNIV_HOTBACKUP */ -# include "srv0srv.h" -static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ -#include "zlib.h" -#ifdef __linux__ -#include -#include -#include -#endif -#include "row0mysql.h" +#include "buf0flu.h" +#include "srv0start.h" +#include "trx0purge.h" +#include "ut0new.h" -MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; +/** Tries to close a file in the LRU list. The caller must hold the fil_sys +mutex. +@return true if success, false if should retry later; since i/o's +generally complete in < 100 ms, and as InnoDB writes at most 128 pages +from the buffer pool in a batch, and then immediately flushes the +files, there is a good chance that the next time we find a suitable +node from the LRU list. +@param[in] print_info if true, prints information why it + cannot close a file */ +static +bool +fil_try_to_close_file_in_LRU(bool print_info); /* IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE @@ -125,51 +130,63 @@ out of the LRU-list and keep a count of pending operations. When an operation completes, we decrement the count and return the file node to the LRU-list if the count drops to zero. */ -/** When mysqld is run, the default directory "." is the mysqld datadir, -but in the MySQL Embedded Server Library and mysqlbackup it is not the default -directory, and we must set the base file path explicitly */ -UNIV_INTERN const char* fil_path_to_mysql_datadir = "."; +/** This tablespace name is used internally during recovery to open a +general tablespace before the data dictionary are recovered and available. */ +const char general_space_name[] = "innodb_general"; + +/** Reference to the server data directory. 
Usually it is the +current working directory ".", but in the MySQL Embedded Server Library +it is an absolute path. */ +const char* fil_path_to_mysql_datadir; +Folder folder_mysql_datadir; + +/** Common InnoDB file extentions */ +const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg", ".cfp" }; /** The number of fsyncs done to the log */ -UNIV_INTERN ulint fil_n_log_flushes = 0; +ulint fil_n_log_flushes = 0; /** Number of pending redo log flushes */ -UNIV_INTERN ulint fil_n_pending_log_flushes = 0; +ulint fil_n_pending_log_flushes = 0; /** Number of pending tablespace flushes */ -UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0; +ulint fil_n_pending_tablespace_flushes = 0; /** Number of files currently open */ -UNIV_INTERN ulint fil_n_file_opened = 0; +ulint fil_n_file_opened = 0; /** The null file address */ -UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0}; - -#ifdef UNIV_PFS_MUTEX -/* Key to register fil_system_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_PFS_RWLOCK -/* Key to register file space latch with performance schema */ -UNIV_INTERN mysql_pfs_key_t fil_space_latch_key; -#endif /* UNIV_PFS_RWLOCK */ +fil_addr_t fil_addr_null = {FIL_NULL, 0}; /** The tablespace memory cache. This variable is NULL before the module is initialized. */ fil_system_t* fil_system = NULL; -/** Determine if (i) is a user tablespace id or not. */ -# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) +#ifdef UNIV_HOTBACKUP +static ulint srv_data_read; +static ulint srv_data_written; +#endif /* UNIV_HOTBACKUP */ /** Determine if user has explicitly disabled fsync(). 
*/ -#ifndef __WIN__ +#ifndef _WIN32 # define fil_buffering_disabled(s) \ - ((s)->purpose == FIL_TABLESPACE \ + ((s)->purpose == FIL_TYPE_TABLESPACE \ && srv_unix_file_flush_method \ == SRV_UNIX_O_DIRECT_NO_FSYNC) -#else /* __WIN__ */ +#else /* _WIN32 */ # define fil_buffering_disabled(s) (0) -#endif /* __WIN__ */ +#endif /* __WIN32 */ + +/** Determine if the space id is a user tablespace id or not. +@param[in] space_id Space ID to check +@return true if it is a user tablespace ID */ +UNIV_INLINE +bool +fil_is_user_tablespace_id( + ulint space_id) +{ + return(space_id > srv_undo_tablespaces_open + && space_id != srv_tmp_space.space_id()); +} #ifdef UNIV_DEBUG /** Try fil_validate() every this many times */ @@ -177,9 +194,9 @@ fil_system_t* fil_system = NULL; /******************************************************************//** Checks the consistency of the tablespace cache some of the time. -@return TRUE if ok or the check was skipped */ +@return true if ok or the check was skipped */ static -ibool +bool fil_validate_skip(void) /*===================*/ { @@ -192,7 +209,7 @@ fil_validate_skip(void) reduce the call frequency of the costly fil_validate() check in debug builds. */ if (--fil_validate_count > 0) { - return(TRUE); + return(true); } fil_validate_count = FIL_VALIDATE_SKIP; @@ -202,15 +219,24 @@ fil_validate_skip(void) /********************************************************************//** Determines if a file node belongs to the least-recently-used list. -@return TRUE if the file belongs to fil_system->LRU mutex. */ +@return true if the file belongs to fil_system->LRU mutex. 
*/ UNIV_INLINE -ibool +bool fil_space_belongs_in_lru( /*=====================*/ const fil_space_t* space) /*!< in: file space */ { - return(space->purpose == FIL_TABLESPACE - && fil_is_user_tablespace_id(space->id)); + switch (space->purpose) { + case FIL_TYPE_LOG: + return(false); + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_TEMPORARY: + case FIL_TYPE_IMPORT: + return(fil_is_user_tablespace_id(space->id)); + } + + ut_ad(0); + return(false); } /********************************************************************//** @@ -228,98 +254,72 @@ fil_node_prepare_for_io( fil_node_t* node, /*!< in: file node */ fil_system_t* system, /*!< in: tablespace memory cache */ fil_space_t* space); /*!< in: space */ -/********************************************************************//** + +/** Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. */ +pending i/o's field in the node appropriately. +@param[in,out] node file node +@param[in,out] system tablespace instance +@param[in] type IO context */ static void fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -/*******************************************************************//** -Frees a space object from the tablespace memory cache. Closes the files in -the chain but does not delete them. There must not be any pending i/o's or -flushes on the files. -@return TRUE on success */ -static -ibool -fil_space_free( -/*===========*/ - ulint id, /* in: space id */ - ibool x_latched); /* in: TRUE if caller has space->latch - in X mode */ -/********************************************************************//** -Reads data from a space to a buffer. 
Remember that the possible incomplete + fil_node_t* node, + fil_system_t* system, + const IORequest& type); + +/** Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when calculating the byte offset within a space. +@param[in] page_id page id +@param[in] page_size page size +@param[in] byte_offset remainder of offset in bytes; in aio this +must be divisible by the OS block size +@param[in] len how many bytes to read; this must not cross a +file boundary; in aio this must be a block size multiple +@param[in,out] buf buffer where to store data read; in aio this +must be appropriately aligned @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ UNIV_INLINE dberr_t fil_read( -/*=====*/ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in aio - this must be divisible by the OS block size */ - ulint len, /*!< in: how many bytes to read; this must not - cross a file boundary; in aio this must be a - block size multiple */ - void* buf, /*!< in/out: buffer where to store data read; - in aio this must be appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ + const page_id_t& page_id, + const page_size_t& page_size, + ulint byte_offset, + ulint len, + void* buf) { - return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message, write_size)); + return(fil_io(IORequestRead, true, page_id, page_size, + byte_offset, len, buf, NULL, NULL)); } -/********************************************************************//** -Writes data to a space from a buffer. Remember that the possible incomplete +/** Writes data to a space from a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when calculating the byte offset within a space. +@param[in] page_id page id +@param[in] page_size page size +@param[in] byte_offset remainder of offset in bytes; in aio this +must be divisible by the OS block size +@param[in] len how many bytes to write; this must not cross +a file boundary; in aio this must be a block size multiple +@param[in] buf buffer from which to write; in aio this must +be appropriately aligned @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ UNIV_INLINE dberr_t fil_write( -/*======*/ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in aio - this must be divisible by the OS block size */ - ulint len, /*!< in: how many bytes to write; this must - not cross a file boundary; in aio this must - be a block size multiple */ - void* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized - after fist successfull trim - operation 
for this page and if - initialized we do not trim again if - actual page size does not decrease. */ + const page_id_t& page_id, + const page_size_t& page_size, + ulint byte_offset, + ulint len, + void* buf) { ut_ad(!srv_read_only_mode); - return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message, write_size)); + return(fil_io(IORequestWrite, true, page_id, page_size, + byte_offset, len, buf, NULL, NULL)); } /*******************************************************************//** @@ -341,39 +341,6 @@ fil_space_get_by_id( return(space); } -/*******************************************************************//** -Returns the table space by a given id, NULL if not found. */ -fil_space_t* -fil_space_found_by_id( -/*==================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space = NULL; - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); - - /* Not found if space is being deleted */ - if (space && space->stop_new_ops) { - space = NULL; - } - - mutex_exit(&fil_system->mutex); - return space; -} - -/****************************************************************//** -Get space id from fil node */ -ulint -fil_node_get_space_id( -/*==================*/ - fil_node_t* node) /*!< in: Compressed node*/ -{ - ut_ad(node); - ut_ad(node->space); - - return (node->space->id); -} - /*******************************************************************//** Returns the table space by a given name, NULL if not found. */ UNIV_INLINE @@ -397,44 +364,35 @@ fil_space_get_by_name( return(space); } -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Returns the version number of a tablespace, -1 if not found. -@return version number, -1 if the tablespace does not exist in the -memory cache */ -UNIV_INTERN -ib_int64_t -fil_space_get_version( -/*==================*/ - ulint id) /*!< in: space id */ +/** Look up a tablespace. 
+The caller should hold an InnoDB table lock or a MDL that prevents +the tablespace from being dropped during the operation, +or the caller should be in single-threaded crash recovery mode +(no user connections that could drop tablespaces). +If this is not the case, fil_space_acquire() and fil_space_release() +should be used instead. +@param[in] id tablespace ID +@return tablespace, or NULL if not found */ +fil_space_t* +fil_space_get( + ulint id) { - fil_space_t* space; - ib_int64_t version = -1; - - ut_ad(fil_system); - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space) { - version = space->tablespace_version; - } - + fil_space_t* space = fil_space_get_by_id(id); mutex_exit(&fil_system->mutex); - - return(version); + ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG); + return(space); } -/*******************************************************************//** -Returns the latch of a file space. -@return latch protecting storage allocation */ -UNIV_INTERN +#ifndef UNIV_HOTBACKUP +/** Returns the latch of a file space. +@param[in] id space id +@param[out] flags tablespace flags +@return latch protecting storage allocation */ rw_lock_t* fil_space_get_latch( -/*================*/ - ulint id, /*!< in: space id */ - ulint* flags) /*!< out: tablespace flags */ + ulint id, + ulint* flags) { fil_space_t* space; @@ -455,17 +413,14 @@ fil_space_get_latch( return(&(space->latch)); } -/*******************************************************************//** -Returns the type of a file space. -@return ULINT_UNDEFINED, or FIL_TABLESPACE or FIL_LOG */ -UNIV_INTERN -ulint +/** Gets the type of a file space. 
+@param[in] id tablespace identifier +@return file type */ +fil_type_t fil_space_get_type( -/*===============*/ - ulint id) /*!< in: space id */ + ulint id) { fil_space_t* space; - ulint type = ULINT_UNDEFINED; ut_ad(fil_system); @@ -473,170 +428,276 @@ fil_space_get_type( space = fil_space_get_by_id(id); + ut_a(space); + mutex_exit(&fil_system->mutex); - if (space) { - type = space->purpose; - } + return(space->purpose); +} - return(type); +/** Note that a tablespace has been imported. +It is initially marked as FIL_TYPE_IMPORT so that no logging is +done during the import process when the space ID is stamped to each page. +Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging. +NOTE: temporary tablespaces are never imported. +@param[in] id tablespace identifier */ +void +fil_space_set_imported( + ulint id) +{ + ut_ad(fil_system != NULL); + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(id); + + ut_ad(space->purpose == FIL_TYPE_IMPORT); + space->purpose = FIL_TYPE_TABLESPACE; + + mutex_exit(&fil_system->mutex); } #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Checks if all the file nodes in a space are flushed. The caller must hold the fil_system mutex. -@return true if all are flushed */ +@return true if all are flushed */ static bool fil_space_is_flushed( /*=================*/ fil_space_t* space) /*!< in: space */ { - fil_node_t* node; - ut_ad(mutex_own(&fil_system->mutex)); - node = UT_LIST_GET_FIRST(space->chain); + for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { - while (node) { if (node->modification_counter > node->flush_counter) { ut_ad(!fil_buffering_disabled(space)); return(false); } - - node = UT_LIST_GET_NEXT(chain, node); } return(true); } -/*******************************************************************//** -Appends a new file to the chain of files of a space. 
File must be closed. -@return pointer to the file name, or NULL on error */ -UNIV_INTERN -char* -fil_node_create( -/*============*/ - const char* name, /*!< in: file name (file must be closed) */ - ulint size, /*!< in: file size in database blocks, rounded - downwards to an integer */ - ulint id, /*!< in: space id where to append */ - ibool is_raw) /*!< in: TRUE if a raw device or - a raw disk partition */ +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + +#include +/** FusionIO atomic write control info */ +#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) + +/** +Try and enable FusionIO atomic writes. +@param[in] file OS file handle +@return true if successful */ +bool +fil_fusionio_enable_atomic_write(os_file_t file) +{ + if (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) { + + uint atomic = 1; + + ut_a(file != -1); + + if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic) != -1) { + + return(true); + } + } + + return(false); +} +#endif /* !NO_FALLOCATE && UNIV_LINUX */ + +/** Append a file to the chain of files of a space. +@param[in] name file name of a file that is not open +@param[in] size file size in entire database blocks +@param[in,out] space tablespace from fil_space_create() +@param[in] is_raw whether this is a raw device or partition +@param[in] punch_hole true if supported for this node +@param[in] atomic_write true if the file has atomic write enabled +@param[in] max_pages maximum number of pages in file, +ULINT_MAX means the file size is unlimited. 
+@return pointer to the file name +@retval NULL if error */ +static +fil_node_t* +fil_node_create_low( + const char* name, + ulint size, + fil_space_t* space, + bool is_raw, + bool punch_hole, + bool atomic_write, + ulint max_pages = ULINT_MAX) { fil_node_t* node; - fil_space_t* space; - ut_a(fil_system); - ut_a(name); + ut_ad(name != NULL); + ut_ad(fil_system != NULL); - mutex_enter(&fil_system->mutex); + if (space == NULL) { + return(NULL); + } - node = static_cast(mem_zalloc(sizeof(fil_node_t))); + node = reinterpret_cast(ut_zalloc_nokey(sizeof(*node))); node->name = mem_strdup(name); ut_a(!is_raw || srv_start_raw_disk_in_use); - node->sync_event = os_event_create(); + node->sync_event = os_event_create("fsync_event"); + node->is_raw_disk = is_raw; + node->size = size; + node->magic_n = FIL_NODE_MAGIC_N; - space = fil_space_get_by_id(id); + node->init_size = size; + node->max_size = max_pages; - if (!space) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Could not find tablespace %lu for\n" - "InnoDB: file ", (ulong) id); - ut_print_filename(stderr, name); - fputs(" in the tablespace memory cache.\n", stderr); - mem_free(node->name); - - mem_free(node); - - mutex_exit(&fil_system->mutex); - - return(NULL); - } + mutex_enter(&fil_system->mutex); space->size += size; node->space = space; - UT_LIST_ADD_LAST(chain, space->chain, node); + os_file_stat_t stat_info; - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { +#ifdef UNIV_DEBUG + dberr_t err = +#endif /* UNIV_DEBUG */ - fil_system->max_assigned_id = id; + os_file_get_status( + node->name, &stat_info, false, + fsp_is_system_temporary(space->id) ? true : srv_read_only_mode); + + ut_ad(err == DB_SUCCESS); + + node->block_size = stat_info.block_size; + + /* In this debugging mode, we can overcome the limitation of some + OSes like Windows that support Punch Hole but have a hole size + effectively too large. 
By setting the block size to be half the + page size, we can bypass one of the checks that would normally + turn Page Compression off. This execution mode allows compression + to be tested even when full punch hole support is not available. */ + DBUG_EXECUTE_IF("ignore_punch_hole", + node->block_size = ut_min(stat_info.block_size, + static_cast(UNIV_PAGE_SIZE / 2)); + ); + + if (!IORequest::is_punch_hole_supported() + || !punch_hole + || node->block_size >= srv_page_size) { + + fil_no_punch_hole(node); + } else { + node->punch_hole = punch_hole; } + node->atomic_write = atomic_write; + + UT_LIST_ADD_LAST(space->chain, node); mutex_exit(&fil_system->mutex); - return(node->name); + return(node); } -/********************************************************************//** -Opens a file of a node of a tablespace. The caller must own the fil_system -mutex. +/** Appends a new file to the chain of files of a space. File must be closed. +@param[in] name file name (file must be closed) +@param[in] size file size in database blocks, rounded downwards to + an integer +@param[in,out] space space where to append +@param[in] is_raw true if a raw device or a raw disk partition +@param[in] atomic_write true if the file has atomic write enabled +@param[in] max_pages maximum number of pages in file, +ULINT_MAX means the file size is unlimited. +@return pointer to the file name +@retval NULL if error */ +char* +fil_node_create( + const char* name, + ulint size, + fil_space_t* space, + bool is_raw, + bool atomic_write, + ulint max_pages) +{ + fil_node_t* node; + + node = fil_node_create_low( + name, size, space, is_raw, IORequest::is_punch_hole_supported(), + atomic_write, max_pages); + + return(node == NULL ? NULL : node->name); +} + +/** Open a file node of a tablespace. +The caller must own the fil_system mutex. 
+@param[in,out] node File node @return false if the file can't be opened, otherwise true */ static bool fil_node_open_file( -/*===============*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space) /*!< in: space */ + fil_node_t* node) { os_offset_t size_bytes; - ibool ret; - ibool success; + bool success; byte* buf2; byte* page; + ulint flags; + ulint min_size; ulint space_id; - ulint flags=0; - ulint page_size; - ulint atomic_writes=0; + bool read_only_mode; + fil_space_t* space = node->space; - ut_ad(mutex_own(&(system->mutex))); + ut_ad(mutex_own(&fil_system->mutex)); ut_a(node->n_pending == 0); - ut_a(node->open == FALSE); + ut_a(!node->is_open); - if (node->size == 0) { - /* It must be a single-table tablespace and we do not know the - size of the file yet. First we open the file in the normal - mode, no async I/O here, for simplicity. Then do some checks, - and close the file again. - NOTE that we could not use the simple file read function - os_file_read() in Windows to read from a file opened for - async I/O! */ + read_only_mode = !fsp_is_system_temporary(space->id) + && srv_read_only_mode; + if (node->size == 0 + || (space->purpose == FIL_TYPE_TABLESPACE + && node == UT_LIST_GET_FIRST(space->chain) + && !undo::Truncate::was_tablespace_truncated(space->id) + && srv_startup_is_before_trx_rollback_phase)) { + /* We do not know the size of the file yet. First we + open the file in the normal mode, no async I/O here, + for simplicity. Then do some checks, and close the + file again. NOTE that we could not use the simple + file read function os_file_read() in Windows to read + from a file opened for async I/O! 
*/ + +retry: node->handle = os_file_create_simple_no_error_handling( - innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success, 0); + innodb_data_file_key, node->name, OS_FILE_OPEN, + OS_FILE_READ_ONLY, read_only_mode, &success); + if (!success) { /* The following call prints an error message */ - os_file_get_last_error(true); - - ut_print_timestamp(stderr); - - ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot " - "open %s\n. InnoDB: Have you deleted .ibd " - "files under a running mysqld server?\n", - node->name); + ulint err = os_file_get_last_error(true); + if (err == EMFILE + 100) { + if (fil_try_to_close_file_in_LRU(true)) + goto retry; + } + ib::warn() << "Cannot open '" << node->name << "'." + " Have you deleted .ibd files under a" + " running mysqld server?"; return(false); } size_bytes = os_file_get_size(node->handle); ut_a(size_bytes != (os_offset_t) -1); - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - #ifdef UNIV_HOTBACKUP if (space->id == 0) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); @@ -644,79 +705,44 @@ fil_node_open_file( goto add_size; } #endif /* UNIV_HOTBACKUP */ - ut_a(space->purpose != FIL_LOG); - ut_a(fil_is_user_tablespace_id(space->id)); - - if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Error: the size of single-table" - " tablespace file %s\n" - "InnoDB: is only " UINT64PF "," - " should be at least %lu!\n", - node->name, - size_bytes, - (ulong) (FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - - ut_a(0); - } + ut_a(space->purpose != FIL_TYPE_LOG); /* Read the first page of the tablespace */ - buf2 = static_cast(ut_malloc(2 * UNIV_PAGE_SIZE)); + buf2 = static_cast(ut_malloc_nokey(2 * UNIV_PAGE_SIZE)); + /* Align the memory for file i/o if we might have O_DIRECT set */ page = static_cast(ut_align(buf2, UNIV_PAGE_SIZE)); + ut_ad(page == page_align(page)); - success = 
os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE); + IORequest request(IORequest::READ); + + success = os_file_read( + request, + node->handle, page, 0, UNIV_PAGE_SIZE); space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); - page_size = fsp_flags_get_page_size(flags); - atomic_writes = fsp_flags_get_atomic_writes(flags); - - - ut_free(buf2); - /* Close the file now that we have read the space id from it */ os_file_close(node->handle); - if (UNIV_UNLIKELY(space_id != space->id)) { - fprintf(stderr, - "InnoDB: Error: tablespace id is %lu" - " in the data dictionary\n" - "InnoDB: but in file %s it is %lu!\n", - space->id, node->name, space_id); + const page_size_t page_size(flags); + + min_size = FIL_IBD_FILE_INITIAL_SIZE * page_size.physical(); + + if (size_bytes < min_size) { + + ib::error() << "The size of tablespace " << space_id << " file " + << node->name << " is only " << size_bytes + << ", should be at least " << min_size << "!"; ut_error; } - if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED - || space_id == 0)) { - fprintf(stderr, - "InnoDB: Error: tablespace id %lu" - " in file %s is not sensible\n", - (ulong) space_id, node->name); - - ut_error; - } - - if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags) - != page_size)) { - fprintf(stderr, - "InnoDB: Error: tablespace file %s" - " has page size 0x%lx\n" - "InnoDB: but the data dictionary" - " expects page size 0x%lx!\n", - node->name, flags, - fsp_flags_get_page_size(space->flags)); - - ut_error; - } - - if (UNIV_UNLIKELY(space->flags != flags)) { + if (space->flags != flags) { ulint sflags = (space->flags & ~FSP_FLAGS_MASK_DATA_DIR); ulint fflags = (flags & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); @@ -725,110 +751,137 @@ fil_node_open_file( it. 
*/ if (sflags == fflags) { - fprintf(stderr, - "InnoDB: Warning: Table flags 0x%lx" - " in the data dictionary but in file %s are 0x%lx!\n" - " Temporally corrected because DATA_DIR option to 0x%lx.\n", - space->flags, node->name, flags, space->flags); + ib::warn() + << "Tablespace " << space_id + << " flags " << space->flags + << " in the data dictionary but in file " << node->name + << " are " << flags + << ". Temporally corrected because DATA_DIR option to " + << space->flags; flags = space->flags; - } - - if (!dict_tf_verify_flags(space->flags, flags)) { - fprintf(stderr, - "InnoDB: Error: table flags are 0x%lx" - " in the data dictionary\n" - "InnoDB: but the flags in file %s are 0x%lx!\n", - space->flags, node->name, flags); - ut_error; + } else { + ib::fatal() + << "Table flags are " + << ib::hex(space->flags) << " in the data" + " dictionary but the flags in file " + << node->name << " are " << ib::hex(flags) + << "!"; } } - if (size_bytes >= (1024*1024)) { - /* Truncate the size to whole extent size. */ - size_bytes = ut_2pow_round(size_bytes, (1024*1024)); + + { + ulint size = fsp_header_get_field( + page, FSP_SIZE); + ulint free_limit = fsp_header_get_field( + page, FSP_FREE_LIMIT); + ulint free_len = flst_get_len( + FSP_HEADER_OFFSET + FSP_FREE + page); + + ut_ad(space->free_limit == 0 + || space->free_limit == free_limit); + ut_ad(space->free_len == 0 + || space->free_len == free_len); + space->size_in_header = size; + space->free_limit = free_limit; + space->free_len = free_len; } - if (!fsp_flags_is_compressed(flags)) { - node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); - } else { + ut_free(buf2); + +#ifdef MYSQL_ENCRYPTION + /* For encrypted tablespace, we need to check the + encrytion key and iv(initial vector) is readed. 
*/ + if (FSP_FLAGS_GET_ENCRYPTION(flags) + && !recv_recovery_is_on()) { + if (space->encryption_type != Encryption::AES) { + ib::error() + << "Can't read encryption" + << " key from file " + << node->name << "!"; + return(false); + } + } +#endif + + if (node->size == 0) { + ulint extent_size; + + extent_size = page_size.physical() * FSP_EXTENT_SIZE; + + /* After apply-incremental, tablespaces are not extended + to a whole megabyte. Do not cut off valid data. */ +#ifndef UNIV_HOTBACKUP + /* Truncate the size to a multiple of extent size. */ + if (size_bytes >= extent_size) { + size_bytes = ut_2pow_round(size_bytes, + extent_size); + } +#endif /* !UNIV_HOTBACKUP */ node->size = (ulint) - (size_bytes - / fsp_flags_get_zip_size(flags)); - } + (size_bytes / page_size.physical()); #ifdef UNIV_HOTBACKUP add_size: #endif /* UNIV_HOTBACKUP */ - space->size += node->size; + space->size += node->size; + } } - atomic_writes = fsp_flags_get_atomic_writes(space->flags); - /* printf("Opening file %s\n", node->name); */ /* Open the file for reading and writing, in Windows normally in the unbuffered async I/O mode, though global variables may make os_file_create() to fall back to the normal file I/O mode. 
*/ - if (space->purpose == FIL_LOG) { - node->handle = os_file_create(innodb_file_log_key, - node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_LOG_FILE, - &ret, atomic_writes); + if (space->purpose == FIL_TYPE_LOG) { + node->handle = os_file_create( + innodb_log_file_key, node->name, OS_FILE_OPEN, + OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success); } else if (node->is_raw_disk) { - node->handle = os_file_create(innodb_file_data_key, - node->name, - OS_FILE_OPEN_RAW, - OS_FILE_AIO, OS_DATA_FILE, - &ret, atomic_writes); + node->handle = os_file_create( + innodb_data_file_key, node->name, OS_FILE_OPEN_RAW, + OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success); } else { - node->handle = os_file_create(innodb_file_data_key, - node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, - &ret, atomic_writes); + node->handle = os_file_create( + innodb_data_file_key, node->name, OS_FILE_OPEN, + OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success); } - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - } + ut_a(success); - ut_a(ret); + node->is_open = true; - node->open = TRUE; - - system->n_open++; + fil_system->n_open++; fil_n_file_opened++; if (fil_space_belongs_in_lru(space)) { /* Put the node to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); + UT_LIST_ADD_FIRST(fil_system->LRU, node); } return(true); } -/**********************************************************************//** -Closes a file. */ +/** Close a file node. 
+@param[in,out] node File node */ static void fil_node_close_file( -/*================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system) /*!< in: tablespace memory cache */ + fil_node_t* node) { - ibool ret; + bool ret; - ut_ad(node && system); - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->open); + ut_ad(mutex_own(&(fil_system->mutex))); + ut_a(node->is_open); ut_a(node->n_pending == 0); ut_a(node->n_pending_flushes == 0); ut_a(!node->being_extended); #ifndef UNIV_HOTBACKUP ut_a(node->modification_counter == node->flush_counter + || node->space->purpose == FIL_TYPE_TEMPORARY || srv_fast_shutdown == 2); #endif /* !UNIV_HOTBACKUP */ @@ -837,43 +890,42 @@ fil_node_close_file( /* printf("Closing file %s\n", node->name); */ - node->open = FALSE; - ut_a(system->n_open > 0); - system->n_open--; + node->is_open = false; + ut_a(fil_system->n_open > 0); + fil_system->n_open--; fil_n_file_opened--; if (fil_space_belongs_in_lru(node->space)) { - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); + ut_a(UT_LIST_GET_LEN(fil_system->LRU) > 0); /* The node is in the LRU list, remove it */ - UT_LIST_REMOVE(LRU, system->LRU, node); + UT_LIST_REMOVE(fil_system->LRU, node); } } -/********************************************************************//** -Tries to close a file in the LRU list. The caller must hold the fil_sys +/** Tries to close a file in the LRU list. The caller must hold the fil_sys mutex. -@return TRUE if success, FALSE if should retry later; since i/o's +@return true if success, false if should retry later; since i/o's generally complete in < 100 ms, and as InnoDB writes at most 128 pages from the buffer pool in a batch, and then immediately flushes the files, there is a good chance that the next time we find a suitable -node from the LRU list */ +node from the LRU list. 
+@param[in] print_info if true, prints information why it + cannot close a file*/ static -ibool +bool fil_try_to_close_file_in_LRU( -/*=========================*/ - ibool print_info) /*!< in: if TRUE, prints information why it - cannot close a file */ + + bool print_info) { fil_node_t* node; ut_ad(mutex_own(&fil_system->mutex)); if (print_info) { - fprintf(stderr, - "InnoDB: fil_sys open file LRU len %lu\n", - (ulong) UT_LIST_GET_LEN(fil_system->LRU)); + ib::info() << "fil_sys open file LRU len " + << UT_LIST_GET_LEN(fil_system->LRU); } for (node = UT_LIST_GET_LAST(fil_system->LRU); @@ -884,9 +936,9 @@ fil_try_to_close_file_in_LRU( && node->n_pending_flushes == 0 && !node->being_extended) { - fil_node_close_file(node, fil_system); + fil_node_close_file(node); - return(TRUE); + return(true); } if (!print_info) { @@ -894,30 +946,26 @@ fil_try_to_close_file_in_LRU( } if (node->n_pending_flushes > 0) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, ", because n_pending_flushes %lu\n", - (ulong) node->n_pending_flushes); + + ib::info() << "Cannot close file " << node->name + << ", because n_pending_flushes " + << node->n_pending_flushes; } if (node->modification_counter != node->flush_counter) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, - ", because mod_count %ld != fl_count %ld\n", - (long) node->modification_counter, - (long) node->flush_counter); - + ib::warn() << "Cannot close file " << node->name + << ", because modification count " + << node->modification_counter << + " != flush count " << node->flush_counter; } if (node->being_extended) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, ", because it is being extended\n"); + ib::info() << "Cannot close file " << node->name + << ", because it is being extended"; } } - return(FALSE); + return(false); } 
/*******************************************************************//** @@ -931,150 +979,138 @@ fil_mutex_enter_and_prepare_for_io( ulint space_id) /*!< in: space id */ { fil_space_t* space; - ibool success; - ibool print_info = FALSE; + bool success; + bool print_info = false; ulint count = 0; ulint count2 = 0; -retry: - mutex_enter(&fil_system->mutex); + for (;;) { + mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ + if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files and system tablespace files always + open; this is important in preventing deadlocks in this + module, as a page read completion often performs + another read from the insert buffer. The insert buffer + is in tablespace 0, and we cannot end up waiting in + this function. */ + return; + } - return; - } + space = fil_space_get_by_id(space_id); - space = fil_space_get_by_id(space_id); + if (space != NULL && space->stop_ios) { + /* We are going to do a rename file and want to stop + new i/o's for a while. 
*/ - if (space != NULL && space->stop_ios) { - /* We are going to do a rename file and want to stop new i/o's - for a while */ + if (count2 > 20000) { + ib::warn() << "Tablespace " << space->name + << " has i/o ops stopped for a long" + " time " << count2; + } - if (count2 > 20000) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, - " has i/o ops stopped for a long time %lu\n", - (ulong) count2); + mutex_exit(&fil_system->mutex); + +#ifndef UNIV_HOTBACKUP + + /* Wake the i/o-handler threads to make sure pending + i/o's are performed */ + os_aio_simulated_wake_handler_threads(); + + /* The sleep here is just to give IO helper threads a + bit of time to do some work. It is not required that + all IO related to the tablespace being renamed must + be flushed here as we do fil_flush() in + fil_rename_tablespace() as well. */ + os_thread_sleep(20000); + +#endif /* UNIV_HOTBACKUP */ + + /* Flush tablespaces so that we can close modified + files in the LRU list */ + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); + + os_thread_sleep(20000); + + count2++; + + continue; + } + + if (fil_system->n_open < fil_system->max_n_open) { + + return; + } + + /* If the file is already open, no need to do anything; if the + space does not exist, we handle the situation in the function + which called this function. */ + + if (space == NULL || UT_LIST_GET_FIRST(space->chain)->is_open) { + + return; + } + + if (count > 1) { + print_info = true; + } + + /* Too many files are open, try to close some */ + do { + success = fil_try_to_close_file_in_LRU(print_info); + + } while (success + && fil_system->n_open >= fil_system->max_n_open); + + if (fil_system->n_open < fil_system->max_n_open) { + /* Ok */ + return; + } + + if (count >= 2) { + ib::warn() << "Too many (" << fil_system->n_open + << ") files stay open while the maximum" + " allowed value would be " + << fil_system->max_n_open << ". 
You may need" + " to raise the value of innodb_open_files in" + " my.cnf."; + + return; } mutex_exit(&fil_system->mutex); #ifndef UNIV_HOTBACKUP - - /* Wake the i/o-handler threads to make sure pending - i/o's are performed */ + /* Wake the i/o-handler threads to make sure pending i/o's are + performed */ os_aio_simulated_wake_handler_threads(); - /* The sleep here is just to give IO helper threads a - bit of time to do some work. It is not required that - all IO related to the tablespace being renamed must - be flushed here as we do fil_flush() in - fil_rename_tablespace() as well. */ os_thread_sleep(20000); +#endif /* !UNIV_HOTBACKUP */ + /* Flush tablespaces so that we can close modified files in + the LRU list. */ -#endif /* UNIV_HOTBACKUP */ + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); - /* Flush tablespaces so that we can close modified - files in the LRU list */ - fil_flush_file_spaces(FIL_TABLESPACE); - - os_thread_sleep(20000); - - count2++; - - goto retry; + count++; } - - if (fil_system->n_open < fil_system->max_n_open) { - - return; - } - - /* If the file is already open, no need to do anything; if the space - does not exist, we handle the situation in the function which called - this function */ - - if (!space || UT_LIST_GET_FIRST(space->chain)->open) { - - return; - } - - if (count > 1) { - print_info = TRUE; - } - - /* Too many files are open, try to close some */ -close_more: - success = fil_try_to_close_file_in_LRU(print_info); - - if (success && fil_system->n_open >= fil_system->max_n_open) { - - goto close_more; - } - - if (fil_system->n_open < fil_system->max_n_open) { - /* Ok */ - - return; - } - - if (count >= 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: too many (%lu) files stay open" - " while the maximum\n" - "InnoDB: allowed value would be %lu.\n" - "InnoDB: You may need to raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); - 
- return; - } - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - - os_thread_sleep(20000); -#endif - /* Flush tablespaces so that we can close modified files in the LRU - list */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - - goto retry; } -/*******************************************************************//** -Frees a file node object from a tablespace memory cache. */ +/** Prepare to free a file node object from a tablespace memory cache. +@param[in,out] node file node +@param[in] space tablespace */ static void -fil_node_free( -/*==========*/ - fil_node_t* node, /*!< in, own: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space) /*!< in: space where the file node is chained */ +fil_node_close_to_free( + fil_node_t* node, + fil_space_t* space) { - ut_ad(node && system && space); - ut_ad(mutex_own(&(system->mutex))); + ut_ad(mutex_own(&fil_system->mutex)); ut_a(node->magic_n == FIL_NODE_MAGIC_N); ut_a(node->n_pending == 0); ut_a(!node->being_extended); - if (node->open) { + if (node->is_open) { /* We fool the assertion in fil_node_close_file() to think there are no unflushed modifications in the file */ @@ -1091,186 +1127,261 @@ fil_node_free( space->is_in_unflushed_spaces = false; - UT_LIST_REMOVE(unflushed_spaces, - system->unflushed_spaces, - space); + UT_LIST_REMOVE(fil_system->unflushed_spaces, space); } - fil_node_close_file(node, system); + fil_node_close_file(node); } - - space->size -= node->size; - - UT_LIST_REMOVE(chain, space->chain, node); - - os_event_free(node->sync_event); - mem_free(node->name); - mem_free(node); } -#ifdef UNIV_LOG_ARCHIVE -/****************************************************************//** -Drops files from the start of a file space, so that its size is cut by -the amount given. 
*/ -UNIV_INTERN +/** Detach a space object from the tablespace memory cache. +Closes the files in the chain but does not delete them. +There must not be any pending i/o's or flushes on the files. +@param[in,out] space tablespace */ +static void -fil_space_truncate_start( -/*=====================*/ - ulint id, /*!< in: space id */ - ulint trunc_len) /*!< in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ +fil_space_detach( + fil_space_t* space) { - fil_node_t* node; - fil_space_t* space; + ut_ad(mutex_own(&fil_system->mutex)); + + HASH_DELETE(fil_space_t, hash, fil_system->spaces, space->id, space); + + fil_space_t* fnamespace = fil_space_get_by_name(space->name); + + ut_a(space == fnamespace); + + HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(space->name), space); + + if (space->is_in_unflushed_spaces) { + + ut_ad(!fil_buffering_disabled(space)); + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE(fil_system->unflushed_spaces, space); + } + + UT_LIST_REMOVE(fil_system->space_list, space); + + ut_a(space->magic_n == FIL_SPACE_MAGIC_N); + ut_a(space->n_pending_flushes == 0); + + for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain); + fil_node != NULL; + fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { + + fil_node_close_to_free(fil_node, space); + } +} + +/** Free a tablespace object on which fil_space_detach() was invoked. +There must not be any pending i/o's or flushes on the files. +@param[in,out] space tablespace */ +static +void +fil_space_free_low( + fil_space_t* space) +{ + /* The tablespace must not be in fil_system->named_spaces. 
*/ + ut_ad(srv_fast_shutdown == 2 || space->max_lsn == 0); + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; ) { + ut_d(space->size -= node->size); + os_event_destroy(node->sync_event); + ut_free(node->name); + fil_node_t* old_node = node; + node = UT_LIST_GET_NEXT(chain, node); + ut_free(old_node); + } + + ut_ad(space->size == 0); + + rw_lock_free(&space->latch); + + ut_free(space->name); + ut_free(space); +} + +/** Frees a space object from the tablespace memory cache. +Closes the files in the chain but does not delete them. +There must not be any pending i/o's or flushes on the files. +@param[in] id tablespace identifier +@param[in] x_latched whether the caller holds X-mode space->latch +@return true if success */ +bool +fil_space_free( + ulint id, + bool x_latched) +{ + ut_ad(id != TRX_SYS_SPACE); mutex_enter(&fil_system->mutex); + fil_space_t* space = fil_space_get_by_id(id); - space = fil_space_get_by_id(id); - - ut_a(space); - - while (trunc_len > 0) { - node = UT_LIST_GET_FIRST(space->chain); - - ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len); - - trunc_len -= node->size * UNIV_PAGE_SIZE; - - fil_node_free(node, fil_system, space); + if (space != NULL) { + fil_space_detach(space); } mutex_exit(&fil_system->mutex); -} -#endif /* UNIV_LOG_ARCHIVE */ -/*******************************************************************//** -Creates a space memory object and puts it to the 'fil system' hash table. -If there is an error, prints an error message to the .err log. 
-@return TRUE if success */ -UNIV_INTERN -ibool + if (space != NULL) { + if (x_latched) { + rw_lock_x_unlock(&space->latch); + } + + bool need_mutex = !recv_recovery_on; + + if (need_mutex) { + log_mutex_enter(); + } + + ut_ad(log_mutex_own()); + + if (space->max_lsn != 0) { + ut_d(space->max_lsn = 0); + UT_LIST_REMOVE(fil_system->named_spaces, space); + } + + if (need_mutex) { + log_mutex_exit(); + } + + fil_space_free_low(space); + } + + return(space != NULL); +} + +/** Create a space memory object and put it to the fil_system hash table. +The tablespace name is independent from the tablespace file-name. +Error messages are issued to the server log. +@param[in] name Tablespace name +@param[in] id Tablespace identifier +@param[in] flags Tablespace flags +@param[in] purpose Tablespace purpose +@return pointer to created tablespace, to be filled in with fil_node_create() +@retval NULL on failure (such as when the same tablespace exists) */ +fil_space_t* fil_space_create( -/*=============*/ - const char* name, /*!< in: space name */ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: tablespace flags */ - ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ - fil_space_crypt_t* crypt_data) /*!< in: crypt data */ + const char* name, + ulint id, + ulint flags, + fil_type_t purpose, + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ { fil_space_t* space; - DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); + ut_ad(fil_system); + ut_ad(fsp_flags_is_valid(flags)); + ut_ad(srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0); - ut_a(fil_system); + DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL);); - /* Look for a matching tablespace and if found free it. */ - do { - mutex_enter(&fil_system->mutex); + mutex_enter(&fil_system->mutex); - space = fil_space_get_by_name(name); + /* Look for a matching tablespace. 
*/ + space = fil_space_get_by_name(name); - if (space != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Tablespace '%s' exists in the cache " - "with id %lu != %lu", - name, (ulong) space->id, (ulong) id); + if (space != NULL) { + mutex_exit(&fil_system->mutex); - if (id == 0 || purpose != FIL_TABLESPACE) { + ib::warn() << "Tablespace '" << name << "' exists in the" + " cache with id " << space->id << " != " << id; - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - ib_logf(IB_LOG_LEVEL_WARN, - "Freeing existing tablespace '%s' entry " - "from the cache with id %lu", - name, (ulong) id); - - ibool success = fil_space_free(space->id, FALSE); - ut_a(success); - - mutex_exit(&fil_system->mutex); - } - - } while (space != 0); + return(NULL); + } space = fil_space_get_by_id(id); - if (space != 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to add tablespace '%s' with id %lu " - "to the tablespace memory cache, but tablespace '%s' " - "with id %lu already exists in the cache!", - name, (ulong) id, space->name, (ulong) space->id); - + if (space != NULL) { + ib::error() << "Trying to add tablespace '" << name + << "' with id " << id + << " to the tablespace memory cache, but tablespace '" + << space->name << "' already exists in the cache!"; mutex_exit(&fil_system->mutex); - - return(FALSE); + return(NULL); } - space = static_cast(mem_zalloc(sizeof(*space))); + space = static_cast(ut_zalloc_nokey(sizeof(*space))); - space->name = mem_strdup(name); space->id = id; + space->name = mem_strdup(name); - fil_system->tablespace_version++; - space->tablespace_version = fil_system->tablespace_version; - space->mark = FALSE; + UT_LIST_INIT(space->chain, &fil_node_t::chain); - if (purpose == FIL_TABLESPACE && !recv_recovery_on + /* This warning is not applicable while MEB scanning the redo logs */ +#ifndef UNIV_HOTBACKUP + if (fil_type_is_data(purpose) + && !recv_recovery_on && id > fil_system->max_assigned_id) { if (!fil_system->space_id_reuse_warned) { - 
fil_system->space_id_reuse_warned = TRUE; + fil_system->space_id_reuse_warned = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Allocated tablespace %lu, old maximum " - "was %lu", - (ulong) id, - (ulong) fil_system->max_assigned_id); + ib::warn() << "Allocated tablespace ID " << id + << " for " << name << ", old maximum was " + << fil_system->max_assigned_id; } fil_system->max_assigned_id = id; } - +#endif /* !UNIV_HOTBACKUP */ space->purpose = purpose; space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; - space->printed_compression_failure = false; + + space->encryption_type = Encryption::NONE; rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); + if (space->purpose == FIL_TYPE_TEMPORARY) { +#ifndef UNIV_HOTBACKUP + ut_d(space->latch.set_temp_fsp()); +#endif /* !UNIV_HOTBACKUP */ + } + HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, ut_fold_string(name), space); - space->is_in_unflushed_spaces = false; - UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); + UT_LIST_ADD_LAST(fil_system->space_list, space); + + if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system->max_assigned_id) { + + fil_system->max_assigned_id = id; + } space->crypt_data = crypt_data; + if (crypt_data) { + space->read_page0 = true; + /* If table could be encrypted print info */ + ib::info() << "Tablespace ID " << id << " name " << space->name + << ":" << fil_crypt_get_mode(crypt_data) + << " " << fil_crypt_get_type(crypt_data); + } + mutex_exit(&fil_system->mutex); - return(TRUE); + return(space); } /*******************************************************************//** Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. 
-@return TRUE if assigned, FALSE if not */ -UNIV_INTERN -ibool +@return true if assigned, false if not */ +bool fil_assign_new_space_id( /*====================*/ ulint* space_id) /*!< in/out: space id */ { ulint id; - ibool success; + bool success; mutex_enter(&fil_system->mutex); @@ -1283,17 +1394,12 @@ fil_assign_new_space_id( id++; if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Warning: you are running out of new" - " single-table tablespace id's.\n" - "InnoDB: Current counter is %lu and it" - " must not exceed %lu!\n" - "InnoDB: To reset the counter to zero" - " you have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id, - (ulong) SRV_LOG_SPACE_FIRST_ID); + ib::warn() << "You are running out of new single-table" + " tablespace id's. Current counter is " << id + << " and it must not exceed" << SRV_LOG_SPACE_FIRST_ID + << "! To reset the counter to zero you have to dump" + " all your tables and recreate the whole InnoDB" + " installation."; } success = (id < SRV_LOG_SPACE_FIRST_ID); @@ -1301,15 +1407,11 @@ fil_assign_new_space_id( if (success) { *space_id = fil_system->max_assigned_id = id; } else { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: You have run out of single-table" - " tablespace id's!\n" - "InnoDB: Current counter is %lu.\n" - "InnoDB: To reset the counter to zero you" - " have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id); + ib::warn() << "You have run out of single-table tablespace" + " id's! Current counter is " << id + << ". To reset the counter to zero" + " you have to dump all your tables and" + " recreate the whole InnoDB installation."; *space_id = ULINT_UNDEFINED; } @@ -1319,108 +1421,9 @@ fil_assign_new_space_id( } /*******************************************************************//** -Frees a space object from the tablespace memory cache. 
Closes the files in -the chain but does not delete them. There must not be any pending i/o's or -flushes on the files. -@return TRUE if success */ -static -ibool -fil_space_free( -/*===========*/ - /* out: TRUE if success */ - ulint id, /* in: space id */ - ibool x_latched) /* in: TRUE if caller has space->latch - in X mode */ -{ - fil_space_t* space; - fil_space_t* fnamespace; - - ut_ad(mutex_own(&fil_system->mutex)); - - space = fil_space_get_by_id(id); - - if (!space) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: trying to remove tablespace %lu" - " from the cache but\n" - "InnoDB: it is not there.\n", (ulong) id); - - return(FALSE); - } - - HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space); - - fnamespace = fil_space_get_by_name(space->name); - ut_a(fnamespace); - ut_a(space == fnamespace); - - HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(space->name), space); - - if (space->is_in_unflushed_spaces) { - - ut_ad(!fil_buffering_disabled(space)); - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces, - space); - } - - UT_LIST_REMOVE(space_list, fil_system->space_list, space); - - ut_a(space->magic_n == FIL_SPACE_MAGIC_N); - ut_a(0 == space->n_pending_flushes); - - for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain); - fil_node != NULL; - fil_node = UT_LIST_GET_FIRST(space->chain)) { - - fil_node_free(fil_node, fil_system, space); - } - - ut_a(0 == UT_LIST_GET_LEN(space->chain)); - - if (x_latched) { - rw_lock_x_unlock(&space->latch); - } - - rw_lock_free(&(space->latch)); - - fil_space_destroy_crypt_data(&(space->crypt_data)); - - mem_free(space->name); - mem_free(space); - - return(TRUE); -} - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache -associated with a space id. 
-@return file_space_t pointer, NULL if space not found */ -fil_space_t* -fil_space_get( -/*==========*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - mutex_exit(&fil_system->mutex); - - return (space); -} - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache +Returns a pointer to the fil_space_t that is in the memory cache associated with a space id. The caller must lock fil_system->mutex. -@return file_space_t pointer, NULL if space not found */ +@return file_space_t pointer, NULL if space not found */ UNIV_INLINE fil_space_t* fil_space_get_space( @@ -1433,11 +1436,16 @@ fil_space_get_space( ut_ad(fil_system); space = fil_space_get_by_id(id); - if (space == NULL) { - return(NULL); + if (space == NULL || space->size != 0) { + return(space); } - if (space->size == 0 && space->purpose == FIL_TABLESPACE) { + switch (space->purpose) { + case FIL_TYPE_LOG: + break; + case FIL_TYPE_TEMPORARY: + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_IMPORT: ut_a(id != 0); mutex_exit(&fil_system->mutex); @@ -1451,47 +1459,42 @@ fil_space_get_space( /* We are still holding the fil_system->mutex. Check if the space is still in memory cache. */ space = fil_space_get_by_id(id); - if (space == NULL) { + + if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) { return(NULL); } /* The following code must change when InnoDB supports - multiple datafiles per tablespace. Note that there is small - change that space is found from tablespace list but - we have not yet created node for it and as we hold - fil_system mutex here fil_node_create can't continue. */ - ut_a(UT_LIST_GET_LEN(space->chain) == 1 || UT_LIST_GET_LEN(space->chain) == 0); + multiple datafiles per tablespace. 
*/ + ut_a(1 == UT_LIST_GET_LEN(space->chain)); node = UT_LIST_GET_FIRST(space->chain); - if (node) { - /* It must be a single-table tablespace and we have not opened - the file yet; the following calls will open it and update the - size fields */ + /* It must be a single-table tablespace and we have not opened + the file yet; the following calls will open it and update the + size fields */ - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The single-table tablespace can't be opened, - because the ibd file is missing. */ - return(NULL); - } - fil_node_complete_io(node, fil_system, OS_FILE_READ); + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The single-table tablespace can't be opened, + because the ibd file is missing. */ + return(NULL); } + + fil_node_complete_io(node, fil_system, IORequestRead); } return(space); } -/*******************************************************************//** -Returns the path from the first fil_node_t found for the space ID sent. +/** Returns the path from the first fil_node_t found with this space ID. The caller is responsible for freeing the memory allocated here for the value returned. -@return own: A copy of fil_node_t::path, NULL if space ID is zero +@param[in] id Tablespace ID +@return own: A copy of fil_node_t::path, NULL if space ID is zero or not found. */ -UNIV_INTERN char* fil_space_get_first_path( -/*=====================*/ - ulint id) /*!< in: space id */ + ulint id) { fil_space_t* space; fil_node_t* node; @@ -1524,8 +1527,7 @@ fil_space_get_first_path( /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
-@return space size, 0 if space not found */ -UNIV_INTERN +@return space size, 0 if space not found */ ulint fil_space_get_size( /*===============*/ @@ -1549,8 +1551,7 @@ fil_space_get_size( /*******************************************************************//** Returns the flags of the space. The tablespace must be cached in the memory cache. -@return flags, ULINT_UNDEFINED if space not found */ -UNIV_INTERN +@return flags, ULINT_UNDEFINED if space not found */ ulint fil_space_get_flags( /*================*/ @@ -1561,10 +1562,6 @@ fil_space_get_flags( ut_ad(fil_system); - if (!id) { - return(0); - } - mutex_enter(&fil_system->mutex); space = fil_space_get_space(id); @@ -1582,50 +1579,104 @@ fil_space_get_flags( return(flags); } -/*******************************************************************//** -Returns the compressed page size of the space, or 0 if the space -is not compressed. The tablespace must be cached in the memory cache. -@return compressed page size, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_zip_size( -/*===================*/ - ulint id) /*!< in: space id */ +/** Check if table is mark for truncate. +@param[in] id space id +@return true if tablespace is marked for truncate. */ +bool +fil_space_is_being_truncated( + ulint id) { - ulint flags; - - flags = fil_space_get_flags(id); - - if (flags && flags != ULINT_UNDEFINED) { - - return(fsp_flags_get_zip_size(flags)); - } - - return(flags); + bool mark_for_truncate; + mutex_enter(&fil_system->mutex); + mark_for_truncate = fil_space_get_by_id(id)->is_being_truncated; + mutex_exit(&fil_system->mutex); + return(mark_for_truncate); } -/*******************************************************************//** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. The tablespace must be cached in the memory cache. 
-@return TRUE if the address is meaningful */ -UNIV_INTERN -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint page_no)/*!< in: page number */ +/** Open each fil_node_t of a named fil_space_t if not already open. +@param[in] name Tablespace name +@return true if all nodes are open */ +bool +fil_space_open( + const char* name) { - if (fil_space_get_size(id) > page_no) { + ut_ad(fil_system != NULL); - return(TRUE); + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_name(name); + fil_node_t* node; + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (!node->is_open + && !fil_node_open_file(node)) { + mutex_exit(&fil_system->mutex); + return(false); + } } - return(FALSE); + mutex_exit(&fil_system->mutex); + + return(true); +} + +/** Close each fil_node_t of a named fil_space_t if open. +@param[in] name Tablespace name */ +void +fil_space_close( + const char* name) +{ + if (fil_system == NULL) { + return; + } + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_name(name); + if (space == NULL) { + mutex_exit(&fil_system->mutex); + return; + } + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (node->is_open) { + fil_node_close_file(node); + } + } + + mutex_exit(&fil_system->mutex); +} + +/** Returns the page size of the space and whether it is compressed or not. +The tablespace must be cached in the memory cache. 
+@param[in] id space id +@param[out] found true if tablespace was found +@return page size */ +const page_size_t +fil_space_get_page_size( + ulint id, + bool* found) +{ + const ulint flags = fil_space_get_flags(id); + + if (flags == ULINT_UNDEFINED) { + *found = false; + return(univ_page_size); + } + + *found = true; + + return(page_size_t(flags)); } /****************************************************************//** Initializes the tablespace memory cache. */ -UNIV_INTERN void fil_init( /*=====*/ @@ -1638,15 +1689,18 @@ fil_init( ut_a(max_n_open > 0); fil_system = static_cast( - mem_zalloc(sizeof(fil_system_t))); + ut_zalloc_nokey(sizeof(*fil_system))); - mutex_create(fil_system_mutex_key, - &fil_system->mutex, SYNC_ANY_LATCH); + mutex_create(LATCH_ID_FIL_SYSTEM, &fil_system->mutex); fil_system->spaces = hash_create(hash_size); fil_system->name_hash = hash_create(hash_size); - UT_LIST_INIT(fil_system->LRU); + UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU); + UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list); + UT_LIST_INIT(fil_system->unflushed_spaces, + &fil_space_t::unflushed_spaces); + UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces); fil_system->max_n_open = max_n_open; @@ -1659,7 +1713,6 @@ database server shutdown. This should be called at a server startup after the space objects for the log and the system tablespace have been created. The purpose of this operation is to make sure we never run out of file descriptors if we need to read from the insert buffer or to write to the log. */ -UNIV_INTERN void fil_open_log_and_system_tablespace_files(void) /*==========================================*/ @@ -1683,9 +1736,8 @@ fil_open_log_and_system_tablespace_files(void) node != NULL; node = UT_LIST_GET_NEXT(chain, node)) { - if (!node->open) { - if (!fil_node_open_file(node, fil_system, - space)) { + if (!node->is_open) { + if (!fil_node_open_file(node)) { /* This func is called during server's startup. 
If some file of log or system tablespace is missing, the server @@ -1697,25 +1749,20 @@ fil_open_log_and_system_tablespace_files(void) if (fil_system->max_n_open < 10 + fil_system->n_open) { - fprintf(stderr, - "InnoDB: Warning: you must" - " raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf! Remember that" - " InnoDB keeps all log files" - " and all system\n" - "InnoDB: tablespace files open" + ib::warn() << "You must raise the value of" + " innodb_open_files in my.cnf!" + " Remember that InnoDB keeps all" + " log files and all system" + " tablespace files open" " for the whole time mysqld is" - " running, and\n" - "InnoDB: needs to open also" + " running, and needs to open also" " some .ibd files if the" - " file-per-table storage\n" - "InnoDB: model is used." - " Current open files %lu," - " max allowed" - " open files %lu.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); + " file-per-table storage model is used." + " Current open files " + << fil_system->n_open + << ", max allowed open files " + << fil_system->max_n_open + << "."; } } } @@ -1726,18 +1773,20 @@ fil_open_log_and_system_tablespace_files(void) /*******************************************************************//** Closes all open files. There must not be any pending i/o's or not flushed modifications in the files. */ -UNIV_INTERN void fil_close_all_files(void) /*=====================*/ { fil_space_t* space; + /* At shutdown, we should not have any files in this list. 
*/ + ut_ad(srv_fast_shutdown == 2 + || UT_LIST_GET_LEN(fil_system->named_spaces) == 0); + mutex_enter(&fil_system->mutex); - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space != NULL) { + for (space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; ) { fil_node_t* node; fil_space_t* prev_space = space; @@ -1745,23 +1794,25 @@ fil_close_all_files(void) node != NULL; node = UT_LIST_GET_NEXT(chain, node)) { - if (node->open) { - fil_node_close_file(node, fil_system); + if (node->is_open) { + fil_node_close_file(node); } } space = UT_LIST_GET_NEXT(space_list, space); - - fil_space_free(prev_space->id, FALSE); + fil_space_detach(prev_space); + fil_space_free_low(prev_space); } mutex_exit(&fil_system->mutex); + + ut_ad(srv_fast_shutdown == 2 + || UT_LIST_GET_LEN(fil_system->named_spaces) == 0); } /*******************************************************************//** Closes the redo log files. There must not be any pending i/o's or not flushed modifications in the files. */ -UNIV_INTERN void fil_close_log_files( /*================*/ @@ -1777,24 +1828,28 @@ fil_close_log_files( fil_node_t* node; fil_space_t* prev_space = space; - if (space->purpose != FIL_LOG) { + if (space->purpose != FIL_TYPE_LOG) { space = UT_LIST_GET_NEXT(space_list, space); continue; } + /* Log files are not in the fil_system->named_spaces list. */ + ut_ad(space->max_lsn == 0); + for (node = UT_LIST_GET_FIRST(space->chain); node != NULL; node = UT_LIST_GET_NEXT(chain, node)) { - if (node->open) { - fil_node_close_file(node, fil_system); + if (node->is_open) { + fil_node_close_file(node); } } space = UT_LIST_GET_NEXT(space_list, space); if (free) { - fil_space_free(prev_space->id, FALSE); + fil_space_detach(prev_space); + fil_space_free_low(prev_space); } } @@ -1804,17 +1859,13 @@ fil_close_log_files( /*******************************************************************//** Sets the max tablespace id counter if the given number is bigger than the previous value. 
*/ -UNIV_INTERN void fil_set_max_space_id_if_bigger( /*===========================*/ ulint max_id) /*!< in: maximum known id */ { if (max_id >= SRV_LOG_SPACE_FIRST_ID) { - fprintf(stderr, - "InnoDB: Fatal error: max tablespace id" - " is too high, %lu\n", (ulong) max_id); - ut_error; + ib::fatal() << "Max tablespace id is too high, " << max_id; } mutex_enter(&fil_system->mutex); @@ -1827,57 +1878,4893 @@ fil_set_max_space_id_if_bigger( mutex_exit(&fil_system->mutex); } -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page header -of the first page of a data file of the system tablespace (space 0), -which is uncompressed. */ -static MY_ATTRIBUTE((warn_unused_result)) +/** Write the flushed LSN to the page header of the first page in the +system tablespace. +@param[in] lsn flushed LSN +@return DB_SUCCESS or error number */ dberr_t -fil_write_lsn_and_arch_no_to_file( -/*==============================*/ - ulint space, /*!< in: space to write to */ - ulint sum_of_sizes, /*!< in: combined size of previous files - in space, in database pages */ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no MY_ATTRIBUTE((unused))) - /*!< in: archived log number to write */ +fil_write_flushed_lsn( + lsn_t lsn) { byte* buf1; byte* buf; dberr_t err; - buf1 = static_cast(mem_alloc(2 * UNIV_PAGE_SIZE)); + buf1 = static_cast(ut_malloc_nokey(2 * UNIV_PAGE_SIZE)); buf = static_cast(ut_align(buf1, UNIV_PAGE_SIZE)); - err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - lsn); + const page_id_t page_id(TRX_SYS_SPACE, 0); - err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); + err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(), + buf); + + if (err == DB_SUCCESS) { + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn); + 
err = fil_write(page_id, univ_page_size, 0, + univ_page_size.physical(), buf); + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); } - mem_free(buf1); + ut_free(buf1); + return(err); +} + +#ifndef UNIV_HOTBACKUP +/** Acquire a tablespace when it could be dropped concurrently. +Used by background threads that do not necessarily hold proper locks +for concurrency control. +@param[in] id tablespace ID +@param[in] silent whether to silently ignore missing tablespaces +@return the tablespace, or NULL if missing or being deleted */ +inline +fil_space_t* +fil_space_acquire_low( + ulint id, + bool silent) +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + if (space == NULL) { + if (!silent) { + ib::warn() << "Trying to access missing" + " tablespace " << id; + } + } else if (space->stop_new_ops || space->is_being_truncated) { + space = NULL; + } else { + space->n_pending_ops++; + } + + mutex_exit(&fil_system->mutex); + + return(space); +} + +/** Acquire a tablespace when it could be dropped concurrently. +Used by background threads that do not necessarily hold proper locks +for concurrency control. +@param[in] id tablespace ID +@return the tablespace, or NULL if missing or being deleted */ +fil_space_t* +fil_space_acquire( + ulint id) +{ + return(fil_space_acquire_low(id, false)); +} + +/** Acquire a tablespace that may not exist. +Used by background threads that do not necessarily hold proper locks +for concurrency control. +@param[in] id tablespace ID +@return the tablespace, or NULL if missing or being deleted */ +fil_space_t* +fil_space_acquire_silent( + ulint id) +{ + return(fil_space_acquire_low(id, true)); +} + +/** Release a tablespace acquired with fil_space_acquire(). 
+@param[in,out] space tablespace to release */ +void +fil_space_release( + fil_space_t* space) +{ + mutex_enter(&fil_system->mutex); + ut_ad(space->magic_n == FIL_SPACE_MAGIC_N); + ut_ad(space->n_pending_ops > 0); + space->n_pending_ops--; + mutex_exit(&fil_system->mutex); +} +#endif /* !UNIV_HOTBACKUP */ + +/********************************************************//** +Creates the database directory for a table if it does not exist yet. */ +void +fil_create_directory_for_tablename( +/*===============================*/ + const char* name) /*!< in: name in the standard + 'databasename/tablename' format */ +{ + const char* namend; + char* path; + ulint len; + + len = strlen(fil_path_to_mysql_datadir); + namend = strchr(name, '/'); + ut_a(namend); + path = static_cast(ut_malloc_nokey(len + (namend - name) + 2)); + + memcpy(path, fil_path_to_mysql_datadir, len); + path[len] = '/'; + memcpy(path + len + 1, name, namend - name); + path[len + (namend - name) + 1] = 0; + + os_normalize_path(path); + + bool success = os_file_create_directory(path, false); + ut_a(success); + + ut_free(path); +} + +/** Write a log record about an operation on a tablespace file. +@param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE +or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2 +@param[in] space_id tablespace identifier +@param[in] first_page_no first page number in the file +@param[in] path file path +@param[in] new_path if type is MLOG_FILE_RENAME2, the new name +@param[in] flags if type is MLOG_FILE_CREATE2, the space flags +@param[in,out] mtr mini-transaction */ +static +void +fil_op_write_log( + mlog_id_t type, + ulint space_id, + ulint first_page_no, + const char* path, + const char* new_path, + ulint flags, + mtr_t* mtr) +{ + byte* log_ptr; + ulint len; + + ut_ad(first_page_no == 0); + + /* fil_name_parse() requires that there be at least one path + separator and that the file path end with ".ibd". 
*/ + ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL); + ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD) == 0); + + log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1); + + if (log_ptr == NULL) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } + + log_ptr = mlog_write_initial_log_record_low( + type, space_id, first_page_no, log_ptr, mtr); + + if (type == MLOG_FILE_CREATE2) { + mach_write_to_4(log_ptr, flags); + log_ptr += 4; + } + + /* Let us store the strings as null-terminated for easier readability + and handling */ + + len = strlen(path) + 1; + + mach_write_to_2(log_ptr, len); + log_ptr += 2; + mlog_close(mtr, log_ptr); + + mlog_catenate_string( + mtr, reinterpret_cast(path), len); + + switch (type) { + case MLOG_FILE_RENAME2: + ut_ad(strchr(new_path, OS_PATH_SEPARATOR) != NULL); + len = strlen(new_path) + 1; + log_ptr = mlog_open(mtr, 2 + len); + ut_a(log_ptr); + mach_write_to_2(log_ptr, len); + log_ptr += 2; + mlog_close(mtr, log_ptr); + + mlog_catenate_string( + mtr, reinterpret_cast(new_path), len); + break; + case MLOG_FILE_NAME: + case MLOG_FILE_DELETE: + case MLOG_FILE_CREATE2: + break; + default: + ut_ad(0); + } +} +#ifndef UNIV_HOTBACKUP +/** Write redo log for renaming a file. +@param[in] space_id tablespace id +@param[in] first_page_no first page number in the file +@param[in] old_name tablespace file name +@param[in] new_name tablespace file name after renaming +@param[in,out] mtr mini-transaction */ +static +void +fil_name_write_rename( + ulint space_id, + ulint first_page_no, + const char* old_name, + const char* new_name, + mtr_t* mtr) +{ + ut_ad(!is_predefined_tablespace(space_id)); + + fil_op_write_log( + MLOG_FILE_RENAME2, + space_id, first_page_no, old_name, new_name, 0, mtr); +} +#endif /* !UNIV_HOTBACKUP */ +/** Write MLOG_FILE_NAME for a file. 
+@param[in] space_id tablespace id +@param[in] first_page_no first page number in the file +@param[in] name tablespace file name +@param[in,out] mtr mini-transaction */ +static +void +fil_name_write( + ulint space_id, + ulint first_page_no, + const char* name, + mtr_t* mtr) +{ + fil_op_write_log( + MLOG_FILE_NAME, space_id, first_page_no, name, NULL, 0, mtr); +} +/** Write MLOG_FILE_NAME for a file. +@param[in] space tablespace +@param[in] first_page_no first page number in the file +@param[in] file tablespace file +@param[in,out] mtr mini-transaction */ +static +void +fil_name_write( + const fil_space_t* space, + ulint first_page_no, + const fil_node_t* file, + mtr_t* mtr) +{ + fil_name_write(space->id, first_page_no, file->name, mtr); +} + +#ifndef UNIV_HOTBACKUP +/********************************************************//** +Recreates table indexes by applying +TRUNCATE log record during recovery. +@return DB_SUCCESS or error code */ +dberr_t +fil_recreate_table( +/*===============*/ + ulint space_id, /*!< in: space id */ + ulint format_flags, /*!< in: page format */ + ulint flags, /*!< in: tablespace flags */ + const char* name, /*!< in: table name */ + truncate_t& truncate) /*!< in: The information of + TRUNCATE log record */ +{ + dberr_t err = DB_SUCCESS; + bool found; + const page_size_t page_size(fil_space_get_page_size(space_id, + &found)); + + if (!found) { + ib::info() << "Missing .ibd file for table '" << name + << "' with tablespace " << space_id; + return(DB_ERROR); + } + + ut_ad(!truncate_t::s_fix_up_active); + truncate_t::s_fix_up_active = true; + + /* Step-1: Scan for active indexes from REDO logs and drop + all the indexes using low level function that take root_page_no + and space-id. */ + truncate.drop_indexes(space_id); + + /* Step-2: Scan for active indexes and re-create them. 
*/ + err = truncate.create_indexes( + name, space_id, page_size, flags, format_flags); + if (err != DB_SUCCESS) { + ib::info() << "Failed to create indexes for the table '" + << name << "' with tablespace " << space_id + << " while fixing up truncate action"; + return(err); + } + + truncate_t::s_fix_up_active = false; return(err); } -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. -@return DB_SUCCESS or error number */ -UNIV_INTERN +/********************************************************//** +Recreates the tablespace and table indexes by applying +TRUNCATE log record during recovery. +@return DB_SUCCESS or error code */ dberr_t -fil_write_flushed_lsn_to_data_files( -/*================================*/ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no) /*!< in: latest archived log file number */ +fil_recreate_tablespace( +/*====================*/ + ulint space_id, /*!< in: space id */ + ulint format_flags, /*!< in: page format */ + ulint flags, /*!< in: tablespace flags */ + const char* name, /*!< in: table name */ + truncate_t& truncate, /*!< in: The information of + TRUNCATE log record */ + lsn_t recv_lsn) /*!< in: the end LSN of + the log record */ +{ + dberr_t err = DB_SUCCESS; + mtr_t mtr; + + ut_ad(!truncate_t::s_fix_up_active); + truncate_t::s_fix_up_active = true; + + /* Step-1: Invalidate buffer pool pages belonging to the tablespace + to re-create. */ + buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, 0); + + /* Remove all insert buffer entries for the tablespace */ + ibuf_delete_for_discarded_space(space_id); + + /* Step-2: truncate tablespace (reset the size back to original or + default size) of tablespace. 
*/ + err = truncate.truncate( + space_id, truncate.get_dir_path(), name, flags, true); + + if (err != DB_SUCCESS) { + + ib::info() << "Cannot access .ibd file for table '" + << name << "' with tablespace " << space_id + << " while truncating"; + return(DB_ERROR); + } + + bool found; + const page_size_t& page_size = + fil_space_get_page_size(space_id, &found); + + if (!found) { + ib::info() << "Missing .ibd file for table '" << name + << "' with tablespace " << space_id; + return(DB_ERROR); + } + + /* Step-3: Initialize Header. */ + if (page_size.is_compressed()) { + byte* buf; + page_t* page; + + buf = static_cast(ut_zalloc_nokey(3 * UNIV_PAGE_SIZE)); + + /* Align the memory for file i/o */ + page = static_cast(ut_align(buf, UNIV_PAGE_SIZE)); + + flags = fsp_flags_set_page_size(flags, univ_page_size); + + fsp_header_init_fields(page, space_id, flags); + + mach_write_to_4( + page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + + page_zip_des_t page_zip; + page_zip_set_size(&page_zip, page_size.physical()); + page_zip.data = page + UNIV_PAGE_SIZE; + +#ifdef UNIV_DEBUG + page_zip.m_start = +#endif /* UNIV_DEBUG */ + page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; + buf_flush_init_for_writing( + NULL, page, &page_zip, 0, + fsp_is_checksum_disabled(space_id)); + + err = fil_write(page_id_t(space_id, 0), page_size, 0, + page_size.physical(), page_zip.data); + + ut_free(buf); + + if (err != DB_SUCCESS) { + ib::info() << "Failed to clean header of the" + " table '" << name << "' with tablespace " + << space_id; + return(err); + } + } + + mtr_start(&mtr); + /* Don't log the operation while fixing up table truncate operation + as crash at this level can still be sustained with recovery restarting + from last checkpoint. */ + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + + /* Initialize the first extent descriptor page and + the second bitmap page for the new tablespace. 
*/ + fsp_header_init(space_id, FIL_IBD_FILE_INITIAL_SIZE, &mtr); + mtr_commit(&mtr); + + /* Step-4: Re-Create Indexes to newly re-created tablespace. + This operation will restore tablespace back to what it was + when it was created during CREATE TABLE. */ + err = truncate.create_indexes( + name, space_id, page_size, flags, format_flags); + if (err != DB_SUCCESS) { + return(err); + } + + /* Step-5: Write new created pages into ibd file handle and + flush it to disk for the tablespace, in case i/o-handler thread + deletes the bitmap page from buffer. */ + mtr_start(&mtr); + + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(space_id); + + mutex_exit(&fil_system->mutex); + + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + + for (ulint page_no = 0; page_no < node->size; ++page_no) { + + const page_id_t cur_page_id(space_id, page_no); + + buf_block_t* block = buf_page_get(cur_page_id, page_size, + RW_X_LATCH, &mtr); + + byte* page = buf_block_get_frame(block); + + if (!fsp_flags_is_compressed(flags)) { + + ut_ad(!page_size.is_compressed()); + + buf_flush_init_for_writing( + block, page, NULL, recv_lsn, + fsp_is_checksum_disabled(space_id)); + + err = fil_write(cur_page_id, page_size, 0, + page_size.physical(), page); + } else { + ut_ad(page_size.is_compressed()); + + /* We don't want to rewrite empty pages. 
*/ + + if (fil_page_get_type(page) != 0) { + page_zip_des_t* page_zip = + buf_block_get_page_zip(block); + + buf_flush_init_for_writing( + block, page, page_zip, recv_lsn, + fsp_is_checksum_disabled(space_id)); + + err = fil_write(cur_page_id, page_size, 0, + page_size.physical(), + page_zip->data); + } else { +#ifdef UNIV_DEBUG + const byte* data = block->page.zip.data; + + /* Make sure that the page is really empty */ + for (ulint i = 0; + i < page_size.physical(); + ++i) { + + ut_a(data[i] == 0); + } +#endif /* UNIV_DEBUG */ + } + } + + if (err != DB_SUCCESS) { + ib::info() << "Cannot write page " << page_no + << " into a .ibd file for table '" + << name << "' with tablespace " << space_id; + } + } + + mtr_commit(&mtr); + + truncate_t::s_fix_up_active = false; + + return(err); +} +#endif /* UNIV_HOTBACKUP */ +/** Replay a file rename operation if possible. +@param[in] space_id tablespace identifier +@param[in] first_page_no first page number in the file +@param[in] name old file name +@param[in] new_name new file name +@return whether the operation was successfully applied +(the name did not exist, or new_name did not exist and +name was successfully renamed to new_name) */ +bool +fil_op_replay_rename( + ulint space_id, + ulint first_page_no, + const char* name, + const char* new_name) +{ +#ifdef UNIV_HOTBACKUP + ut_ad(recv_replay_file_ops); +#endif /* UNIV_HOTBACKUP */ + ut_ad(first_page_no == 0); + + /* In order to replay the rename, the following must hold: + * The new name is not already used. + * A tablespace exists with the old name. + * The space ID for that tablepace matches this log entry. + This will prevent unintended renames during recovery. 
*/ + fil_space_t* space = fil_space_get(space_id); + + if (space == NULL) { + return(true); + } + + const bool name_match + = strcmp(name, UT_LIST_GET_FIRST(space->chain)->name) == 0; + + if (!name_match) { + return(true); + } + + /* Create the database directory for the new name, if + it does not exist yet */ + + const char* namend = strrchr(new_name, OS_PATH_SEPARATOR); + ut_a(namend != NULL); + + char* dir = static_cast( + ut_malloc_nokey(namend - new_name + 1)); + + memcpy(dir, new_name, namend - new_name); + dir[namend - new_name] = '\0'; + + bool success = os_file_create_directory(dir, false); + ut_a(success); + + ulint dirlen = 0; + + if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) { + dirlen = dirend - dir + 1; + } + + ut_free(dir); + + /* New path must not exist. */ + dberr_t err = fil_rename_tablespace_check( + space_id, name, new_name, false); + if (err != DB_SUCCESS) { + ib::error() << " Cannot replay file rename." + " Remove either file and try again."; + return(false); + } + + char* new_table = mem_strdupl( + new_name + dirlen, + strlen(new_name + dirlen) + - 4 /* remove ".ibd" */); + + ut_ad(new_table[namend - new_name - dirlen] + == OS_PATH_SEPARATOR); +#if OS_PATH_SEPARATOR != '/' + new_table[namend - new_name - dirlen] = '/'; +#endif + + if (!fil_rename_tablespace( + space_id, name, new_table, new_name)) { + ut_error; + } + + ut_free(new_table); + return(true); +} + +/** File operations for tablespace */ +enum fil_operation_t { + FIL_OPERATION_DELETE, /*!< delete a single-table tablespace */ + FIL_OPERATION_CLOSE, /*!< close a single-table tablespace */ + FIL_OPERATION_TRUNCATE /*!< truncate a single-table tablespace */ +}; + +/** Check for pending operations. +@param[in] space tablespace +@param[in] count number of attempts so far +@return 0 if no operations else count + 1. */ +static +ulint +fil_check_pending_ops( + fil_space_t* space, + ulint count) +{ + ut_ad(mutex_own(&fil_system->mutex)); + + const ulint n_pending_ops = space ? 
space->n_pending_ops : 0; + + if (n_pending_ops) { + + if (count > 5000) { + ib::warn() << "Trying to close/delete/truncate" + " tablespace '" << space->name + << "' but there are " << n_pending_ops + << " pending operations on it."; + } + + return(count + 1); + } + + return(0); +} + +/*******************************************************************//** +Check for pending IO. +@return 0 if no pending else count + 1. */ +static +ulint +fil_check_pending_io( +/*=================*/ + fil_operation_t operation, /*!< in: File operation */ + fil_space_t* space, /*!< in/out: Tablespace to check */ + fil_node_t** node, /*!< out: Node in space list */ + ulint count) /*!< in: number of attempts so far */ +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_a(space->n_pending_ops == 0); + + switch (operation) { + case FIL_OPERATION_DELETE: + case FIL_OPERATION_CLOSE: + break; + case FIL_OPERATION_TRUNCATE: + space->is_being_truncated = true; + break; + } + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + *node = UT_LIST_GET_FIRST(space->chain); + + if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) { + + ut_a(!(*node)->being_extended); + + if (count > 1000) { + ib::warn() << "Trying to delete/close/truncate" + " tablespace '" << space->name + << "' but there are " + << space->n_pending_flushes + << " flushes and " << (*node)->n_pending + << " pending i/o's on it."; + } + + return(count + 1); + } + + return(0); +} + +/*******************************************************************//** +Check pending operations on a tablespace. +@return DB_SUCCESS or error failure. 
*/ +static +dberr_t +fil_check_pending_operations( +/*=========================*/ + ulint id, /*!< in: space id */ + fil_operation_t operation, /*!< in: File operation */ + fil_space_t** space, /*!< out: tablespace instance + in memory */ + char** path) /*!< out/own: tablespace path */ +{ + ulint count = 0; + + ut_a(!is_system_tablespace(id)); + ut_ad(space); + + *space = 0; + + mutex_enter(&fil_system->mutex); + fil_space_t* sp = fil_space_get_by_id(id); + if (sp) { + sp->stop_new_ops = true; + } + mutex_exit(&fil_system->mutex); + + /* Check for pending operations. */ + + do { + mutex_enter(&fil_system->mutex); + + sp = fil_space_get_by_id(id); + + count = fil_check_pending_ops(sp, count); + + mutex_exit(&fil_system->mutex); + + if (count > 0) { + os_thread_sleep(20000); + } + + } while (count > 0); + + /* Check for pending IO. */ + + *path = 0; + + do { + mutex_enter(&fil_system->mutex); + + sp = fil_space_get_by_id(id); + + if (sp == NULL) { + mutex_exit(&fil_system->mutex); + return(DB_TABLESPACE_NOT_FOUND); + } + + fil_node_t* node; + + count = fil_check_pending_io(operation, sp, &node, count); + + if (count == 0) { + *path = mem_strdup(node->name); + } + + mutex_exit(&fil_system->mutex); + + if (count > 0) { + os_thread_sleep(20000); + } + + } while (count > 0); + + ut_ad(sp); + + *space = sp; + return(DB_SUCCESS); +} + +/*******************************************************************//** +Closes a single-table tablespace. The tablespace must be cached in the +memory cache. Free all pages used by the tablespace. 
+@return DB_SUCCESS or error */ +dberr_t +fil_close_tablespace( +/*=================*/ + trx_t* trx, /*!< in/out: Transaction covering the close */ + ulint id) /*!< in: space id */ +{ + char* path = 0; + fil_space_t* space = 0; + dberr_t err; + + ut_a(!is_system_tablespace(id)); + + err = fil_check_pending_operations(id, FIL_OPERATION_CLOSE, + &space, &path); + + if (err != DB_SUCCESS) { + return(err); + } + + ut_a(space); + ut_a(path != 0); + + rw_lock_x_lock(&space->latch); + + /* Invalidate in the buffer pool all pages belonging to the + tablespace. Since we have set space->stop_new_ops = true, readahead + or ibuf merge can no longer read more pages of this tablespace to the + buffer pool. Thus we can clean the tablespace out of the buffer pool + completely and permanently. The flag stop_new_ops also prevents + fil_flush() from being applied to this tablespace. */ + + buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx); + + /* If the free is successful, the X lock will be released before + the space memory data structure is freed. */ + + if (!fil_space_free(id, true)) { + rw_lock_x_unlock(&space->latch); + err = DB_TABLESPACE_NOT_FOUND; + } else { + err = DB_SUCCESS; + } + + /* If it is a delete then also delete any generated files, otherwise + when we drop the database the remove directory will fail. */ + + char* cfg_name = fil_make_filepath(path, NULL, CFG, false); + if (cfg_name != NULL) { + os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL); + ut_free(cfg_name); + } + + ut_free(path); + + return(err); +} + +/** Deletes an IBD tablespace, either general or single-table. +The tablespace must be cached in the memory cache. This will delete the +datafile, fil_space_t & fil_node_t entries from the file_system_t cache. +@param[in] space_id Tablespace id +@param[in] buf_remove Specify the action to take on the pages +for this table in the buffer pool. 
+@return DB_SUCCESS or error */ +dberr_t +fil_delete_tablespace( + ulint id, + buf_remove_t buf_remove) +{ + char* path = 0; + fil_space_t* space = 0; + + ut_a(!is_system_tablespace(id)); + + dberr_t err = fil_check_pending_operations( + id, FIL_OPERATION_DELETE, &space, &path); + + if (err != DB_SUCCESS) { + + ib::error() << "Cannot delete tablespace " << id + << " because it is not found in the tablespace" + " memory cache."; + + return(err); + } + + ut_a(space); + ut_a(path != 0); + +#ifndef UNIV_HOTBACKUP + /* IMPORTANT: Because we have set space::stop_new_ops there + can't be any new ibuf merges, reads or flushes. We are here + because node::n_pending was zero above. However, it is still + possible to have pending read and write requests: + + A read request can happen because the reader thread has + gone through the ::stop_new_ops check in buf_page_init_for_read() + before the flag was set and has not yet incremented ::n_pending + when we checked it above. + + A write request can be issued any time because we don't check + the ::stop_new_ops flag when queueing a block for write. + + We deal with pending write requests in the following function + where we'd minimally evict all dirty pages belonging to this + space from the flush_list. Note that if a block is IO-fixed + we'll wait for IO to complete. + + To deal with potential read requests, we will check the + ::stop_new_ops flag in fil_io(). */ + + buf_LRU_flush_or_remove_pages(id, buf_remove, 0); + +#endif /* !UNIV_HOTBACKUP */ + + /* If it is a delete then also delete any generated files, otherwise + when we drop the database the remove directory will fail. */ + { +#ifdef UNIV_HOTBACKUP + /* When replaying the operation in MySQL Enterprise + Backup, we do not try to write any log record. */ +#else /* UNIV_HOTBACKUP */ + /* Before deleting the file, write a log record about + it, so that InnoDB crash recovery will expect the file + to be gone. 
*/ + mtr_t mtr; + + mtr_start(&mtr); + fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, 0, &mtr); + mtr_commit(&mtr); + /* Even if we got killed shortly after deleting the + tablespace file, the record must have already been + written to the redo log. */ + log_write_up_to(mtr.commit_lsn(), true); +#endif /* UNIV_HOTBACKUP */ + + char* cfg_name = fil_make_filepath(path, NULL, CFG, false); + if (cfg_name != NULL) { + os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL); + ut_free(cfg_name); + } + } + + /* Delete the link file pointing to the ibd file we are deleting. */ + if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) { + + RemoteDatafile::delete_link_file(space->name); + + } else if (FSP_FLAGS_GET_SHARED(space->flags)) { + + RemoteDatafile::delete_link_file(base_name(path)); + + } + + mutex_enter(&fil_system->mutex); + + /* Double check the sanity of pending ops after reacquiring + the fil_system::mutex. */ + if (const fil_space_t* s = fil_space_get_by_id(id)) { + ut_a(s == space); + ut_a(space->n_pending_ops == 0); + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + ut_a(node->n_pending == 0); + + fil_space_detach(space); + mutex_exit(&fil_system->mutex); + + log_mutex_enter(); + + if (space->max_lsn != 0) { + ut_d(space->max_lsn = 0); + UT_LIST_REMOVE(fil_system->named_spaces, space); + } + + log_mutex_exit(); + fil_space_free_low(space); + + if (!os_file_delete(innodb_data_file_key, path) + && !os_file_delete_if_exists( + innodb_data_file_key, path, NULL)) { + + /* Note: This is because we have removed the + tablespace instance from the cache. */ + + err = DB_IO_ERROR; + } + } else { + mutex_exit(&fil_system->mutex); + err = DB_TABLESPACE_NOT_FOUND; + } + + ut_free(path); + + return(err); +} + +#ifndef UNIV_HOTBACKUP +/** Truncate the tablespace to needed size. +@param[in] space_id id of tablespace to truncate +@param[in] size_in_pages truncate size. +@return true if truncate was successful. 
*/ +bool +fil_truncate_tablespace( + ulint space_id, + ulint size_in_pages) +{ + /* Step-1: Prepare tablespace for truncate. This involves + stopping all the new operations + IO on that tablespace + and ensuring that related pages are flushed to disk. */ + if (fil_prepare_for_truncate(space_id) != DB_SUCCESS) { + return(false); + } + + /* Step-2: Invalidate buffer pool pages belonging to the tablespace + to re-create. Remove all insert buffer entries for the tablespace */ + buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, 0); + + /* Step-3: Truncate the tablespace and accordingly update + the fil_space_t handler that is used to access this tablespace. */ + mutex_enter(&fil_system->mutex); + fil_space_t* space = fil_space_get_by_id(space_id); + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + + ut_ad(node->is_open); + + space->size = node->size = size_in_pages; + + bool success = os_file_truncate(node->name, node->handle, 0); + if (success) { + + os_offset_t size = size_in_pages * UNIV_PAGE_SIZE; + + success = os_file_set_size( + node->name, node->handle, size, srv_read_only_mode); + + if (success) { + space->stop_new_ops = false; + space->is_being_truncated = false; + } + } + + mutex_exit(&fil_system->mutex); + + return(success); +} + +/*******************************************************************//** +Prepare for truncating a single-table tablespace. 
+1) Check pending operations on a tablespace; +2) Remove all insert buffer entries for the tablespace; +@return DB_SUCCESS or error */ +dberr_t +fil_prepare_for_truncate( +/*=====================*/ + ulint id) /*!< in: space id */ +{ + char* path = 0; + fil_space_t* space = 0; + + ut_a(!is_system_tablespace(id)); + + dberr_t err = fil_check_pending_operations( + id, FIL_OPERATION_TRUNCATE, &space, &path); + + ut_free(path); + + if (err == DB_TABLESPACE_NOT_FOUND) { + ib::error() << "Cannot truncate tablespace " << id + << " because it is not found in the tablespace" + " memory cache."; + } + + return(err); +} + +/**********************************************************************//** +Reinitialize the original tablespace header with the same space id +for single tablespace */ +void +fil_reinit_space_header( +/*====================*/ + ulint id, /*!< in: space id */ + ulint size) /*!< in: size in blocks */ +{ + ut_a(!is_system_tablespace(id)); + + /* Invalidate in the buffer pool all pages belonging + to the tablespace */ + buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_ALL_NO_WRITE, 0); + + /* Remove all insert buffer entries for the tablespace */ + ibuf_delete_for_discarded_space(id); + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(id); + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + + space->size = node->size = size; + + mutex_exit(&fil_system->mutex); + + mtr_t mtr; + + mtr_start(&mtr); + mtr.set_named_space(id); + + fsp_header_init(id, size, &mtr); + + mtr_commit(&mtr); +} + +#ifdef UNIV_DEBUG +/** Increase redo skipped count for a tablespace. 
+@param[in] id space id */ +void +fil_space_inc_redo_skipped_count( + ulint id) { fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + + space->redo_skipped_count++; + + mutex_exit(&fil_system->mutex); +} + +/** Decrease redo skipped count for a tablespace. +@param[in] id space id */ +void +fil_space_dec_redo_skipped_count( + ulint id) +{ + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + ut_a(space->redo_skipped_count > 0); + + space->redo_skipped_count--; + + mutex_exit(&fil_system->mutex); +} + +/** +Check whether a single-table tablespace is redo skipped. +@param[in] id space id +@return true if redo skipped */ +bool +fil_space_is_redo_skipped( + ulint id) +{ + fil_space_t* space; + bool is_redo_skipped; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space != NULL); + + is_redo_skipped = space->redo_skipped_count > 0; + + mutex_exit(&fil_system->mutex); + + return(is_redo_skipped); +} +#endif /* UNIV_DEBUG */ + +/*******************************************************************//** +Discards a single-table tablespace. The tablespace must be cached in the +memory cache. Discarding is like deleting a tablespace, but + + 1. We do not drop the table from the data dictionary; + + 2. We remove all insert buffer entries for the tablespace immediately; + in DROP TABLE they are only removed gradually in the background; + + 3. Free all the pages in use by the tablespace. +@return DB_SUCCESS or error */ +dberr_t +fil_discard_tablespace( +/*===================*/ + ulint id) /*!< in: space id */ +{ + dberr_t err; + + switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) { + case DB_SUCCESS: + break; + + case DB_IO_ERROR: + ib::warn() << "While deleting tablespace " << id + << " in DISCARD TABLESPACE. 
File rename/delete" + " failed: " << ut_strerr(err); + break; + + case DB_TABLESPACE_NOT_FOUND: + ib::warn() << "Cannot delete tablespace " << id + << " in DISCARD TABLESPACE: " << ut_strerr(err); + break; + + default: + ut_error; + } + + /* Remove all insert buffer entries for the tablespace */ + + ibuf_delete_for_discarded_space(id); + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +/*******************************************************************//** +Allocates and builds a file name from a path, a table or tablespace name +and a suffix. The string must be freed by caller with ut_free(). +@param[in] path NULL or the direcory path or the full path and filename. +@param[in] name NULL if path is full, or Table/Tablespace name +@param[in] suffix NULL or the file extention to use. +@param[in] trim_name true if the last name on the path should be trimmed. +@return own: file name */ +char* +fil_make_filepath( + const char* path, + const char* name, + ib_extention ext, + bool trim_name) +{ + /* The path may contain the basename of the file, if so we do not + need the name. If the path is NULL, we can use the default path, + but there needs to be a name. */ + ut_ad(path != NULL || name != NULL); + + /* If we are going to strip a name off the path, there better be a + path and a new name to put back on. */ + ut_ad(!trim_name || (path != NULL && name != NULL)); + + if (path == NULL) { + path = fil_path_to_mysql_datadir; + } + + ulint len = 0; /* current length */ + ulint path_len = strlen(path); + ulint name_len = (name ? strlen(name) : 0); + const char* suffix = dot_ext[ext]; + ulint suffix_len = strlen(suffix); + ulint full_len = path_len + 1 + name_len + suffix_len + 1; + + char* full_name = static_cast(ut_malloc_nokey(full_len)); + if (full_name == NULL) { + return NULL; + } + + /* If the name is a relative path, do not prepend "./". */ + if (path[0] == '.' 
+ && (path[1] == '\0' || path[1] == OS_PATH_SEPARATOR) + && name != NULL && name[0] == '.') { + path = NULL; + path_len = 0; + } + + if (path != NULL) { + memcpy(full_name, path, path_len); + len = path_len; + full_name[len] = '\0'; + os_normalize_path(full_name); + } + + if (trim_name) { + /* Find the offset of the last DIR separator and set it to + null in order to strip off the old basename from this path. */ + char* last_dir_sep = strrchr(full_name, OS_PATH_SEPARATOR); + if (last_dir_sep) { + last_dir_sep[0] = '\0'; + len = strlen(full_name); + } + } + + if (name != NULL) { + if (len && full_name[len - 1] != OS_PATH_SEPARATOR) { + /* Add a DIR separator */ + full_name[len] = OS_PATH_SEPARATOR; + full_name[++len] = '\0'; + } + + char* ptr = &full_name[len]; + memcpy(ptr, name, name_len); + len += name_len; + full_name[len] = '\0'; + os_normalize_path(ptr); + } + + /* Make sure that the specified suffix is at the end of the filepath + string provided. This assumes that the suffix starts with '.'. + If the first char of the suffix is found in the filepath at the same + length as the suffix from the end, then we will assume that there is + a previous suffix that needs to be replaced. */ + if (suffix != NULL) { + /* Need room for the trailing null byte. */ + ut_ad(len < full_len); + + if ((len > suffix_len) + && (full_name[len - suffix_len] == suffix[0])) { + /* Another suffix exists, make it the one requested. */ + memcpy(&full_name[len - suffix_len], suffix, suffix_len); + + } else { + /* No previous suffix, add it. */ + ut_ad(len + suffix_len < full_len); + memcpy(&full_name[len], suffix, suffix_len); + full_name[len + suffix_len] = '\0'; + } + } + + return(full_name); +} + +/** Test if a tablespace file can be renamed to a new filepath by checking +if that the old filepath exists and the new filepath does not exist. 
+@param[in] space_id tablespace id +@param[in] old_path old filepath +@param[in] new_path new filepath +@param[in] is_discarded whether the tablespace is discarded +@return innodb error code */ +dberr_t +fil_rename_tablespace_check( + ulint space_id, + const char* old_path, + const char* new_path, + bool is_discarded) +{ + bool exists = false; + os_file_type_t ftype; + + if (!is_discarded + && os_file_status(old_path, &exists, &ftype) + && !exists) { + ib::error() << "Cannot rename '" << old_path + << "' to '" << new_path + << "' for space ID " << space_id + << " because the source file" + << " does not exist."; + return(DB_TABLESPACE_NOT_FOUND); + } + + exists = false; + if (!os_file_status(new_path, &exists, &ftype) || exists) { + ib::error() << "Cannot rename '" << old_path + << "' to '" << new_path + << "' for space ID " << space_id + << " because the target file exists." + " Remove the target file and try again."; + return(DB_TABLESPACE_EXISTS); + } + + return(DB_SUCCESS); +} + +/** Rename a single-table tablespace. +The tablespace must exist in the memory cache. +@param[in] id tablespace identifier +@param[in] old_path old file name +@param[in] new_name new table name in the +databasename/tablename format +@param[in] new_path_in new file name, +or NULL if it is located in the normal data directory +@return true if success */ +bool +fil_rename_tablespace( + ulint id, + const char* old_path, + const char* new_name, + const char* new_path_in) +{ + bool sleep = false; + bool flush = false; + fil_space_t* space; fil_node_t* node; + ulint count = 0; + ut_a(id != 0); + + ut_ad(strchr(new_name, '/') != NULL); +retry: + count++; + + if (!(count % 1000)) { + ib::warn() << "Cannot rename file " << old_path + << " (space id " << id << "), retried " << count + << " times." 
+ " There are either pending IOs or flushes or" + " the file is being extended."; + } + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; ); + + if (space == NULL) { + ib::error() << "Cannot find space id " << id + << " in the tablespace memory cache, though the file '" + << old_path + << "' in a rename operation should have that id."; +func_exit: + mutex_exit(&fil_system->mutex); + return(false); + } + + if (count > 25000) { + space->stop_ios = false; + goto func_exit; + } + if (space != fil_space_get_by_name(space->name)) { + ib::error() << "Cannot find " << space->name + << " in tablespace memory cache"; + space->stop_ios = false; + goto func_exit; + } + + if (fil_space_get_by_name(new_name)) { + ib::error() << new_name + << " is already in tablespace memory cache"; + space->stop_ios = false; + goto func_exit; + } + + /* We temporarily close the .ibd file because we do not trust that + operating systems can rename an open file. For the closing we have to + wait until there are no pending i/o's or flushes on the file. */ + + space->stop_ios = true; + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. 
*/ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + node = UT_LIST_GET_FIRST(space->chain); + + if (node->n_pending > 0 + || node->n_pending_flushes > 0 + || node->being_extended) { + /* There are pending i/o's or flushes or the file is + currently being extended, sleep for a while and + retry */ + sleep = true; + } else if (node->modification_counter > node->flush_counter) { + /* Flush the space */ + sleep = flush = true; + } else if (node->is_open) { + /* Close the file */ + + fil_node_close_file(node); + } + + mutex_exit(&fil_system->mutex); + + if (sleep) { + os_thread_sleep(20000); + + if (flush) { + fil_flush(id); + } + + sleep = flush = false; + goto retry; + } + ut_ad(space->stop_ios); + char* new_file_name = new_path_in == NULL + ? fil_make_filepath(NULL, new_name, IBD, false) + : mem_strdup(new_path_in); + char* old_file_name = node->name; + char* new_space_name = mem_strdup(new_name); + char* old_space_name = space->name; + ulint old_fold = ut_fold_string(old_space_name); + ulint new_fold = ut_fold_string(new_space_name); + + ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL); + ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL); +#ifndef UNIV_HOTBACKUP + if (!recv_recovery_on) { + mtr_t mtr; + + mtr.start(); + fil_name_write_rename( + id, 0, old_file_name, new_file_name, &mtr); + mtr.commit(); + log_mutex_enter(); + } +#endif /* !UNIV_HOTBACKUP */ + + /* log_sys->mutex is above fil_system->mutex in the latching order */ + ut_ad(log_mutex_own()); + mutex_enter(&fil_system->mutex); + ut_ad(space->name == old_space_name); + /* We already checked these. 
*/ + ut_ad(space == fil_space_get_by_name(old_space_name)); + ut_ad(!fil_space_get_by_name(new_space_name)); + ut_ad(node->name == old_file_name); + + bool success; + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", + goto skip_rename; ); + + success = os_file_rename( + innodb_data_file_key, old_file_name, new_file_name); + + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", + skip_rename: success = false; ); + + ut_ad(node->name == old_file_name); + + if (success) { + node->name = new_file_name; + } + +#ifndef UNIV_HOTBACKUP + if (!recv_recovery_on) { + log_mutex_exit(); + } +#endif /* !UNIV_HOTBACKUP */ + + ut_ad(space->name == old_space_name); + if (success) { + HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, + old_fold, space); + space->name = new_space_name; + HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, + new_fold, space); + } else { + /* Because nothing was renamed, we must free the new + names, not the old ones. */ + old_file_name = new_file_name; + old_space_name = new_space_name; + } + + ut_ad(space->stop_ios); + space->stop_ios = false; + mutex_exit(&fil_system->mutex); + + ut_free(old_file_name); + ut_free(old_space_name); + + return(success); +} + +/** Create a new General or Single-Table tablespace +@param[in] space_id Tablespace ID +@param[in] name Tablespace name in dbname/tablename format. +For general tablespaces, the 'dbname/' part may be missing. +@param[in] path Path and filename of the datafile to create. 
+@param[in] flags Tablespace flags +@param[in] size Initial size of the tablespace file in + pages, must be >= FIL_IBD_FILE_INITIAL_SIZE +@param[in] mode MariaDB encryption mode +@param[in] key_id MariaDB encryption key_id +@return DB_SUCCESS or error code */ +dberr_t +fil_ibd_create( + ulint space_id, + const char* name, + const char* path, + ulint flags, + ulint size, + fil_encryption_t mode, + ulint key_id) +{ + os_file_t file; dberr_t err; + byte* buf2; + byte* page; + bool success; + bool is_temp = FSP_FLAGS_GET_TEMPORARY(flags); + bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + bool has_shared_space = FSP_FLAGS_GET_SHARED(flags); + fil_space_t* space = NULL; + fil_space_crypt_t *crypt_data = NULL; + + ut_ad(!is_system_tablespace(space_id)); + ut_ad(!srv_read_only_mode); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); + ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); + ut_a(fsp_flags_is_valid(flags)); + + /* Create the subdirectories in the path, if they are + not there already. */ + if (!has_shared_space) { + err = os_file_create_subdirs_if_needed(path); + if (err != DB_SUCCESS) { + return(err); + } + } + + file = os_file_create( + innodb_data_file_key, path, + OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, + OS_FILE_NORMAL, + OS_DATA_FILE, + srv_read_only_mode, + &success); + + if (!success) { + /* The following call will print an error message */ + ulint error = os_file_get_last_error(true); + + ib::error() << "Cannot create file '" << path << "'"; + + if (error == OS_FILE_ALREADY_EXISTS) { + ib::error() << "The file '" << path << "'" + " already exists though the" + " corresponding table did not exist" + " in the InnoDB data dictionary." + " Have you moved InnoDB .ibd files" + " around without using the SQL commands" + " DISCARD TABLESPACE and IMPORT TABLESPACE," + " or did mysqld crash in the middle of" + " CREATE TABLE?" 
+ " You can resolve the problem by removing" + " the file '" << path + << "' under the 'datadir' of MySQL."; + + return(DB_TABLESPACE_EXISTS); + } + + if (error == OS_FILE_DISK_FULL) { + return(DB_OUT_OF_FILE_SPACE); + } + + return(DB_ERROR); + } + + bool atomic_write; + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + if (fil_fusionio_enable_atomic_write(file)) { + + /* This is required by FusionIO HW/Firmware */ + int ret = posix_fallocate(file, 0, size * UNIV_PAGE_SIZE); + + if (ret != 0) { + + ib::error() << + "posix_fallocate(): Failed to preallocate" + " data for file " << path + << ", desired size " + << size * UNIV_PAGE_SIZE + << " Operating system error number " << ret + << ". Check" + " that the disk is not full or a disk quota" + " exceeded. Make sure the file system supports" + " this function. Some operating system error" + " numbers are described at " REFMAN + " operating-system-error-codes.html"; + + success = false; + } else { + success = true; + } + + atomic_write = true; + } else { + atomic_write = false; + + success = os_file_set_size( + path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode); + } +#else + atomic_write = false; + + success = os_file_set_size( + path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode); + +#endif /* !NO_FALLOCATE && UNIV_LINUX */ + + if (!success) { + os_file_close(file); + os_file_delete(innodb_data_file_key, path); + return(DB_OUT_OF_FILE_SPACE); + } + + /* Note: We are actually punching a hole, previous contents will + be lost after this call, if it succeeds. In this case the file + should be full of NULs. */ + + bool punch_hole = os_is_sparse_file_supported(path, file); + + if (punch_hole) { + + dberr_t punch_err; + + punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE); + + if (punch_err != DB_SUCCESS) { + punch_hole = false; + } + } + + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + + /* We have to write the space id to the file immediately and flush the + file to disk. 
This is because in crash recovery we must be aware what + tablespaces exist and what are their space id's, so that we can apply + the log records to the right file. It may take quite a while until + buffer pool flush algorithms write anything to the file and flush it to + disk. If we would not write here anything, the file would be filled + with zeros from the call of os_file_set_size(), until a buffer pool + flush would write to it. */ + + buf2 = static_cast(ut_malloc_nokey(3 * UNIV_PAGE_SIZE)); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = static_cast(ut_align(buf2, UNIV_PAGE_SIZE)); + + memset(page, '\0', UNIV_PAGE_SIZE); +#ifndef UNIV_HOTBACKUP + /* Add the UNIV_PAGE_SIZE to the table flags and write them to the + tablespace header. */ + flags = fsp_flags_set_page_size(flags, univ_page_size); +#endif /* !UNIV_HOTBACKUP */ + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + + const page_size_t page_size(flags); + IORequest request(IORequest::WRITE); + + if (!page_size.is_compressed()) { + + buf_flush_init_for_writing( + NULL, page, NULL, 0, + fsp_is_checksum_disabled(space_id)); + + err = os_file_write( + request, path, file, page, 0, page_size.physical()); + + ut_ad(err != DB_IO_NO_PUNCH_HOLE); + + } else { + page_zip_des_t page_zip; + page_zip_set_size(&page_zip, page_size.physical()); + page_zip.data = page + UNIV_PAGE_SIZE; +#ifdef UNIV_DEBUG + page_zip.m_start = +#endif /* UNIV_DEBUG */ + page_zip.m_end = page_zip.m_nonempty = + page_zip.n_blobs = 0; + + buf_flush_init_for_writing( + NULL, page, &page_zip, 0, + fsp_is_checksum_disabled(space_id)); + + err = os_file_write( + request, path, file, page_zip.data, 0, + page_size.physical()); + + ut_a(err != DB_IO_NO_PUNCH_HOLE); + + punch_hole = false; + } + + ut_free(buf2); + + if (err != DB_SUCCESS) { + + ib::error() + << "Could not write the first page to" + << " tablespace '" << path << "'"; + + 
os_file_close(file); + os_file_delete(innodb_data_file_key, path); + + return(DB_ERROR); + } + + success = os_file_flush(file); + + if (!success) { + ib::error() << "File flush of tablespace '" + << path << "' failed"; + os_file_close(file); + os_file_delete(innodb_data_file_key, path); + return(DB_ERROR); + } + + /* MEB creates isl files during copy-back, hence they + should not be created during apply log operation. */ +#ifndef UNIV_HOTBACKUP + if (has_data_dir || has_shared_space) { + /* Make the ISL file if the IBD file is not + in the default location. */ + err = RemoteDatafile::create_link_file(name, path, + has_shared_space); + if (err != DB_SUCCESS) { + os_file_close(file); + os_file_delete(innodb_data_file_key, path); + return(err); + } + } +#endif /* !UNIV_HOTBACKUP */ + + /* Create crypt data if the tablespace is either encrypted or user has + requested it to remain unencrypted. */ + if (mode == FIL_SPACE_ENCRYPTION_ON || mode == FIL_SPACE_ENCRYPTION_OFF || + srv_encrypt_tables) { + crypt_data = fil_space_create_crypt_data(mode, key_id); + } + + space = fil_space_create(name, space_id, flags, is_temp + ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE, + crypt_data); + + if (!fil_node_create_low( + path, size, space, false, punch_hole, atomic_write)) { + + if (crypt_data) { + free(crypt_data); + } + + err = DB_ERROR; + goto error_exit_1; + } + +#ifdef MYSQL_ENCRYPTION + /* For encryption tablespace, initial encryption information. 
*/ + if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) { + err = fil_set_encryption(space->id, + Encryption::AES, + NULL, + NULL); + ut_ad(err == DB_SUCCESS); + } +#endif /* MYSQL_ENCRYPTION */ + +#ifndef UNIV_HOTBACKUP + if (!is_temp) { + mtr_t mtr; + const fil_node_t* file = UT_LIST_GET_FIRST(space->chain); + + mtr_start(&mtr); + fil_op_write_log( + MLOG_FILE_CREATE2, space_id, 0, file->name, + NULL, space->flags, &mtr); + fil_name_write(space, 0, file, &mtr); + mtr_commit(&mtr); + } +#endif /* !UNIV_HOTBACKUP */ + err = DB_SUCCESS; + + /* Error code is set. Cleanup the various variables used. + These labels reflect the order in which variables are assigned or + actions are done. */ +error_exit_1: + if (err != DB_SUCCESS && (has_data_dir || has_shared_space)) { + RemoteDatafile::delete_link_file(name); + } + + os_file_close(file); + if (err != DB_SUCCESS) { + os_file_delete(innodb_data_file_key, path); + } + + return(err); +} + +#ifndef UNIV_HOTBACKUP +/** Try to open a single-table tablespace and optionally check that the +space id in it is correct. If this does not succeed, print an error message +to the .err log. This function is used to open a tablespace when we start +mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE. + +NOTE that we assume this operation is used either at the database startup +or under the protection of the dictionary mutex, so that two users cannot +race here. This operation does not leave the file associated with the +tablespace open, but closes it after we have looked at the space id in it. + +If the validate boolean is set, we read the first page of the file and +check that the space id in the file is what we expect. We assume that +this function runs much faster if no check is made, since accessing the +file inode probably is much faster (the OS caches them) than accessing +the first page of the file. This boolean may be initially false, but if +a remote tablespace is found it will be changed to true. 
+ +If the fix_dict boolean is set, then it is safe to use an internal SQL +statement to update the dictionary tables if they are incorrect. + +@param[in] validate true if we should validate the tablespace +@param[in] fix_dict true if the dictionary is available to be fixed +@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY +@param[in] id tablespace ID +@param[in] flags tablespace flags +@param[in] space_name tablespace name of the datafile +If file-per-table, it is the table name in the databasename/tablename format +@param[in] path_in expected filepath, usually read from dictionary +@return DB_SUCCESS or error code */ +dberr_t +fil_ibd_open( + bool validate, + bool fix_dict, + fil_type_t purpose, + ulint id, + ulint flags, + const char* space_name, + const char* path_in, + dict_table_t* table) +{ + dberr_t err = DB_SUCCESS; + bool dict_filepath_same_as_default = false; + bool link_file_found = false; + bool link_file_is_bad = false; + bool is_shared = FSP_FLAGS_GET_SHARED(flags); + bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags); + Datafile df_default; /* default location */ + Datafile df_dict; /* dictionary location */ + RemoteDatafile df_remote; /* remote location */ + ulint tablespaces_found = 0; + ulint valid_tablespaces_found = 0; + bool for_import = (purpose == FIL_TYPE_IMPORT); + + ut_ad(!fix_dict || rw_lock_own(dict_operation_lock, RW_LOCK_X)); + + ut_ad(!fix_dict || mutex_own(&dict_sys->mutex)); + ut_ad(!fix_dict || !srv_read_only_mode); + ut_ad(!fix_dict || srv_log_file_size != 0); + ut_ad(fil_type_is_data(purpose)); + + if (!fsp_flags_is_valid(flags)) { + return(DB_CORRUPTION); + } + + df_default.init(space_name, flags); + df_dict.init(space_name, flags); + df_remote.init(space_name, flags); + + /* Discover the correct file by looking in three possible locations + while avoiding unecessary effort. */ + + if (is_shared) { + /* Shared tablespaces will have a path_in since the filename + is not generated from the tablespace name. 
Use the basename + from this path_in with the default datadir as a filepath to + the default location */ + ut_a(path_in); + const char* sep = strrchr(path_in, OS_PATH_SEPARATOR); + const char* basename = (sep == NULL) ? path_in : &sep[1]; + df_default.make_filepath(NULL, basename, IBD); + + /* Always validate shared tablespaces. */ + validate = true; + + /* Set the ISL filepath in the default location. */ + df_remote.set_link_filepath(path_in); + } else { + /* We will always look for an ibd in the default location. */ + df_default.make_filepath(NULL, space_name, IBD); + } + + /* Look for a filepath embedded in an ISL where the default file + would be. */ + if (df_remote.open_read_only(true) == DB_SUCCESS) { + ut_ad(df_remote.is_open()); + + /* Always validate a file opened from an ISL pointer */ + validate = true; + ++tablespaces_found; + link_file_found = true; + if (table) { + table->crypt_data = df_remote.get_crypt_info(); + } + } else if (df_remote.filepath() != NULL) { + /* An ISL file was found but contained a bad filepath in it. + Better validate anything we do find. */ + validate = true; + } + + /* Attempt to open the tablespace at the dictionary filepath. */ + if (path_in) { + if (df_default.same_filepath_as(path_in)) { + dict_filepath_same_as_default = true; + } else { + /* Dict path is not the default path. Always validate + remote files. If default is opened, it was moved. */ + validate = true; + df_dict.set_filepath(path_in); + if (df_dict.open_read_only(true) == DB_SUCCESS) { + ut_ad(df_dict.is_open()); + ++tablespaces_found; + + if (table) { + table->crypt_data = df_dict.get_crypt_info(); + } + } + } + } + + /* Always look for a file at the default location. But don't log + an error if the tablespace is already open in remote or dict. 
*/ + ut_a(df_default.filepath()); + const bool strict = (tablespaces_found == 0); + if (df_default.open_read_only(strict) == DB_SUCCESS) { + ut_ad(df_default.is_open()); + ++tablespaces_found; + if (table) { + table->crypt_data = df_default.get_crypt_info(); + } + } + + /* Check if multiple locations point to the same file. */ + if (tablespaces_found > 1 && df_default.same_as(df_remote)) { + /* A link file was found with the default path in it. + Use the default path and delete the link file. */ + --tablespaces_found; + df_remote.delete_link_file(); + df_remote.close(); + } + if (tablespaces_found > 1 && df_default.same_as(df_dict)) { + --tablespaces_found; + df_dict.close(); + } + if (tablespaces_found > 1 && df_remote.same_as(df_dict)) { + --tablespaces_found; + df_dict.close(); + } + + bool atomic_write; + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + if (!srv_use_doublewrite_buf && df_default.is_open()) { + atomic_write = fil_fusionio_enable_atomic_write( + df_default.handle()); + } else { + atomic_write = false; + } +#else + atomic_write = false; +#endif /* !NO_FALLOCATE && UNIV_LINUX */ + + /* We have now checked all possible tablespace locations and + have a count of how many unique files we found. If things are + normal, we only found 1. */ + /* For encrypted tablespace, we need to check the + encryption in header of first page. */ + if (!validate && tablespaces_found == 1 && !is_encrypted) { + goto skip_validate; + } + + /* Read and validate the first page of these three tablespace + locations, if found. */ + valid_tablespaces_found += + (df_remote.validate_to_dd(id, flags, for_import) + == DB_SUCCESS) ? 1 : 0; + + valid_tablespaces_found += + (df_default.validate_to_dd(id, flags, for_import) + == DB_SUCCESS) ? 1 : 0; + + valid_tablespaces_found += + (df_dict.validate_to_dd(id, flags, for_import) + == DB_SUCCESS) ? 1 : 0; + + /* Make sense of these three possible locations. + First, bail out if no tablespace files were found. 
*/ + if (valid_tablespaces_found == 0) { + if (!is_encrypted) { + /* The following call prints an error message. + For encrypted tablespace we skip print, since it should + be keyring plugin issues. */ + os_file_get_last_error(true); + ib::error() << "Could not find a valid tablespace file for `" + << space_name << "`. " << TROUBLESHOOT_DATADICT_MSG; + } + + return(DB_CORRUPTION); + } + if (!validate && !is_encrypted) { + return(DB_SUCCESS); + } + if (validate && is_encrypted && fil_space_get(id)) { + return(DB_SUCCESS); + } + + /* Do not open any tablespaces if more than one tablespace with + the correct space ID and flags were found. */ + if (tablespaces_found > 1) { + ib::error() << "A tablespace for `" << space_name + << "` has been found in multiple places;"; + + if (df_default.is_open()) { + ib::error() << "Default location: " + << df_default.filepath() + << ", Space ID=" << df_default.space_id() + << ", Flags=" << df_default.flags(); + } + if (df_remote.is_open()) { + ib::error() << "Remote location: " + << df_remote.filepath() + << ", Space ID=" << df_remote.space_id() + << ", Flags=" << df_remote.flags(); + } + if (df_dict.is_open()) { + ib::error() << "Dictionary location: " + << df_dict.filepath() + << ", Space ID=" << df_dict.space_id() + << ", Flags=" << df_dict.flags(); + } + + /* Force-recovery will allow some tablespaces to be + skipped by REDO if there was more than one file found. + Unlike during the REDO phase of recovery, we now know + if the tablespace is valid according to the dictionary, + which was not available then. So if we did not force + recovery and there is only one good tablespace, ignore + any bad tablespaces. */ + if (valid_tablespaces_found > 1 || srv_force_recovery > 0) { + ib::error() << "Will not open tablespace `" + << space_name << "`"; + + /* If the file is not open it cannot be valid. 
*/ + ut_ad(df_default.is_open() || !df_default.is_valid()); + ut_ad(df_dict.is_open() || !df_dict.is_valid()); + ut_ad(df_remote.is_open() || !df_remote.is_valid()); + + /* Having established that, this is an easy way to + look for corrupted data files. */ + if (df_default.is_open() != df_default.is_valid() + || df_dict.is_open() != df_dict.is_valid() + || df_remote.is_open() != df_remote.is_valid()) { + return(DB_CORRUPTION); + } + return(DB_ERROR); + } + + /* There is only one valid tablespace found and we did + not use srv_force_recovery during REDO. Use this one + tablespace and clean up invalid tablespace pointers */ + if (df_default.is_open() && !df_default.is_valid()) { + df_default.close(); + tablespaces_found--; + } + if (df_dict.is_open() && !df_dict.is_valid()) { + df_dict.close(); + /* Leave dict.filepath so that SYS_DATAFILES + can be corrected below. */ + tablespaces_found--; + } + if (df_remote.is_open() && !df_remote.is_valid()) { + df_remote.close(); + tablespaces_found--; + link_file_is_bad = true; + } + } + + /* At this point, there should be only one filepath. */ + ut_a(tablespaces_found == 1); + ut_a(valid_tablespaces_found == 1); + + /* Only fix the dictionary at startup when there is only one thread. + Calls to dict_load_table() can be done while holding other latches. */ + if (!fix_dict) { + goto skip_validate; + } + + /* We may need to update what is stored in SYS_DATAFILES or + SYS_TABLESPACES or adjust the link file. Since a failure to + update SYS_TABLESPACES or SYS_DATAFILES does not prevent opening + and using the tablespace either this time or the next, we do not + check the return code or fail to open the tablespace. But if it + fails, dict_update_filepath() will issue a warning to the log. 
*/ + if (df_dict.filepath()) { + ut_ad(path_in != NULL); + ut_ad(df_dict.same_filepath_as(path_in)); + + if (df_remote.is_open()) { + if (!df_remote.same_filepath_as(path_in)) { + dict_update_filepath(id, df_remote.filepath()); + } + + } else if (df_default.is_open()) { + ut_ad(!dict_filepath_same_as_default); + dict_update_filepath(id, df_default.filepath()); + if (link_file_is_bad) { + RemoteDatafile::delete_link_file(space_name); + } + + } else if (!is_shared + && (!link_file_found || link_file_is_bad)) { + ut_ad(df_dict.is_open()); + /* Fix the link file if we got our filepath + from the dictionary but a link file did not + exist or it did not point to a valid file. */ + RemoteDatafile::delete_link_file(space_name); + RemoteDatafile::create_link_file( + space_name, df_dict.filepath()); + } + + } else if (df_remote.is_open()) { + if (dict_filepath_same_as_default) { + dict_update_filepath(id, df_remote.filepath()); + + } else if (path_in == NULL) { + /* SYS_DATAFILES record for this space ID + was not found. */ + dict_replace_tablespace_and_filepath( + id, space_name, df_remote.filepath(), flags); + } + + } else if (df_default.is_open()) { + /* We opened the tablespace in the default location. + SYS_DATAFILES.PATH needs to be updated if it is different + from this default path or if the SYS_DATAFILES.PATH was not + supplied and it should have been. Also update the dictionary + if we found an ISL file (since !df_remote.is_open). Since + path_in is not suppled for file-per-table, we must assume + that it matched the ISL. */ + if ((path_in != NULL && !dict_filepath_same_as_default) + || (path_in == NULL + && (DICT_TF_HAS_DATA_DIR(flags) + || DICT_TF_HAS_SHARED_SPACE(flags))) + || df_remote.filepath() != NULL) { + dict_replace_tablespace_and_filepath( + id, space_name, df_default.filepath(), flags); + } + } + +skip_validate: + if (err == DB_SUCCESS) { + fil_space_t* space = fil_space_create( + space_name, id, flags, purpose, + df_remote.is_open() ? 
df_remote.get_crypt_info() : + df_dict.is_open() ? df_dict.get_crypt_info() : + df_default.get_crypt_info()); + + /* We do not measure the size of the file, that is why + we pass the 0 below */ + + if (fil_node_create_low( + df_remote.is_open() ? df_remote.filepath() : + df_dict.is_open() ? df_dict.filepath() : + df_default.filepath(), 0, space, false, + true, atomic_write) == NULL) { + err = DB_ERROR; + } + +#ifdef MYSQL_ENCRYPTION + /* For encryption tablespace, initialize encryption + information.*/ + if (err == DB_SUCCESS && is_encrypted && !for_import) { + Datafile& df_current = df_remote.is_open() ? + df_remote: df_dict.is_open() ? + df_dict : df_default; + + byte* key = df_current.m_encryption_key; + byte* iv = df_current.m_encryption_iv; + ut_ad(key && iv); + + err = fil_set_encryption(space->id, Encryption::AES, + key, iv); + ut_ad(err == DB_SUCCESS); + } +#endif /* MYSQL_ENCRYPTION */ + + } + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +#ifdef UNIV_HOTBACKUP +/*******************************************************************//** +Allocates a file name for an old version of a single-table tablespace. +The string must be freed by caller with ut_free()! +@return own: file name */ +static +char* +fil_make_ibbackup_old_name( +/*=======================*/ + const char* name) /*!< in: original file name */ +{ + static const char suffix[] = "_ibbackup_old_vers_"; + char* path; + ulint len = strlen(name); + + path = static_cast(ut_malloc_nokey(len + 15 + sizeof(suffix))); + + memcpy(path, name, len); + memcpy(path + len, suffix, sizeof(suffix) - 1); + ut_sprintf_timestamp_without_extra_chars( + path + len + sizeof(suffix) - 1); + return(path); +} +#endif /* UNIV_HOTBACKUP */ + +/** Looks for a pre-existing fil_space_t with the given tablespace ID +and, if found, returns the name and filepath in newly allocated buffers +that the caller must free. +@param[in] space_id The tablespace ID to search for. +@param[out] name Name of the tablespace found. 
+@param[out] filepath The filepath of the first datafile for the +tablespace. +@return true if tablespace is found, false if not. */ +bool +fil_space_read_name_and_filepath( + ulint space_id, + char** name, + char** filepath) +{ + bool success = false; + *name = NULL; + *filepath = NULL; + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(space_id); + + if (space != NULL) { + *name = mem_strdup(space->name); + + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + *filepath = mem_strdup(node->name); + + success = true; + } + + mutex_exit(&fil_system->mutex); + + return(success); +} + +/** Convert a file name to a tablespace name. +@param[in] filename directory/databasename/tablename.ibd +@return database/tablename string, to be freed with ut_free() */ +char* +fil_path_to_space_name( + const char* filename) +{ + /* Strip the file name prefix and suffix, leaving + only databasename/tablename. */ + ulint filename_len = strlen(filename); + const char* end = filename + filename_len; +#ifdef HAVE_MEMRCHR + const char* tablename = 1 + static_cast( + memrchr(filename, OS_PATH_SEPARATOR, + filename_len)); + const char* dbname = 1 + static_cast( + memrchr(filename, OS_PATH_SEPARATOR, + tablename - filename - 1)); +#else /* HAVE_MEMRCHR */ + const char* tablename = filename; + const char* dbname = NULL; + + while (const char* t = static_cast( + memchr(tablename, OS_PATH_SEPARATOR, + end - tablename))) { + dbname = tablename; + tablename = t + 1; + } +#endif /* HAVE_MEMRCHR */ + + ut_ad(dbname != NULL); + ut_ad(tablename > dbname); + ut_ad(tablename < end); + ut_ad(end - tablename > 4); + ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0); + + char* name = mem_strdupl(dbname, end - dbname - 4); + + ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR); +#if OS_PATH_SEPARATOR != '/' + /* space->name uses '/', not OS_PATH_SEPARATOR. 
*/ + name[tablename - dbname - 1] = '/'; +#endif + + return(name); +} + +/** Discover the correct IBD file to open given a remote or missing +filepath from the REDO log. MEB and administrators can move a crashed +database to another location on the same machine and try to recover it. +Remote IBD files might be moved as well to the new location. + The problem with this is that the REDO log contains the old location +which may be still accessible. During recovery, if files are found in +both locations, we can chose on based on these priorities; +1. Default location +2. ISL location +3. REDO location +@param[in] space_id tablespace ID +@param[in] df Datafile object with path from redo +@return true if a valid datafile was found, false if not */ +bool +fil_ibd_discover( + ulint space_id, + Datafile& df) +{ + Datafile df_def_gen; /* default general datafile */ + Datafile df_def_per; /* default file-per-table datafile */ + RemoteDatafile df_rem_gen; /* remote general datafile*/ + RemoteDatafile df_rem_per; /* remote file-per-table datafile */ + + /* Look for the datafile in the default location. If it is + a general tablespace, it will be in the datadir. */ + const char* filename = df.filepath(); + const char* basename = base_name(filename); + df_def_gen.init(basename, 0); + df_def_gen.make_filepath(NULL, basename, IBD); + if (df_def_gen.open_read_only(false) == DB_SUCCESS + && df_def_gen.validate_for_recovery() == DB_SUCCESS + && df_def_gen.space_id() == space_id) { + df.set_filepath(df_def_gen.filepath()); + df.open_read_only(false); + return(true); + } + + /* If this datafile is file-per-table it will have a schema dir. 
*/ + ulint sep_found = 0; + const char* db = basename; + for (; db > filename && sep_found < 2; db--) { + if (db[0] == OS_PATH_SEPARATOR) { + sep_found++; + } + } + if (sep_found == 2) { + db += 2; + df_def_per.init(db, 0); + df_def_per.make_filepath(NULL, db, IBD); + if (df_def_per.open_read_only(false) == DB_SUCCESS + && df_def_per.validate_for_recovery() == DB_SUCCESS + && df_def_per.space_id() == space_id) { + df.set_filepath(df_def_per.filepath()); + df.open_read_only(false); + return(true); + } + } + + /* Did not find a general or file-per-table datafile in the + default location. Look for a remote general tablespace. */ + df_rem_gen.set_name(basename); + if (df_rem_gen.open_link_file() == DB_SUCCESS) { + + /* An ISL file was found with contents. */ + if (df_rem_gen.open_read_only(false) != DB_SUCCESS + || df_rem_gen.validate_for_recovery() != DB_SUCCESS) { + + /* Assume that this ISL file is intended to be used. + Do not continue looking for another if this file + cannot be opened or is not a valid IBD file. */ + ib::error() << "ISL file '" + << df_rem_gen.link_filepath() + << "' was found but the linked file '" + << df_rem_gen.filepath() + << "' could not be opened or is not correct."; + return(false); + } + + /* Use this file if it has the space_id from the MLOG + record. */ + if (df_rem_gen.space_id() == space_id) { + df.set_filepath(df_rem_gen.filepath()); + df.open_read_only(false); + return(true); + } + + /* Since old MLOG records can use the same basename in + multiple CREATE/DROP sequences, this ISL file could be + pointing to a later version of this basename.ibd file + which has a different space_id. Keep looking. */ + } + + /* Look for a remote file-per-table tablespace. */ + if (sep_found == 2) { + df_rem_per.set_name(db); + if (df_rem_per.open_link_file() == DB_SUCCESS) { + + /* An ISL file was found with contents. 
*/ + if (df_rem_per.open_read_only(false) != DB_SUCCESS + || df_rem_per.validate_for_recovery() + != DB_SUCCESS) { + + /* Assume that this ISL file is intended to + be used. Do not continue looking for another + if this file cannot be opened or is not + a valid IBD file. */ + ib::error() << "ISL file '" + << df_rem_per.link_filepath() + << "' was found but the linked file '" + << df_rem_per.filepath() + << "' could not be opened or is" + " not correct."; + return(false); + } + + /* Use this file if it has the space_id from the + MLOG record. */ + if (df_rem_per.space_id() == space_id) { + df.set_filepath(df_rem_per.filepath()); + df.open_read_only(false); + return(true); + } + + /* Since old MLOG records can use the same basename + in multiple CREATE/DROP TABLE sequences, this ISL + file could be pointing to a later version of this + basename.ibd file which has a different space_id. + Keep looking. */ + } + } + + /* No ISL files were found in the default location. Use the location + given in the redo log. */ + if (df.open_read_only(false) == DB_SUCCESS + && df.validate_for_recovery() == DB_SUCCESS + && df.space_id() == space_id) { + return(true); + } + + /* A datafile was not discovered for the filename given. */ + return(false); +} +/** Open an ibd tablespace and add it to the InnoDB data structures. +This is similar to fil_ibd_open() except that it is used while processing +the REDO log, so the data dictionary is not available and very little +validation is done. The tablespace name is extracred from the +dbname/tablename.ibd portion of the filename, which assumes that the file +is a file-per-table tablespace. Any name will do for now. General +tablespace names will be read from the dictionary after it has been +recovered. The tablespace flags are read at this time from the first page +of the file in validate_for_recovery(). 
+@param[in] space_id tablespace ID +@param[in] filename path/to/databasename/tablename.ibd +@param[out] space the tablespace, or NULL on error +@return status of the operation */ +enum fil_load_status +fil_ibd_load( + ulint space_id, + const char* filename, + fil_space_t*& space) +{ + /* If the a space is already in the file system cache with this + space ID, then there is nothing to do. */ + mutex_enter(&fil_system->mutex); + space = fil_space_get_by_id(space_id); + mutex_exit(&fil_system->mutex); + + if (space != NULL) { + /* Compare the filename we are trying to open with the + filename from the first node of the tablespace we opened + previously. Fail if it is different. */ + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + if (0 != strcmp(innobase_basename(filename), + innobase_basename(node->name))) { +#ifdef UNIV_HOTBACKUP + ib::trace() +#else + ib::info() +#endif /* UNIV_HOTBACKUP */ + << "Ignoring data file '" << filename + << "' with space ID " << space->id + << ". Another data file called " << node->name + << " exists with the same space ID."; + space = NULL; + return(FIL_LOAD_ID_CHANGED); + } + return(FIL_LOAD_OK); + } + + /* If the filepath in the redo log is a default location in or + under the datadir, then just try to open it there. */ + Datafile file; + file.set_filepath(filename); + Folder folder(filename, dirname_length(filename)); + if (folder_mysql_datadir >= folder) { + file.open_read_only(false); + } + + if (!file.is_open()) { + /* The file has been moved or it is a remote datafile. */ + if (!fil_ibd_discover(space_id, file) + || !file.is_open()) { + return(FIL_LOAD_NOT_FOUND); + } + } + + os_offset_t size; + + /* Read and validate the first page of the tablespace. + Assign a tablespace name based on the tablespace type. 
*/ + switch (file.validate_for_recovery()) { + os_offset_t minimum_size; + case DB_SUCCESS: + if (file.space_id() != space_id) { +#ifdef UNIV_HOTBACKUP + ib::trace() +#else /* !UNIV_HOTBACKUP */ + ib::info() +#endif /* UNIV_HOTBACKUP */ + << "Ignoring data file '" + << file.filepath() + << "' with space ID " << file.space_id() + << ", since the redo log references " + << file.filepath() << " with space ID " + << space_id << "."; + return(FIL_LOAD_ID_CHANGED); + } + /* Get and test the file size. */ + size = os_file_get_size(file.handle()); + + /* Every .ibd file is created >= 4 pages in size. + Smaller files cannot be OK. */ + minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE; + + if (size == static_cast(-1)) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib::error() << "Could not measure the size of" + " single-table tablespace file '" + << file.filepath() << "'"; + } else if (size < minimum_size) { +#ifndef UNIV_HOTBACKUP + ib::error() << "The size of tablespace file '" + << file.filepath() << "' is only " << size + << ", should be at least " << minimum_size + << "!"; +#else + /* In MEB, we work around this error. */ + file.set_space_id(ULINT_UNDEFINED); + file.set_flags(0); + break; +#endif /* !UNIV_HOTBACKUP */ + } else { + /* Everything is fine so far. 
*/ + break; + } + + /* Fall through to error handling */ + + case DB_TABLESPACE_EXISTS: +#ifdef UNIV_HOTBACKUP + if (file.flags() == ~(ulint)0) { + return FIL_LOAD_OK; + } +#endif /* UNIV_HOTBACKUP */ + + return(FIL_LOAD_INVALID); + + default: + return(FIL_LOAD_NOT_FOUND); + } + + ut_ad(space == NULL); + +#ifdef UNIV_HOTBACKUP + if (file.space_id() == ULINT_UNDEFINED || file.space_id() == 0) { + char* new_path; + + ib::info() << "Renaming tablespace file '" << file.filepath() + << "' with space ID " << file.space_id() << " to " + << file.name() << "_ibbackup_old_vers_" + " because its size " << size() << " is too small" + " (< 4 pages 16 kB each), or the space id in the" + " file header is not sensible. This can happen in" + " an mysqlbackup run, and is not dangerous."; + file.close(); + + new_path = fil_make_ibbackup_old_name(file.filepath()); + + bool success = os_file_rename( + innodb_data_file_key, file.filepath(), new_path); + + ut_a(success); + + ut_free(new_path); + + return(FIL_LOAD_ID_CHANGED); + } + + /* A backup may contain the same space several times, if the space got + renamed at a sensitive time. Since it is enough to have one version of + the space, we rename the file if a space with the same space id + already exists in the tablespace memory cache. We rather rename the + file than delete it, because if there is a bug, we do not want to + destroy valuable data. */ + + mutex_enter(&fil_system->mutex); + space = fil_space_get_by_id(space_id); + mutex_exit(&fil_system->mutex); + + if (space != NULL) { + ib::info() << "Renaming data file '" << file.filepath() + << "' with space ID " << space_id << " to " + << file.name() + << "_ibbackup_old_vers_ because space " + << space->name << " with the same id was scanned" + " earlier. 
This can happen if you have renamed tables" + " during an mysqlbackup run."; + file.close(); + + char* new_path = fil_make_ibbackup_old_name(file.filepath()); + + bool success = os_file_rename( + innodb_data_file_key, file.filepath(), new_path); + + ut_a(success); + + ut_free(new_path); + return(FIL_LOAD_OK); + } +#endif /* UNIV_HOTBACKUP */ + + bool is_temp = FSP_FLAGS_GET_TEMPORARY(file.flags()); + space = fil_space_create( + file.name(), space_id, file.flags(), + is_temp ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE, + file.get_crypt_info()); + + if (space == NULL) { + return(FIL_LOAD_INVALID); + } + + ut_ad(space->id == file.space_id()); + ut_ad(space->id == space_id); + + /* We do not use the size information we have about the file, because + the rounding formula for extents and pages is somewhat complex; we + let fil_node_open() do that task. */ + + if (!fil_node_create_low(file.filepath(), 0, space, + false, true, false)) { + ut_error; + } + +#ifdef MYSQL_ENCRYPTION + /* For encryption tablespace, initial encryption information. */ + if (FSP_FLAGS_GET_ENCRYPTION(space->flags) + && file.m_encryption_key != NULL) { + dberr_t err = fil_set_encryption(space->id, + Encryption::AES, + file.m_encryption_key, + file.m_encryption_iv); + if (err != DB_SUCCESS) { + ib::error() << "Can't set encryption information for" + " tablespace " << space->name << "!"; + } + } +#endif /* MYSQL_ENCRYPTION */ + + return(FIL_LOAD_OK); +} + +/***********************************************************************//** +A fault-tolerant function that tries to read the next file name in the +directory. We retry 100 times if os_file_readdir_next_file() returns -1. The +idea is to read as much good data as we can and jump over bad data. 
+@return 0 if ok, -1 if error even after the retries, 1 if at the end +of the directory */ +int +fil_file_readdir_next_file( +/*=======================*/ + dberr_t* err, /*!< out: this is set to DB_ERROR if an error + was encountered, otherwise not changed */ + const char* dirname,/*!< in: directory name or path */ + os_file_dir_t dir, /*!< in: directory stream */ + os_file_stat_t* info) /*!< in/out: buffer where the + info is returned */ +{ + for (ulint i = 0; i < 100; i++) { + int ret = os_file_readdir_next_file(dirname, dir, info); + + if (ret != -1) { + + return(ret); + } + + ib::error() << "os_file_readdir_next_file() returned -1 in" + " directory " << dirname + << ", crash recovery may have failed" + " for some .ibd files!"; + + *err = DB_ERROR; + } + + return(-1); +} + +/*******************************************************************//** +Report that a tablespace for a table was not found. */ +static +void +fil_report_missing_tablespace( +/*===========================*/ + const char* name, /*!< in: table name */ + ulint space_id) /*!< in: table's space id */ +{ + ib::error() << "Table " << name + << " in the InnoDB data dictionary has tablespace id " + << space_id << "," + " but tablespace with that id or name does not exist. Have" + " you deleted or moved .ibd files? This may also be a table" + " created with CREATE TEMPORARY TABLE whose .ibd and .frm" + " files MySQL automatically removed, but the table still" + " exists in the InnoDB internal data dictionary."; +} + +#ifndef UNIV_HOTBACKUP +/** Returns true if a matching tablespace exists in the InnoDB tablespace +memory cache. Note that if we have not done a crash recovery at the database +startup, there may be many tablespaces which are not yet in the memory cache. +@param[in] id Tablespace ID +@param[in] name Tablespace name used in fil_space_create(). +@param[in] print_error_if_does_not_exist + Print detailed error information to the +error log if a matching tablespace is not found from memory. 
+@param[in] adjust_space Whether to adjust space id on mismatch +@param[in] heap Heap memory +@param[in] table_id table id +@param[in] table table +@return true if a matching tablespace exists in the memory cache */ +bool +fil_space_for_table_exists_in_mem( + ulint id, + const char* name, + bool print_error_if_does_not_exist, + bool adjust_space, + mem_heap_t* heap, + table_id_t table_id, + dict_table_t* table) +{ + fil_space_t* fnamespace = NULL; + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + /* Look if there is a space with the same id */ + + space = fil_space_get_by_id(id); + + /* If tablespace contains encryption information + copy it also to table. */ + if (space && space->crypt_data && + table && !table->crypt_data) { + table->crypt_data = space->crypt_data; + } + + if (space != NULL + && FSP_FLAGS_GET_SHARED(space->flags) + && adjust_space + && srv_sys_tablespaces_open + && 0 == strncmp(space->name, general_space_name, + strlen(general_space_name))) { + /* This name was assigned during recovery in fil_ibd_load(). + This general tablespace was opened from an MLOG_FILE_NAME log + entry where the tablespace name does not exist. Replace the + temporary name with this name and return this space. */ + HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(space->name), space); + ut_free(space->name); + space->name = mem_strdup(name); + HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(space->name), space); + + mutex_exit(&fil_system->mutex); + + return(true); + } + + if (space != NULL) { + if (FSP_FLAGS_GET_SHARED(space->flags) + && !srv_sys_tablespaces_open) { + + /* No need to check the name */ + mutex_exit(&fil_system->mutex); + return(true); + } + + /* If this space has the expected name, use it. 
*/ + fnamespace = fil_space_get_by_name(name); + if (space == fnamespace) { + /* Found */ + + mutex_exit(&fil_system->mutex); + return(true); + } + } + + /* Info from "fnamespace" comes from the ibd file itself, it can + be different from data obtained from System tables since file + operations are not transactional. If adjust_space is set, and the + mismatching space are between a user table and its temp table, we + shall adjust the ibd file name according to system table info */ + if (adjust_space + && space != NULL + && row_is_mysql_tmp_table_name(space->name) + && !row_is_mysql_tmp_table_name(name)) { + + mutex_exit(&fil_system->mutex); + + DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space", + DBUG_SUICIDE();); + + if (fnamespace) { + const char* tmp_name; + + tmp_name = dict_mem_create_temporary_tablename( + heap, name, table_id); + + fil_rename_tablespace( + fnamespace->id, + UT_LIST_GET_FIRST(fnamespace->chain)->name, + tmp_name, NULL); + } + + DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", + DBUG_SUICIDE();); + + fil_rename_tablespace( + id, UT_LIST_GET_FIRST(space->chain)->name, + name, NULL); + + DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", + DBUG_SUICIDE();); + + mutex_enter(&fil_system->mutex); + fnamespace = fil_space_get_by_name(name); + ut_ad(space == fnamespace); + mutex_exit(&fil_system->mutex); + + return(true); + } + + if (!print_error_if_does_not_exist) { + + mutex_exit(&fil_system->mutex); + + return(false); + } + + if (space == NULL) { + if (fnamespace == NULL) { + if (print_error_if_does_not_exist) { + fil_report_missing_tablespace(name, id); + } + } else { + ib::error() << "Table " << name << " in InnoDB data" + " dictionary has tablespace id " << id + << ", but a tablespace with that id does not" + " exist. There is a tablespace of name " + << fnamespace->name << " and id " + << fnamespace->id << ", though. 
Have you" + " deleted or moved .ibd files?"; + } +error_exit: + ib::warn() << TROUBLESHOOT_DATADICT_MSG; + + mutex_exit(&fil_system->mutex); + + return(false); + } + + if (0 != strcmp(space->name, name)) { + + ib::error() << "Table " << name << " in InnoDB data dictionary" + " has tablespace id " << id << ", but the tablespace" + " with that id has name " << space->name << "." + " Have you deleted or moved .ibd files?"; + + if (fnamespace != NULL) { + ib::error() << "There is a tablespace with the right" + " name: " << fnamespace->name << ", but its id" + " is " << fnamespace->id << "."; + } + + goto error_exit; + } + + mutex_exit(&fil_system->mutex); + + return(false); +} +#endif /* !UNIV_HOTBACKUP */ +/** Return the space ID based on the tablespace name. +The tablespace must be found in the tablespace memory cache. +This call is made from external to this module, so the mutex is not owned. +@param[in] tablespace Tablespace name +@return space ID if tablespace found, ULINT_UNDEFINED if space not. */ +ulint +fil_space_get_id_by_name( + const char* tablespace) +{ + mutex_enter(&fil_system->mutex); + + /* Search for a space with the same name. */ + fil_space_t* space = fil_space_get_by_name(tablespace); + ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id; + + mutex_exit(&fil_system->mutex); + + return(id); +} + +/** +Fill the pages with NULs +@param[in] node File node +@param[in] page_size physical page size +@param[in] start Offset from the start of the file in bytes +@param[in] len Length in bytes +@param[in] read_only_mode + if true, then read only mode checks are enforced. 
+@return DB_SUCCESS or error code */ +static +dberr_t +fil_write_zeros( + const fil_node_t* node, + ulint page_size, + os_offset_t start, + ulint len, + bool read_only_mode) +{ + ut_a(len > 0); + + /* Extend at most 1M at a time */ + ulint n_bytes = ut_min(static_cast(1024 * 1024), len); + byte* ptr = reinterpret_cast(ut_zalloc_nokey(n_bytes + + page_size)); + byte* buf = reinterpret_cast(ut_align(ptr, page_size)); + + os_offset_t offset = start; + dberr_t err = DB_SUCCESS; + const os_offset_t end = start + len; + IORequest request(IORequest::WRITE); + + while (offset < end) { + +#ifdef UNIV_HOTBACKUP + err = os_file_write( + request, node->name, node->handle, buf, offset, + n_bytes); +#else + err = os_aio( + request, OS_AIO_SYNC, node->name, + node->handle, buf, offset, n_bytes, read_only_mode, + NULL, NULL, NULL); +#endif /* UNIV_HOTBACKUP */ + + if (err != DB_SUCCESS) { + break; + } + + offset += n_bytes; + + n_bytes = ut_min(n_bytes, static_cast(end - offset)); + + DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", + DBUG_SUICIDE();); + } + + ut_free(ptr); + + return(err); +} + +/** Try to extend a tablespace if it is smaller than the specified size. +@param[in,out] space tablespace +@param[in] size desired size in pages +@return whether the tablespace is at least as big as requested */ +bool +fil_space_extend( + fil_space_t* space, + ulint size) +{ + /* In read-only mode we allow write to shared temporary tablespace + as intrinsic table created by Optimizer reside in this tablespace. 
*/ + ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id)); + +retry: + +#ifdef UNIV_HOTBACKUP + page_size_t page_length(space->flags); + ulint actual_size = space->size; + ib::trace() << "space id : " << space->id << ", space name : " + << space->name << ", space size : " << actual_size << " pages," + << " desired space size : " << size << " pages," + << " page size : " << page_length.physical(); +#endif /* UNIV_HOTBACKUP */ + + bool success = true; + + fil_mutex_enter_and_prepare_for_io(space->id); + + if (space->size >= size) { + /* Space already big enough */ + mutex_exit(&fil_system->mutex); + return(true); + } + + page_size_t pageSize(space->flags); + const ulint page_size = pageSize.physical(); + fil_node_t* node = UT_LIST_GET_LAST(space->chain); + + if (!node->being_extended) { + /* Mark this node as undergoing extension. This flag + is used by other threads to wait for the extension + opereation to finish. */ + node->being_extended = true; + } else { + /* Another thread is currently extending the file. Wait + for it to finish. It'd have been better to use an event + driven mechanism but the entire module is peppered with + polling code. */ + + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + goto retry; + } + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + mutex_exit(&fil_system->mutex); + + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. 
*/ + mutex_exit(&fil_system->mutex); + + ulint pages_added; + + /* Note: This code is going to be executed independent of FusionIO HW + if the OS supports posix_fallocate() */ + + ut_ad(size > space->size); + + os_offset_t node_start = os_file_get_size(node->handle); + ut_a(node_start != (os_offset_t) -1); + + /* Node first page number */ + ulint node_first_page = space->size - node->size; + + /* Number of physical pages in the node/file */ + ulint n_node_physical_pages + = static_cast(node_start) / page_size; + + /* Number of pages to extend in the node/file */ + lint n_node_extend; + + n_node_extend = size - (node_first_page + node->size); + + /* If we already have enough physical pages to satisfy the + extend request on the node then ignore it */ + if (node->size + n_node_extend > n_node_physical_pages) { + + DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", + DBUG_SUICIDE();); + + os_offset_t len; + dberr_t err = DB_SUCCESS; + + len = ((node->size + n_node_extend) * page_size) - node_start; + ut_ad(len > 0); + const char* name = node->name == NULL ? space->name : node->name; + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + /* This is required by FusionIO HW/Firmware */ + int ret = posix_fallocate(node->handle, node_start, len); + + /* We already pass the valid offset and len in, if EINVAL + is returned, it could only mean that the file system doesn't + support fallocate(), currently one known case is + ext3 FS with O_DIRECT. We ignore EINVAL here so that the + error message won't flood. */ + if (ret != 0 && ret != EINVAL) { + ib::error() + << "posix_fallocate(): Failed to preallocate" + " data for file " + << name << ", desired size " + << len << " bytes." + " Operating system error number " + << ret << ". Check" + " that the disk is not full or a disk quota" + " exceeded. Make sure the file system supports" + " this function. 
Some operating system error" + " numbers are described at " REFMAN + " operating-system-error-codes.html"; + + err = DB_IO_ERROR; + } +#endif /* NO_FALLOCATE || !UNIV_LINUX */ + + if (!node->atomic_write || err == DB_IO_ERROR) { + + bool read_only_mode; + + read_only_mode = (space->purpose != FIL_TYPE_TEMPORARY + ? false : srv_read_only_mode); + + err = fil_write_zeros( + node, page_size, node_start, + static_cast(len), read_only_mode); + + if (err != DB_SUCCESS) { + + ib::warn() + << "Error while writing " << len + << " zeroes to " << name + << " starting at offset " << node_start; + } + } + + /* Check how many pages actually added */ + os_offset_t end = os_file_get_size(node->handle); + ut_a(end != static_cast(-1) && end >= node_start); + + os_has_said_disk_full = !(success = (end == node_start + len)); + + pages_added = static_cast(end - node_start) / page_size; + + } else { + success = true; + pages_added = n_node_extend; + os_has_said_disk_full = FALSE; + } + + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + node->size += pages_added; + space->size += pages_added; + node->being_extended = false; + + fil_node_complete_io(node, fil_system, IORequestWrite); + +#ifndef UNIV_HOTBACKUP + /* Keep the last data file size info up to date, rounded to + full megabytes */ + ulint pages_per_mb = (1024 * 1024) / page_size; + ulint size_in_pages = ((node->size / pages_per_mb) * pages_per_mb); + + if (space->id == srv_sys_space.space_id()) { + srv_sys_space.set_last_file_size(size_in_pages); + } else if (space->id == srv_tmp_space.space_id()) { + srv_tmp_space.set_last_file_size(size_in_pages); + } +#else + ib::trace() << "extended space : " << space->name << " from " + << actual_size << " pages to " << space->size << " pages " + << ", desired space size : " << size << " pages."; +#endif /* !UNIV_HOTBACKUP */ + + mutex_exit(&fil_system->mutex); + + fil_flush(space->id); + + return(success); +} + +#ifdef UNIV_HOTBACKUP 
+/********************************************************************//** +Extends all tablespaces to the size stored in the space header. During the +mysqlbackup --apply-log phase we extended the spaces on-demand so that log +records could be applied, but that may have left spaces still too small +compared to the size stored in the space header. */ +void +fil_extend_tablespaces_to_stored_len(void) +/*======================================*/ +{ + byte* buf; + ulint actual_size; + ulint size_in_header; + dberr_t error; + bool success; + + buf = (byte*)ut_malloc_nokey(UNIV_PAGE_SIZE); + + mutex_enter(&fil_system->mutex); + + for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; + space = UT_LIST_GET_NEXT(space_list, space)) { + + ut_a(space->purpose == FIL_TYPE_TABLESPACE); + + mutex_exit(&fil_system->mutex); /* no need to protect with a + mutex, because this is a + single-threaded operation */ + error = fil_read( + page_id_t(space->id, 0), + page_size_t(space->flags), + 0, univ_page_size.physical(), buf); + + ut_a(error == DB_SUCCESS); + + size_in_header = fsp_header_get_field(buf, FSP_SIZE); + + success = fil_space_extend(space, size_in_header); + if (!success) { + ib::error() << "Could not extend the tablespace of " + << space->name << " to the size stored in" + " header, " << size_in_header << " pages;" + " size after extension " << actual_size + << " pages. Check that you have free disk" + " space and retry!"; + ut_a(success); + } + + mutex_enter(&fil_system->mutex); + } + + mutex_exit(&fil_system->mutex); + + ut_free(buf); +} +#endif + +/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ + +/*******************************************************************//** +Tries to reserve free extents in a file space. 
+@return true if succeed */ +bool +fil_space_reserve_free_extents( +/*===========================*/ + ulint id, /*!< in: space id */ + ulint n_free_now, /*!< in: number of free extents now */ + ulint n_to_reserve) /*!< in: how many one wants to reserve */ +{ + fil_space_t* space; + bool success; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + if (space->n_reserved_extents + n_to_reserve > n_free_now) { + success = false; + } else { + space->n_reserved_extents += n_to_reserve; + success = true; + } + + mutex_exit(&fil_system->mutex); + + return(success); +} + +/*******************************************************************//** +Releases free extents in a file space. */ +void +fil_space_release_free_extents( +/*===========================*/ + ulint id, /*!< in: space id */ + ulint n_reserved) /*!< in: how many one reserved */ +{ + fil_space_t* space; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + ut_a(space->n_reserved_extents >= n_reserved); + + space->n_reserved_extents -= n_reserved; + + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Gets the number of reserved extents. If the database is silent, this number +should be zero. */ +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + ulint n; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + + ut_a(space); + + n = space->n_reserved_extents; + + mutex_exit(&fil_system->mutex); + + return(n); +} + +/*============================ FILE I/O ================================*/ + +/********************************************************************//** +NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! + +Prepares a file node for i/o. Opens the file if it is closed. 
Updates the +pending i/o's field in the node and the system appropriately. Takes the node +off the LRU list if it is in the LRU list. The caller must hold the fil_sys +mutex. +@return false if the file can't be opened, otherwise true */ +static +bool +fil_node_prepare_for_io( +/*====================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + fil_space_t* space) /*!< in: space */ +{ + ut_ad(node && system && space); + ut_ad(mutex_own(&(system->mutex))); + + if (system->n_open > system->max_n_open + 5) { + ib::warn() << "Open files " << system->n_open + << " exceeds the limit " << system->max_n_open; + } + + if (!node->is_open) { + /* File is closed: open it */ + ut_a(node->n_pending == 0); + + if (!fil_node_open_file(node)) { + return(false); + } + } + + if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { + /* The node is in the LRU list, remove it */ + + ut_a(UT_LIST_GET_LEN(system->LRU) > 0); + + UT_LIST_REMOVE(system->LRU, node); + } + + node->n_pending++; + + return(true); +} + +/********************************************************************//** +Updates the data structures when an i/o operation finishes. Updates the +pending i/o's field in the node appropriately. */ +static +void +fil_node_complete_io( +/*=================*/ + fil_node_t* node, /*!< in: file node */ + fil_system_t* system, /*!< in: tablespace memory cache */ + const IORequest&type) /*!< in: IO_TYPE_*, marks the node as + modified if TYPE_IS_WRITE() */ +{ + ut_ad(mutex_own(&system->mutex)); + ut_a(node->n_pending > 0); + + --node->n_pending; + + ut_ad(type.validate()); + + if (type.is_write()) { + + ut_ad(!srv_read_only_mode + || fsp_is_system_temporary(node->space->id)); + + ++system->modification_counter; + + node->modification_counter = system->modification_counter; + + if (fil_buffering_disabled(node->space)) { + + /* We don't need to keep track of unflushed + changes as user has explicitly disabled + buffering. 
*/ + ut_ad(!node->space->is_in_unflushed_spaces); + node->flush_counter = node->modification_counter; + + } else if (!node->space->is_in_unflushed_spaces) { + + node->space->is_in_unflushed_spaces = true; + + UT_LIST_ADD_FIRST( + system->unflushed_spaces, node->space); + } + } + + if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { + + /* The node must be put back to the LRU list */ + UT_LIST_ADD_FIRST(system->LRU, node); + } +} + +/** Report information about an invalid page access. */ +static +void +fil_report_invalid_page_access( + ulint block_offset, /*!< in: block offset */ + ulint space_id, /*!< in: space id */ + const char* space_name, /*!< in: space name */ + ulint byte_offset, /*!< in: byte offset */ + ulint len, /*!< in: I/O length */ + bool is_read) /*!< in: I/O type */ +{ + ib::error() + << "Trying to access page number " << block_offset << " in" + " space " << space_id << ", space name " << space_name << "," + " which is outside the tablespace bounds. Byte offset " + << byte_offset << ", len " << len << ", i/o type " << + (is_read ? "read" : "write") + << ". If you get this error at mysqld startup, please check" + " that your my.cnf matches the ibdata files that you have in" + " the MySQL server."; + + ib::error() << "Server exits" +#ifdef UNIV_DEBUG + << " at " << __FILE__ << "[" << __LINE__ << "]" +#endif + << "."; + + _exit(1); +} + +#ifdef MYSQL_ENCRYPTION +/** Set encryption information for IORequest. +@param[in,out] req_type IO request +@param[in] page_id page id +@param[in] space table space */ +inline +void +fil_io_set_encryption( + IORequest& req_type, + const page_id_t& page_id, + fil_space_t* space) +{ + /* Don't encrypt the log, page 0 of all tablespaces, all pages + from the system tablespace. 
*/ + if (!req_type.is_log() && page_id.page_no() > 0 + && space->encryption_type != Encryption::NONE) + { + req_type.encryption_key(space->encryption_key, + space->encryption_klen, + space->encryption_iv); + req_type.encryption_algorithm(Encryption::AES); + } else { + req_type.clear_encrypted(); + } +} +#endif /* MYSQL_ENCRYPTION */ + +/** Reads or writes data. This operation could be asynchronous (aio). + +@param[in,out] type IO context +@param[in] sync true if synchronous aio is desired +@param[in] page_id page id +@param[in] page_size page size +@param[in] byte_offset remainder of offset in bytes; in aio this + must be divisible by the OS block size +@param[in] len how many bytes to read or write; this must + not cross a file boundary; in aio this must + be a block size multiple +@param[in,out] buf buffer where to store read data or from where + to write; in aio this must be appropriately + aligned +@param[in] message message for aio handler if non-sync aio + used, else ignored +@param[in] write_size actual payload size when written + to avoid extra punch holes in compression +@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED + if we are trying to do i/o on a tablespace which does not exist */ +dberr_t +fil_io( + const IORequest& type, + bool sync, + const page_id_t& page_id, + const page_size_t& page_size, + ulint byte_offset, + ulint len, + void* buf, + void* message, + ulint* write_size) +{ + os_offset_t offset; + IORequest req_type(type); + + ut_ad(req_type.validate()); + + ut_ad(len > 0); + ut_ad(byte_offset < UNIV_PAGE_SIZE); + ut_ad(!page_size.is_compressed() || byte_offset == 0); + ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT)); +#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX" +#endif +#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN" +#endif + ut_ad(fil_validate_skip()); + 
+#ifndef UNIV_HOTBACKUP + + /* ibuf bitmap pages must be read in the sync AIO mode: */ + ut_ad(recv_no_ibuf_operations + || req_type.is_write() + || !ibuf_bitmap_page(page_id, page_size) + || sync + || req_type.is_log()); + + ulint mode; + + if (sync) { + + mode = OS_AIO_SYNC; + + } else if (req_type.is_log()) { + + mode = OS_AIO_LOG; + + } else if (req_type.is_read() + && !recv_no_ibuf_operations + && ibuf_page(page_id, page_size, NULL)) { + + mode = OS_AIO_IBUF; + + /* Reduce probability of deadlock bugs in connection with ibuf: + do not let the ibuf i/o handler sleep */ + + req_type.clear_do_not_wake(); + } else { + mode = OS_AIO_NORMAL; + } +#else /* !UNIV_HOTBACKUP */ + ut_a(sync); + ulint mode = OS_AIO_SYNC; +#endif /* !UNIV_HOTBACKUP */ + +#ifndef UNIV_HOTBACKUP + if (req_type.is_read()) { + + srv_stats.data_read.add(len); + + } else if (req_type.is_write()) { + + ut_ad(!srv_read_only_mode + || fsp_is_system_temporary(page_id.space())); + + srv_stats.data_written.add(len); + } +#endif /* !UNIV_HOTBACKUP */ + + /* Reserve the fil_system mutex and make sure that we can open at + least one file while holding it, if the file is not already open */ + + fil_mutex_enter_and_prepare_for_io(page_id.space()); + + fil_space_t* space = fil_space_get_by_id(page_id.space()); + + /* If we are deleting a tablespace we don't allow async read operations + on that. However, we do allow write operations and sync read operations. */ + if (space == NULL + || (req_type.is_read() + && !sync + && space->stop_new_ops + && !space->is_being_truncated)) { + + mutex_exit(&fil_system->mutex); + + if (!req_type.ignore_missing()) { + ib::error() + << "Trying to do I/O to a tablespace which" + " does not exist. I/O type: " + << (req_type.is_read() ? 
"read" : "write") + << ", page: " << page_id + << ", I/O length: " << len << " bytes"; + } + + return(DB_TABLESPACE_DELETED); + } + + ut_ad(mode != OS_AIO_IBUF || fil_type_is_data(space->purpose)); + + ulint cur_page_no = page_id.page_no(); + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + + for (;;) { + + if (node == NULL) { + + if (req_type.ignore_missing()) { + mutex_exit(&fil_system->mutex); + return(DB_ERROR); + } + + fil_report_invalid_page_access( + page_id.page_no(), page_id.space(), + space->name, byte_offset, len, + req_type.is_read()); + + } else if (fil_is_user_tablespace_id(space->id) + && node->size == 0) { + + /* We do not know the size of a single-table tablespace + before we open the file */ + break; + + } else if (node->size > cur_page_no) { + /* Found! */ + break; + + } else { + if (space->id != srv_sys_space.space_id() + && UT_LIST_GET_LEN(space->chain) == 1 + && (srv_is_tablespace_truncated(space->id) + || space->is_being_truncated + || srv_was_tablespace_truncated(space)) + && req_type.is_read()) { + + /* Handle page which is outside the truncated + tablespace bounds when recovering from a crash + happened during a truncation */ + mutex_exit(&fil_system->mutex); + return(DB_TABLESPACE_TRUNCATED); + } + + cur_page_no -= node->size; + + node = UT_LIST_GET_NEXT(chain, node); + } + } + + /* Open file if closed */ + if (!fil_node_prepare_for_io(node, fil_system, space)) { + if (fil_type_is_data(space->purpose) + && fil_is_user_tablespace_id(space->id)) { + mutex_exit(&fil_system->mutex); + + if (!req_type.ignore_missing()) { + ib::error() + << "Trying to do I/O to a tablespace" + " which exists without .ibd data file." + " I/O type: " + << (req_type.is_read() + ? "read" : "write") + << ", page: " + << page_id_t(page_id.space(), + cur_page_no) + << ", I/O length: " << len << " bytes"; + } + + return(DB_TABLESPACE_DELETED); + } + + /* The tablespace is for log. 
Currently, we just assert here + to prevent handling errors along the way fil_io returns. + Also, if the log files are missing, it would be hard to + promise the server can continue running. */ + ut_a(0); + } + + /* Check that at least the start offset is within the bounds of a + single-table tablespace, including rollback tablespaces. */ + if (node->size <= cur_page_no + && space->id != srv_sys_space.space_id() + && fil_type_is_data(space->purpose)) { + + if (req_type.ignore_missing()) { + /* If we can tolerate the non-existent pages, we + should return with DB_ERROR and let caller decide + what to do. */ + fil_node_complete_io(node, fil_system, req_type); + mutex_exit(&fil_system->mutex); + return(DB_ERROR); + } + + fil_report_invalid_page_access( + page_id.page_no(), page_id.space(), + space->name, byte_offset, len, req_type.is_read()); + } + + /* Now we have made the changes in the data structures of fil_system */ + mutex_exit(&fil_system->mutex); + + /* Calculate the low 32 bits and the high 32 bits of the file offset */ + + if (!page_size.is_compressed()) { + + offset = ((os_offset_t) cur_page_no + << UNIV_PAGE_SIZE_SHIFT) + byte_offset; + + ut_a(node->size - cur_page_no + >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) + / UNIV_PAGE_SIZE)); + } else { + ulint size_shift; + + switch (page_size.physical()) { + case 1024: size_shift = 10; break; + case 2048: size_shift = 11; break; + case 4096: size_shift = 12; break; + case 8192: size_shift = 13; break; + case 16384: size_shift = 14; break; + case 32768: size_shift = 15; break; + case 65536: size_shift = 16; break; + default: ut_error; + } + + offset = ((os_offset_t) cur_page_no << size_shift) + + byte_offset; + + ut_a(node->size - cur_page_no + >= (len + (page_size.physical() - 1)) + / page_size.physical()); + } + + /* Do AIO */ + + ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); + + const char* name = node->name == NULL ? 
space->name : node->name; + +#ifdef MYSQL_COMPRESSION + /* Don't compress the log, page 0 of all tablespaces, tables + compresssed with the old scheme and all pages from the system + tablespace. */ + + if (req_type.is_write() + && !req_type.is_log() + && !page_size.is_compressed() + && page_id.page_no() > 0 + && IORequest::is_punch_hole_supported() + && node->punch_hole) { + + ut_ad(!req_type.is_log()); + + req_type.set_punch_hole(); + + req_type.compression_algorithm(space->compression_type); + + } else { + req_type.clear_compressed(); + } +#endif /* MYSQL_COMPRESSION */ + +#ifdef MYSQL_ENCRYPTION + /* Set encryption information. */ + fil_io_set_encryption(req_type, page_id, space); +#endif /* MYSQL_ENCRYPTION */ + + req_type.block_size(node->block_size); + + dberr_t err; + +#ifdef UNIV_HOTBACKUP + /* In mysqlbackup do normal i/o, not aio */ + if (req_type.is_read()) { + + err = os_file_read(req_type, node->handle, buf, offset, len); + + } else { + + ut_ad(!srv_read_only_mode + || fsp_is_system_temporary(page_id.space())); + + err = os_file_write( + req_type, node->name, node->handle, buf, offset, len); + } +#else /* UNIV_HOTBACKUP */ + /* Queue the aio request */ + err = os_aio( + req_type, + mode, name, node->handle, buf, offset, len, + fsp_is_system_temporary(page_id.space()) + ? false : srv_read_only_mode, + node, message, write_size); + +#endif /* UNIV_HOTBACKUP */ + + if (err == DB_IO_NO_PUNCH_HOLE) { + + err = DB_SUCCESS; + + if (node->punch_hole) { + + ib::warn() + << "Punch hole failed for '" + << name << "'"; + } + + fil_no_punch_hole(node); + } + + /* We an try to recover the page from the double write buffer if + the decompression fails or the page is corrupt. 
*/ + + ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS); + + if (sync) { + /* The i/o operation is already completed when we return from + os_aio: */ + + mutex_enter(&fil_system->mutex); + + fil_node_complete_io(node, fil_system, req_type); + + mutex_exit(&fil_system->mutex); + + ut_ad(fil_validate_skip()); + } + + return(err); +} + +#ifndef UNIV_HOTBACKUP +/**********************************************************************//** +Waits for an aio operation to complete. This function is used to write the +handler for completed requests. The aio array of pending requests is divided +into segments (see os0file.cc for more info). The thread specifies which +segment it wants to wait for. */ +void +fil_aio_wait( +/*=========*/ + ulint segment) /*!< in: the number of the segment in the aio + array to wait for */ +{ + fil_node_t* node; + IORequest type; + void* message; + + ut_ad(fil_validate_skip()); + + dberr_t err = os_aio_handler(segment, &node, &message, &type); + + ut_a(err == DB_SUCCESS); + + if (node == NULL) { + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); + return; + } + + srv_set_io_thread_op_info(segment, "complete io for fil node"); + + mutex_enter(&fil_system->mutex); + + fil_node_complete_io(node, fil_system, type); + + mutex_exit(&fil_system->mutex); + + ut_ad(fil_validate_skip()); + + /* Do the i/o handling */ + /* IMPORTANT: since i/o handling for reads will read also the insert + buffer in tablespace 0, you have to be very careful not to introduce + deadlocks in the i/o system. We keep tablespace 0 data files always + open, and use a special i/o thread to serve insert buffer requests. 
*/ + + switch (node->space->purpose) { + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_TEMPORARY: + case FIL_TYPE_IMPORT: + srv_set_io_thread_op_info(segment, "complete io for buf page"); + + /* async single page writes from the dblwr buffer don't have + access to the page */ + if (message != NULL) { + buf_page_io_complete(static_cast(message)); + } + return; + case FIL_TYPE_LOG: + srv_set_io_thread_op_info(segment, "complete io for log"); + log_io_complete(static_cast(message)); + return; + } + + ut_ad(0); +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Flushes to disk possible writes cached by the OS. If the space does not exist +or is being dropped, does not do anything. */ +void +fil_flush( +/*======*/ + ulint space_id) /*!< in: file space id (this can be a group of + log files or a tablespace of the database) */ +{ + fil_node_t* node; + os_file_t file; + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(space_id); + + if (space == NULL + || space->purpose == FIL_TYPE_TEMPORARY + || space->stop_new_ops + || space->is_being_truncated) { + mutex_exit(&fil_system->mutex); + + return; + } + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. 
*/ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + mutex_exit(&fil_system->mutex); + return; + } + + space->n_pending_flushes++; /*!< prevent dropping of the space while + we are flushing */ + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + int64_t old_mod_counter = node->modification_counter; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->is_open); + + switch (space->purpose) { + case FIL_TYPE_TEMPORARY: + ut_ad(0); // we already checked for this + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_IMPORT: + fil_n_pending_tablespace_flushes++; + break; + case FIL_TYPE_LOG: + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + break; + } +#ifdef _WIN32 + if (node->is_raw_disk) { + + goto skip_flush; + } +#endif /* _WIN32 */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + +#ifndef UNIV_HOTBACKUP + int64_t sig_count = os_event_reset(node->sync_event); +#endif /* !UNIV_HOTBACKUP */ + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->is_open); + file = node->handle; + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(file); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; +skip_flush: + if (node->flush_counter < old_mod_counter) { + 
node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + fil_system->unflushed_spaces, + space); + } + } + + switch (space->purpose) { + case FIL_TYPE_TEMPORARY: + ut_ad(0); // we already checked for this + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_IMPORT: + fil_n_pending_tablespace_flushes--; + continue; + case FIL_TYPE_LOG: + fil_n_pending_log_flushes--; + continue; + } + + ut_ad(0); + } + + space->n_pending_flushes--; + + mutex_exit(&fil_system->mutex); +} + +/** Flush to disk the writes in file spaces of the given type +possibly cached by the OS. +@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */ +void +fil_flush_file_spaces( + fil_type_t purpose) +{ + fil_space_t* space; + ulint* space_ids; + ulint n_space_ids; + + ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG); + + mutex_enter(&fil_system->mutex); + + n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces); + if (n_space_ids == 0) { + + mutex_exit(&fil_system->mutex); + return; + } + + /* Assemble a list of space ids to flush. Previously, we + traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT() + on a space that was just removed from the list by fil_flush(). + Thus, the space could be dropped and the memory overwritten. */ + space_ids = static_cast( + ut_malloc_nokey(n_space_ids * sizeof(*space_ids))); + + n_space_ids = 0; + + for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces); + space; + space = UT_LIST_GET_NEXT(unflushed_spaces, space)) { + + if (space->purpose == purpose + && !space->stop_new_ops + && !space->is_being_truncated) { + + space_ids[n_space_ids++] = space->id; + } + } + + mutex_exit(&fil_system->mutex); + + /* Flush the spaces. It will not hurt to call fil_flush() on + a non-existing space id. 
*/ + for (ulint i = 0; i < n_space_ids; i++) { + + fil_flush(space_ids[i]); + } + + ut_free(space_ids); +} + +/** Functor to validate the file node list of a tablespace. */ +struct Check { + /** Total size of file nodes visited so far */ + ulint size; + /** Total number of open files visited so far */ + ulint n_open; + + /** Constructor */ + Check() : size(0), n_open(0) {} + + /** Visit a file node + @param[in] elem file node to visit */ + void operator()(const fil_node_t* elem) + { + ut_a(elem->is_open || !elem->n_pending); + n_open += elem->is_open; + size += elem->size; + } + + /** Validate a tablespace. + @param[in] space tablespace to validate + @return number of open file nodes */ + static ulint validate(const fil_space_t* space) + { + ut_ad(mutex_own(&fil_system->mutex)); + Check check; + ut_list_validate(space->chain, check); + ut_a(space->size == check.size); + return(check.n_open); + } +}; + +/******************************************************************//** +Checks the consistency of the tablespace cache. 
+@return true if ok */ +bool +fil_validate(void) +/*==============*/ +{ + fil_space_t* space; + fil_node_t* fil_node; + ulint n_open = 0; + + mutex_enter(&fil_system->mutex); + + /* Look for spaces in the hash table */ + + for (ulint i = 0; i < hash_get_n_cells(fil_system->spaces); i++) { + + for (space = static_cast( + HASH_GET_FIRST(fil_system->spaces, i)); + space != 0; + space = static_cast( + HASH_GET_NEXT(hash, space))) { + + n_open += Check::validate(space); + } + } + + ut_a(fil_system->n_open == n_open); + + UT_LIST_CHECK(fil_system->LRU); + + for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU); + fil_node != 0; + fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { + + ut_a(fil_node->n_pending == 0); + ut_a(!fil_node->being_extended); + ut_a(fil_node->is_open); + ut_a(fil_space_belongs_in_lru(fil_node->space)); + } + + mutex_exit(&fil_system->mutex); + + return(true); +} + +/********************************************************************//** +Returns true if file address is undefined. +@return true if undefined */ +bool +fil_addr_is_null( +/*=============*/ + fil_addr_t addr) /*!< in: address */ +{ + return(addr.page == FIL_NULL); +} + +/********************************************************************//** +Get the predecessor of a file page. +@return FIL_PAGE_PREV */ +ulint +fil_page_get_prev( +/*==============*/ + const byte* page) /*!< in: file page */ +{ + return(mach_read_from_4(page + FIL_PAGE_PREV)); +} + +/********************************************************************//** +Get the successor of a file page. +@return FIL_PAGE_NEXT */ +ulint +fil_page_get_next( +/*==============*/ + const byte* page) /*!< in: file page */ +{ + return(mach_read_from_4(page + FIL_PAGE_NEXT)); +} + +/*********************************************************************//** +Sets the file page type. 
*/ +void +fil_page_set_type( +/*==============*/ + byte* page, /*!< in/out: file page */ + ulint type) /*!< in: type */ +{ + ut_ad(page); + + mach_write_to_2(page + FIL_PAGE_TYPE, type); +} + +#ifndef UNIV_HOTBACKUP +/** Reset the page type. +Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE. +In MySQL 3.23.53, only undo log pages and index pages were tagged. +Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. +@param[in] page_id page number +@param[in,out] page page with invalid FIL_PAGE_TYPE +@param[in] type expected page type +@param[in,out] mtr mini-transaction */ +void +fil_page_reset_type( + const page_id_t& page_id, + byte* page, + ulint type, + mtr_t* mtr) +{ + ib::info() + << "Resetting invalid page " << page_id << " type " + << fil_page_get_type(page) << " to " << type << "."; + mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr); +} +#endif /* !UNIV_HOTBACKUP */ + +/****************************************************************//** +Closes the tablespace memory cache. */ +void +fil_close(void) +/*===========*/ +{ + if (fil_system) { + hash_table_free(fil_system->spaces); + + hash_table_free(fil_system->name_hash); + + ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0); + ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0); + ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0); + + mutex_free(&fil_system->mutex); + + ut_free(fil_system); + fil_system = NULL; + } +} + +#ifndef UNIV_HOTBACKUP +/********************************************************************//** +Initializes a buffer control block when the buf_pool is created. */ +static +void +fil_buf_block_init( +/*===============*/ + buf_block_t* block, /*!< in: pointer to control block */ + byte* frame) /*!< in: pointer to buffer frame */ +{ + UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); + + block->frame = frame; + + block->page.io_fix = BUF_IO_NONE; + /* There are assertions that check for this. 
*/ + block->page.buf_fix_count = 1; + block->page.state = BUF_BLOCK_READY_FOR_USE; + + page_zip_des_init(&block->page.zip); +} + +struct fil_iterator_t { + os_file_t file; /*!< File handle */ + const char* filepath; /*!< File path name */ + os_offset_t start; /*!< From where to start */ + os_offset_t end; /*!< Where to stop */ + os_offset_t file_size; /*!< File size in bytes */ + ulint page_size; /*!< Page size */ + ulint n_io_buffers; /*!< Number of pages to use + for IO */ + byte* io_buffer; /*!< Buffer to use for IO */ + fil_space_crypt_t *crypt_data; /*!< MariaDB Crypt data (if encrypted) */ + byte* crypt_io_buffer; /*!< MariaDB IO buffer when encrypted */ + byte* encryption_key; /*!< Encryption key */ + byte* encryption_iv; /*!< Encryption iv */ +}; + +/********************************************************************//** +TODO: This can be made parallel trivially by chunking up the file and creating +a callback per thread. Main benefit will be to use multiple CPUs for +checksums and compressed tables. We have to do compressed tables block by +block right now. Secondly we need to decompress/compress and copy too much +of data. These are CPU intensive. + +Iterate over all the pages in the tablespace. +@param iter Tablespace iterator +@param block block to use for IO +@param callback Callback to inspect and update page contents +@retval DB_SUCCESS or error code */ +static +dberr_t +fil_iterate( +/*========*/ + const fil_iterator_t& iter, + buf_block_t* block, + PageCallback& callback) +{ + os_offset_t offset; + ulint page_no = 0; + ulint space_id = callback.get_space_id(); + ulint n_bytes = iter.n_io_buffers * iter.page_size; + + ut_ad(!srv_read_only_mode); + + /* For old style compressed tables we do a lot of useless copying + for non-index pages. 
Unfortunately, it is required by + buf_zip_decompress() */ + + ulint read_type = IORequest::READ; + ulint write_type = IORequest::WRITE; + + for (offset = iter.start; offset < iter.end; offset += n_bytes) { + + byte* io_buffer = iter.io_buffer; + + block->frame = io_buffer; + + if (callback.get_page_size().is_compressed()) { + page_zip_des_init(&block->page.zip); + page_zip_set_size(&block->page.zip, iter.page_size); + + block->page.size.copy_from( + page_size_t(iter.page_size, + univ_page_size.logical(), + true)); + + block->page.zip.data = block->frame + UNIV_PAGE_SIZE; + ut_d(block->page.zip.m_external = true); + ut_ad(iter.page_size + == callback.get_page_size().physical()); + + /* Zip IO is done in the compressed page buffer. */ + io_buffer = block->page.zip.data; + } else { + io_buffer = iter.io_buffer; + } + + /* We have to read the exact number of bytes. Otherwise the + InnoDB IO functions croak on failed reads. */ + + n_bytes = static_cast( + ut_min(static_cast(n_bytes), + iter.end - offset)); + + ut_ad(n_bytes > 0); + ut_ad(!(n_bytes % iter.page_size)); + + dberr_t err = DB_SUCCESS; + IORequest read_request(read_type); + +#ifdef MYSQL_ENCRYPTION + /* For encrypted table, set encryption information. 
*/ + if (iter.encryption_key != NULL && offset != 0) { + read_request.encryption_key(iter.encryption_key, + ENCRYPTION_KEY_LEN, + iter.encryption_iv); + read_request.encryption_algorithm(Encryption::AES); + } +#endif /* MYSQL_ENCRYPTION */ + + byte* readptr = io_buffer; + byte* writeptr = io_buffer; + bool encrypted = false; + + /* Use additional crypt io buffer if tablespace is encrypted */ + if ((iter.crypt_data != NULL && iter.crypt_data->encryption == FIL_SPACE_ENCRYPTION_ON) || + (srv_encrypt_tables && + iter.crypt_data && iter.crypt_data->encryption == FIL_SPACE_ENCRYPTION_DEFAULT)) { + + encrypted = true; + readptr = iter.crypt_io_buffer; + writeptr = iter.crypt_io_buffer; + } + + err = os_file_read( + read_request, iter.file, readptr, offset, + (ulint) n_bytes); + + if (err != DB_SUCCESS) { + + ib::error() << "os_file_read() failed"; + + return(err); + } + + bool updated = false; + os_offset_t page_off = offset; + ulint n_pages_read = (ulint) n_bytes / iter.page_size; + bool decrypted = false; + + for (ulint i = 0; i < n_pages_read; ++i) { + ulint size = iter.page_size; + byte* src = (readptr + (i * size)); + byte* dst = (io_buffer + (i * size)); + + ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); + + bool page_compressed = + (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED); + + /* If tablespace is encrypted, we need to decrypt + the page. */ + if (encrypted) { + decrypted = fil_space_decrypt( + iter.crypt_data, + dst, //dst + callback.get_page_size(), + src, // src + &err); // src + + if (err != DB_SUCCESS) { + return(err); + } + + if (decrypted) { + updated = true; + } else { + /* TODO: remove unnecessary memcpy's */ + memcpy(dst, src, iter.page_size); + } + } + + /* If the original page is page_compressed, we need + to decompress page before we can update it. 
*/ + if (page_compressed) { + fil_decompress_page(NULL, dst, size, NULL); + updated = true; + } + + buf_block_set_file_page( + block, page_id_t(space_id, page_no++)); + + if ((err = callback(page_off, block)) != DB_SUCCESS) { + + return(err); + + } else if (!updated) { + updated = buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE; + } + + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + + src = (io_buffer + (i * size)); + + if (page_compressed) { + ulint len = 0; + fil_compress_page(space_id, + src, + NULL, + size, + fil_space_get_page_compression_level(space_id), + fil_space_get_block_size(space_id, offset, size), + encrypted, + &len, + NULL); + + updated = true; + } + + /* If tablespace is encrypted, encrypt page before we + write it back. Note that we should not encrypt the + buffer that is in buffer pool. */ + if (decrypted && encrypted) { + unsigned char *dest = (writeptr + (i * size)); + ulint space = mach_read_from_4( + src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET); + ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN); + + byte* tmp = fil_encrypt_buf( + iter.crypt_data, + space, + offset, + lsn, + src, + callback.get_page_size(), + dest); + + if (tmp == src) { + /* TODO: remove unnecessary memcpy's */ + memcpy(dest, src, iter.page_size); + } + + updated = true; + } + + page_off += iter.page_size; + block->frame += iter.page_size; + } + + IORequest write_request(write_type); + +#ifdef MYSQL_ENCRYPTION + /* For encrypted table, set encryption information. */ + if (iter.encryption_key != NULL && offset != 0) { + write_request.encryption_key(iter.encryption_key, + ENCRYPTION_KEY_LEN, + iter.encryption_iv); + write_request.encryption_algorithm(Encryption::AES); + } +#endif /* MYSQL_ENCRYPTION */ + + /* A page was updated in the set, write back to disk. + Note: We don't have the compression algorithm, we write + out the imported file as uncompressed. 
*/ + + if (updated + && (err = os_file_write( + write_request, + iter.filepath, iter.file, writeptr, + offset, (ulint) n_bytes)) != DB_SUCCESS) { + + /* This is not a hard error */ + if (err == DB_IO_NO_PUNCH_HOLE) { + + err = DB_SUCCESS; + write_type &= ~IORequest::PUNCH_HOLE; + + } else { + ib::error() << "os_file_write() failed"; + + return(err); + } + } + } + + return(DB_SUCCESS); +} + +/********************************************************************//** +Iterate over all the pages in the tablespace. +@param table the table definiton in the server +@param n_io_buffers number of blocks to read and write together +@param callback functor that will do the page updates +@return DB_SUCCESS or error code */ +dberr_t +fil_tablespace_iterate( +/*===================*/ + dict_table_t* table, + ulint n_io_buffers, + PageCallback& callback) +{ + dberr_t err; + os_file_t file; + char* filepath; + bool success; + + ut_a(n_io_buffers > 0); + ut_ad(!srv_read_only_mode); + + DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", + return(DB_CORRUPTION);); + + /* Make sure the data_dir_path is set. 
*/ + dict_get_and_save_data_dir_path(table, false); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + ut_a(table->data_dir_path); + + filepath = fil_make_filepath( + table->data_dir_path, table->name.m_name, IBD, true); + } else { + filepath = fil_make_filepath( + NULL, table->name.m_name, IBD, false); + } + + if (filepath == NULL) { + return(DB_OUT_OF_MEMORY); + } + + file = os_file_create_simple_no_error_handling( + innodb_data_file_key, filepath, + OS_FILE_OPEN, OS_FILE_READ_WRITE, srv_read_only_mode, &success); + + DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", + { + static bool once; + + if (!once || ut_rnd_interval(0, 10) == 5) { + once = true; + success = false; + os_file_close(file); + } + }); + + if (!success) { + /* The following call prints an error message */ + os_file_get_last_error(true); + + ib::error() << "Trying to import a tablespace, but could not" + " open the tablespace file " << filepath; + + ut_free(filepath); + + return(DB_TABLESPACE_NOT_FOUND); + + } else { + err = DB_SUCCESS; + } + + callback.set_file(filepath, file); + + os_offset_t file_size = os_file_get_size(file); + ut_a(file_size != (os_offset_t) -1); + + /* The block we will use for every physical page */ + buf_block_t* block; + + block = reinterpret_cast(ut_zalloc_nokey(sizeof(*block))); + + mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex); + + /* Allocate a page to read in the tablespace header, so that we + can determine the page size and zip size (if it is compressed). + We allocate an extra page in case it is a compressed table. One + page is to ensure alignement. */ + + void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE); + byte* page = static_cast(ut_align(page_ptr, UNIV_PAGE_SIZE)); + + fil_buf_block_init(block, page); + + /* Read the first page and determine the page and zip size. 
*/ + + IORequest request(IORequest::READ); + + err = os_file_read(request, file, page, 0, UNIV_PAGE_SIZE); + + if (err != DB_SUCCESS) { + + err = DB_IO_ERROR; + + } else if ((err = callback.init(file_size, block)) == DB_SUCCESS) { + fil_iterator_t iter; + + iter.file = file; + iter.start = 0; + iter.end = file_size; + iter.filepath = filepath; + iter.file_size = file_size; + iter.n_io_buffers = n_io_buffers; + iter.page_size = callback.get_page_size().physical(); + + ulint crypt_data_offset = fsp_header_get_crypt_offset( + callback.get_page_size(), 0); + + /* read (optional) crypt data */ + iter.crypt_data = fil_space_read_crypt_data( + 0, page, crypt_data_offset); + +#ifdef MYSQL_ENCRYPTION + /* Set encryption info. */ + iter.encryption_key = table->encryption_key; + iter.encryption_iv = table->encryption_iv; + + /* Check encryption is matched or not. */ + ulint space_flags = callback.get_space_flags(); + if (FSP_FLAGS_GET_ENCRYPTION(space_flags)) { + ut_ad(table->encryption_key != NULL); + + if (!dict_table_is_encrypted(table)) { + ib::error() << "Table is not in an encrypted" + " tablespace, but the data file which" + " trying to import is an encrypted" + " tablespace"; + err = DB_IO_NO_ENCRYPT_TABLESPACE; + } + } +#endif /* MYSQL_ENCRYPTION */ + + if (err == DB_SUCCESS) { + + /* Compressed pages can't be optimised for block IO + for now. We do the IMPORT page by page. */ + + if (callback.get_page_size().is_compressed()) { + iter.n_io_buffers = 1; + ut_a(iter.page_size + == callback.get_page_size().physical()); + } + + /** Add an extra page for compressed page scratch + area. 
*/ + void* io_buffer = ut_malloc_nokey( + (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE); + + iter.io_buffer = static_cast( + ut_align(io_buffer, UNIV_PAGE_SIZE)); + + /** Add an exta buffer for encryption */ + void* crypt_io_buffer = NULL; + if (iter.crypt_data != NULL) { + crypt_io_buffer = ut_malloc_nokey( + iter.n_io_buffers * UNIV_PAGE_SIZE); + iter.crypt_io_buffer = static_cast( + crypt_io_buffer); + } + + err = fil_iterate(iter, block, callback); + + ut_free(io_buffer); + } + } + + if (err == DB_SUCCESS) { + + ib::info() << "Sync to disk"; + + if (!os_file_flush(file)) { + ib::info() << "os_file_flush() failed!"; + err = DB_IO_ERROR; + } else { + ib::info() << "Sync to disk - done!"; + } + } + + os_file_close(file); + + ut_free(page_ptr); + ut_free(filepath); + + mutex_free(&block->mutex); + + ut_free(block); + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +/** Set the tablespace table size. +@param[in] page a page belonging to the tablespace */ +void +PageCallback::set_page_size( + const buf_frame_t* page) UNIV_NOTHROW +{ + m_page_size.copy_from(fsp_header_get_page_size(page)); +} + +/********************************************************************//** +Delete the tablespace file and any related files like .cfg. +This should not be called for temporary tables. +@param[in] ibd_filepath File path of the IBD tablespace */ +void +fil_delete_file( +/*============*/ + const char* ibd_filepath) +{ + /* Force a delete of any stale .ibd files that are lying around. */ + + ib::info() << "Deleting " << ibd_filepath; + os_file_delete_if_exists(innodb_data_file_key, ibd_filepath, NULL); + + char* cfg_filepath = fil_make_filepath( + ibd_filepath, NULL, CFG, false); + if (cfg_filepath != NULL) { + os_file_delete_if_exists( + innodb_data_file_key, cfg_filepath, NULL); + ut_free(cfg_filepath); + } +} + +/** +Iterate over all the spaces in the space list and fetch the +tablespace names. It will return a copy of the name that must be +freed by the caller using: delete[]. 
+@return DB_SUCCESS if all OK. */ +dberr_t +fil_get_space_names( +/*================*/ + space_name_list_t& space_name_list) + /*!< in/out: List to append to */ +{ + fil_space_t* space; + dberr_t err = DB_SUCCESS; mutex_enter(&fil_system->mutex); @@ -1885,220 +6772,867 @@ fil_write_flushed_lsn_to_data_files( space != NULL; space = UT_LIST_GET_NEXT(space_list, space)) { - /* We only write the lsn to all existing data files which have - been open during the lifetime of the mysqld process; they are - represented by the space objects in the tablespace memory - cache. Note that all data files in the system tablespace 0 - and the UNDO log tablespaces (if separate) are always open. */ + if (space->purpose == FIL_TYPE_TABLESPACE) { + ulint len; + char* name; - if (space->purpose == FIL_TABLESPACE - && !fil_is_user_tablespace_id(space->id)) { - ulint sum_of_sizes = 0; + len = ::strlen(space->name); + name = UT_NEW_ARRAY_NOKEY(char, len + 1); - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { + if (name == 0) { + /* Caller to free elements allocated so far. */ + err = DB_OUT_OF_MEMORY; + break; + } - mutex_exit(&fil_system->mutex); + memcpy(name, space->name, len); + name[len] = 0; - err = fil_write_lsn_and_arch_no_to_file( - space->id, sum_of_sizes, lsn, - arch_log_no); + space_name_list.push_back(name); + } + } - if (err != DB_SUCCESS) { + mutex_exit(&fil_system->mutex); - return(err); - } + return(err); +} - mutex_enter(&fil_system->mutex); +#ifndef UNIV_HOTBACKUP +/** Return the next fil_node_t in the current or next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_node Pointer to the previous fil_node_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_node_t. 
+@retval NULL if this was the last file node */ +const fil_node_t* +fil_node_next( + const fil_node_t* prev_node) +{ + fil_space_t* space; + const fil_node_t* node = prev_node; - sum_of_sizes += node->size; + mutex_enter(&fil_system->mutex); + + if (node == NULL) { + space = UT_LIST_GET_FIRST(fil_system->space_list); + + /* We can trust that space is not NULL because at least the + system tablespace is always present and loaded first. */ + space->n_pending_ops++; + + node = UT_LIST_GET_FIRST(space->chain); + ut_ad(node != NULL); + } else { + space = node->space; + ut_ad(space->n_pending_ops > 0); + node = UT_LIST_GET_NEXT(chain, node); + + if (node == NULL) { + /* Move on to the next fil_space_t */ + space->n_pending_ops--; + space = UT_LIST_GET_NEXT(space_list, space); + + /* Skip spaces that are being dropped or truncated. */ + while (space != NULL + && (space->stop_new_ops + || space->is_being_truncated)) { + space = UT_LIST_GET_NEXT(space_list, space); + } + + if (space != NULL) { + space->n_pending_ops++; + node = UT_LIST_GET_FIRST(space->chain); + ut_ad(node != NULL); } } } mutex_exit(&fil_system->mutex); + return(node); +} + +/** Generate redo log for swapping two .ibd files +@param[in] old_table old table +@param[in] new_table new table +@param[in] tmp_name temporary table name +@param[in,out] mtr mini-transaction +@return innodb error code */ +dberr_t +fil_mtr_rename_log( + const dict_table_t* old_table, + const dict_table_t* new_table, + const char* tmp_name, + mtr_t* mtr) +{ + dberr_t err; + + bool old_is_file_per_table = + !is_system_tablespace(old_table->space) + && !DICT_TF_HAS_SHARED_SPACE(old_table->flags); + + bool new_is_file_per_table = + !is_system_tablespace(new_table->space) + && !DICT_TF_HAS_SHARED_SPACE(new_table->flags); + + /* If neither table is file-per-table, + there will be no renaming of files. 
*/ + if (!old_is_file_per_table && !new_is_file_per_table) { + return(DB_SUCCESS); + } + + const char* old_dir = DICT_TF_HAS_DATA_DIR(old_table->flags) + ? old_table->data_dir_path + : NULL; + + char* old_path = fil_make_filepath( + old_dir, old_table->name.m_name, IBD, (old_dir != NULL)); + if (old_path == NULL) { + return(DB_OUT_OF_MEMORY); + } + + if (old_is_file_per_table) { + char* tmp_path = fil_make_filepath( + old_dir, tmp_name, IBD, (old_dir != NULL)); + if (tmp_path == NULL) { + ut_free(old_path); + return(DB_OUT_OF_MEMORY); + } + + /* Temp filepath must not exist. */ + err = fil_rename_tablespace_check( + old_table->space, old_path, tmp_path, + dict_table_is_discarded(old_table)); + if (err != DB_SUCCESS) { + ut_free(old_path); + ut_free(tmp_path); + return(err); + } + + fil_name_write_rename( + old_table->space, 0, old_path, tmp_path, mtr); + + ut_free(tmp_path); + } + + if (new_is_file_per_table) { + const char* new_dir = DICT_TF_HAS_DATA_DIR(new_table->flags) + ? new_table->data_dir_path + : NULL; + char* new_path = fil_make_filepath( + new_dir, new_table->name.m_name, + IBD, (new_dir != NULL)); + if (new_path == NULL) { + ut_free(old_path); + return(DB_OUT_OF_MEMORY); + } + + /* Destination filepath must not exist unless this ALTER + TABLE starts and ends with a file_per-table tablespace. */ + if (!old_is_file_per_table) { + err = fil_rename_tablespace_check( + new_table->space, new_path, old_path, + dict_table_is_discarded(new_table)); + if (err != DB_SUCCESS) { + ut_free(old_path); + ut_free(new_path); + return(err); + } + } + + fil_name_write_rename( + new_table->space, 0, new_path, old_path, mtr); + + ut_free(new_path); + } + + ut_free(old_path); + + return(DB_SUCCESS); +} +#endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_DEBUG +/** Check that a tablespace is valid for mtr_commit(). 
+@param[in] space persistent tablespace that has been changed */ +static +void +fil_space_validate_for_mtr_commit( + const fil_space_t* space) +{ + ut_ad(!mutex_own(&fil_system->mutex)); + ut_ad(space != NULL); + ut_ad(space->purpose == FIL_TYPE_TABLESPACE); + ut_ad(!is_predefined_tablespace(space->id)); + + /* We are serving mtr_commit(). While there is an active + mini-transaction, we should have !space->stop_new_ops. This is + guaranteed by meta-data locks or transactional locks, or + dict_operation_lock (X-lock in DROP, S-lock in purge). + + However, a file I/O thread can invoke change buffer merge + while fil_check_pending_operations() is waiting for operations + to quiesce. This is not a problem, because + ibuf_merge_or_delete_for_page() would call + fil_space_acquire() before mtr_start() and + fil_space_release() after mtr_commit(). This is why + n_pending_ops should not be zero if stop_new_ops is set. */ + ut_ad(!space->stop_new_ops + || space->is_being_truncated /* TRUNCATE sets stop_new_ops */ + || space->n_pending_ops > 0); +} +#endif /* UNIV_DEBUG */ + +/** Write a MLOG_FILE_NAME record for a persistent tablespace. +@param[in] space tablespace +@param[in,out] mtr mini-transaction */ +static +void +fil_names_write( + const fil_space_t* space, + mtr_t* mtr) +{ + ut_ad(UT_LIST_GET_LEN(space->chain) == 1); + fil_name_write(space, 0, UT_LIST_GET_FIRST(space->chain), mtr); +} + +/** Note that a non-predefined persistent tablespace has been modified +by redo log. +@param[in,out] space tablespace */ +void +fil_names_dirty( + fil_space_t* space) +{ + ut_ad(log_mutex_own()); + ut_ad(recv_recovery_is_on()); + ut_ad(log_sys->lsn != 0); + ut_ad(space->max_lsn == 0); + ut_d(fil_space_validate_for_mtr_commit(space)); + + UT_LIST_ADD_LAST(fil_system->named_spaces, space); + space->max_lsn = log_sys->lsn; +} + +/** Write MLOG_FILE_NAME records when a non-predefined persistent +tablespace was modified for the first time since the latest +fil_names_clear(). 
+@param[in,out] space tablespace +@param[in,out] mtr mini-transaction */ +void +fil_names_dirty_and_write( + fil_space_t* space, + mtr_t* mtr) +{ + ut_ad(log_mutex_own()); + ut_d(fil_space_validate_for_mtr_commit(space)); + ut_ad(space->max_lsn == log_sys->lsn); + + UT_LIST_ADD_LAST(fil_system->named_spaces, space); + fil_names_write(space, mtr); + + DBUG_EXECUTE_IF("fil_names_write_bogus", + { + char bogus_name[] = "./test/bogus file.ibd"; + os_normalize_path(bogus_name); + fil_name_write( + SRV_LOG_SPACE_FIRST_ID, 0, + bogus_name, mtr); + }); +} +#ifndef UNIV_HOTBACKUP +/** On a log checkpoint, reset fil_names_dirty_and_write() flags +and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed. +@param[in] lsn checkpoint LSN +@param[in] do_write whether to always write MLOG_CHECKPOINT +@return whether anything was written to the redo log +@retval false if no flags were set and nothing written +@retval true if anything was written to the redo log */ +bool +fil_names_clear( + lsn_t lsn, + bool do_write) +{ + mtr_t mtr; + + ut_ad(log_mutex_own()); + + if (log_sys->append_on_checkpoint) { + mtr_write_log(log_sys->append_on_checkpoint); + do_write = true; + } + + mtr.start(); + + for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->named_spaces); + space != NULL; ) { + fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space); + + ut_ad(space->max_lsn > 0); + if (space->max_lsn < lsn) { + /* The tablespace was last dirtied before the + checkpoint LSN. Remove it from the list, so + that if the tablespace is not going to be + modified any more, subsequent checkpoints will + avoid calling fil_names_write() on it. */ + space->max_lsn = 0; + UT_LIST_REMOVE(fil_system->named_spaces, space); + } + + /* max_lsn is the last LSN where fil_names_dirty_and_write() + was called. If we kept track of "min_lsn" (the first LSN + where max_lsn turned nonzero), we could avoid the + fil_names_write() call if min_lsn > lsn. 
*/ + + fil_names_write(space, &mtr); + do_write = true; + + space = next; + } + + if (do_write) { + mtr.commit_checkpoint(lsn); + } else { + ut_ad(!mtr.has_modifications()); + } + + return(do_write); +} + +/** Truncate a single-table tablespace. The tablespace must be cached +in the memory cache. +@param space_id space id +@param dir_path directory path +@param tablename the table name in the usual + databasename/tablename format of InnoDB +@param flags tablespace flags +@param trunc_to_default truncate to default size if tablespace + is being newly re-initialized. +@return DB_SUCCESS or error */ +dberr_t +truncate_t::truncate( +/*=================*/ + ulint space_id, + const char* dir_path, + const char* tablename, + ulint flags, + bool trunc_to_default) +{ + dberr_t err = DB_SUCCESS; + char* path; + bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + + ut_a(!is_system_tablespace(space_id)); + + if (has_data_dir) { + ut_ad(dir_path != NULL); + + path = fil_make_filepath(dir_path, tablename, IBD, true); + + } else { + path = fil_make_filepath(NULL, tablename, IBD, false); + } + + if (path == NULL) { + return(DB_OUT_OF_MEMORY); + } + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(space_id); + + /* The following code must change when InnoDB supports + multiple datafiles per tablespace. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + + if (trunc_to_default) { + space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE; + } + + const bool already_open = node->is_open; + + if (!already_open) { + + bool ret; + + node->handle = os_file_create_simple_no_error_handling( + innodb_data_file_key, path, OS_FILE_OPEN, + OS_FILE_READ_WRITE, + fsp_is_system_temporary(space_id) + ? 
false : srv_read_only_mode, &ret); + + if (!ret) { + ib::error() << "Failed to open tablespace file " + << path << "."; + + ut_free(path); + + return(DB_ERROR); + } + + node->is_open = true; + } + + os_offset_t trunc_size = trunc_to_default + ? FIL_IBD_FILE_INITIAL_SIZE + : space->size; + + const bool success = os_file_truncate( + path, node->handle, trunc_size * UNIV_PAGE_SIZE); + + if (!success) { + ib::error() << "Cannot truncate file " << path + << " in TRUNCATE TABLESPACE."; + err = DB_ERROR; + } + + space->stop_new_ops = false; + space->is_being_truncated = false; + + /* If we opened the file in this function, close it. */ + if (!already_open) { + bool closed = os_file_close(node->handle); + + if (!closed) { + + ib::error() << "Failed to close tablespace file " + << path << "."; + + err = DB_ERROR; + } else { + node->is_open = false; + } + } + + mutex_exit(&fil_system->mutex); + + ut_free(path); + + return(err); +} +#endif /* !UNIV_HOTBACKUP */ + +/** +Note that the file system where the file resides doesn't support PUNCH HOLE. +Called from AIO handlers when IO returns DB_IO_NO_PUNCH_HOLE +@param[in,out] node Node to set */ +void +fil_no_punch_hole(fil_node_t* node) +{ + node->punch_hole = false; +} + +#ifdef MYSQL_COMPRESSION + +/** Set the compression type for the tablespace of a table +@param[in] table The table that should be compressed +@param[in] algorithm Text representation of the algorithm +@return DB_SUCCESS or error code */ +dberr_t +fil_set_compression( + dict_table_t* table, + const char* algorithm) +{ + ut_ad(table != NULL); + + /* We don't support Page Compression for the system tablespace, + the temporary tablespace, or any general tablespace because + COMPRESSION is set by TABLE DDL, not TABLESPACE DDL. There is + no other technical reason. Also, do not use it for missing + tables or tables with compressed row_format. 
*/ + if (table->ibd_file_missing + || !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY) + || page_size_t(table->flags).is_compressed()) { + + return(DB_IO_NO_PUNCH_HOLE_TABLESPACE); + } + + dberr_t err; + Compression compression; + + if (algorithm == NULL || strlen(algorithm) == 0) { + +#ifndef UNIV_DEBUG + compression.m_type = Compression::NONE; +#else + /* This is a Debug tool for setting compression on all + compressible tables not otherwise specified. */ + switch (srv_debug_compress) { + case Compression::LZ4: + case Compression::ZLIB: + case Compression::NONE: + + compression.m_type = + static_cast( + srv_debug_compress); + break; + + default: + compression.m_type = Compression::NONE; + } + +#endif /* UNIV_DEBUG */ + + err = DB_SUCCESS; + + } else { + + err = Compression::check(algorithm, &compression); + } + + fil_space_t* space = fil_space_get(table->space); + + if (space == NULL) { + return(DB_NOT_FOUND); + } + + space->compression_type = compression.m_type; + + if (space->compression_type != Compression::NONE) { + + const fil_node_t* node; + + node = UT_LIST_GET_FIRST(space->chain); + + if (!node->punch_hole) { + + return(DB_IO_NO_PUNCH_HOLE_FS); + } + } + + return(err); +} + +/** Get the compression algorithm for a tablespace. +@param[in] space_id Space ID to check +@return the compression algorithm */ +Compression::Type +fil_get_compression( + ulint space_id) +{ + fil_space_t* space = fil_space_get(space_id); + + return(space == NULL ? 
Compression::NONE : space->compression_type); +} + +/** Set the encryption type for the tablespace +@param[in] space_id Space ID of tablespace for which to set +@param[in] algorithm Encryption algorithm +@param[in] key Encryption key +@param[in] iv Encryption iv +@return DB_SUCCESS or error code */ +dberr_t +fil_set_encryption( + ulint space_id, + Encryption::Type algorithm, + byte* key, + byte* iv) +{ + ut_ad(!is_system_or_undo_tablespace(space_id)); + + if (is_system_tablespace(space_id)) { + return(DB_IO_NO_ENCRYPT_TABLESPACE); + } + + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(space_id); + + if (space == NULL) { + mutex_exit(&fil_system->mutex); + return(DB_NOT_FOUND); + } + + ut_ad(algorithm != Encryption::NONE); + space->encryption_type = algorithm; + if (key == NULL) { + Encryption::random_value(space->encryption_key); + } else { + memcpy(space->encryption_key, + key, ENCRYPTION_KEY_LEN); + } + + space->encryption_klen = ENCRYPTION_KEY_LEN; + if (iv == NULL) { + Encryption::random_value(space->encryption_iv); + } else { + memcpy(space->encryption_iv, + iv, ENCRYPTION_KEY_LEN); + } + + mutex_exit(&fil_system->mutex); + return(DB_SUCCESS); } -/*******************************************************************//** -Checks the consistency of the first data page of a tablespace -at database startup. -@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ -static MY_ATTRIBUTE((warn_unused_result)) -const char* -fil_check_first_page( -/*=================*/ - const page_t* page) /*!< in: data page */ +/** Rotate the tablespace keys by new master key. 
+@return true if the re-encrypt suceeds */ +bool +fil_encryption_rotate() { - ulint space_id; - ulint flags; + fil_space_t* space; + mtr_t mtr; + byte encrypt_info[ENCRYPTION_INFO_SIZE_V2]; - if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { - return(NULL); - } - - space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); - flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, - "InnoDB: Error: Current page size %lu != " - " page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); - } - - if (!space_id && !flags) { - ulint nonzero_bytes = UNIV_PAGE_SIZE; - const byte* b = page; - - while (!*b && --nonzero_bytes) { - b++; + for (space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; ) { + /* Skip unencypted tablespaces. */ + if (is_system_or_undo_tablespace(space->id) + || fsp_is_system_temporary(space->id) + || space->purpose == FIL_TYPE_LOG) { + space = UT_LIST_GET_NEXT(space_list, space); + continue; } - if (!nonzero_bytes) { - return("space header page consists of zero bytes"); + if (space->encryption_type != Encryption::NONE) { + mtr_start(&mtr); + mtr.set_named_space(space->id); + + space = mtr_x_lock_space(space->id, &mtr); + + memset(encrypt_info, 0, ENCRYPTION_INFO_SIZE_V2); + + if (!fsp_header_rotate_encryption(space, + encrypt_info, + &mtr)) { + mtr_commit(&mtr); + return(false); + } + + mtr_commit(&mtr); } + + space = UT_LIST_GET_NEXT(space_list, space); + DBUG_EXECUTE_IF("ib_crash_during_rotation_for_encryption", + DBUG_SUICIDE();); } - if (buf_page_is_corrupted( - false, page, fsp_flags_get_zip_size(flags))) { - return("checksum mismatch"); + return(true); +} +#endif /* MYSQL_COMPRESSION */ + +/** Build the basic folder name from the path and length provided +@param[in] path pathname (may also include the file basename) +@param[in] len length of the path, in bytes */ 
+void +Folder::make_path(const char* path, size_t len) +{ + if (is_absolute_path(path)) { + m_folder = mem_strdupl(path, len); + m_folder_len = len; + } + else { + size_t n = 2 + len + strlen(fil_path_to_mysql_datadir); + m_folder = static_cast(ut_malloc_nokey(n)); + m_folder_len = 0; + + if (path != fil_path_to_mysql_datadir) { + /* Put the mysqld datadir into m_folder first. */ + ut_ad(fil_path_to_mysql_datadir[0] != '\0'); + m_folder_len = strlen(fil_path_to_mysql_datadir); + memcpy(m_folder, fil_path_to_mysql_datadir, + m_folder_len); + if (m_folder[m_folder_len - 1] != OS_PATH_SEPARATOR) { + m_folder[m_folder_len++] = OS_PATH_SEPARATOR; + } + } + + /* Append the path. */ + memcpy(m_folder + m_folder_len, path, len); + m_folder_len += len; + m_folder[m_folder_len] = '\0'; } - if (page_get_space_id(page) == space_id - && page_get_page_no(page) == 0) { - return(NULL); - } - - return("inconsistent data in space header"); + os_normalize_path(m_folder); } -/*******************************************************************//** -Reads the flushed lsn, arch no, space_id and tablespace flag fields from -the first page of a data file at database startup. -@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ +/** Resolve a relative path in m_folder to an absolute path +in m_abs_path setting m_abs_len. */ +void +Folder::make_abs_path() +{ + my_realpath(m_abs_path, m_folder, MYF(0)); + m_abs_len = strlen(m_abs_path); + + ut_ad(m_abs_len + 1 < sizeof(m_abs_path)); + + /* Folder::related_to() needs a trailing separator. 
*/ + if (m_abs_path[m_abs_len - 1] != OS_PATH_SEPARATOR) { + m_abs_path[m_abs_len] = OS_PATH_SEPARATOR; + m_abs_path[++m_abs_len] = '\0'; + } +} + +/** Constructor +@param[in] path pathname (may also include the file basename) +@param[in] len length of the path, in bytes */ +Folder::Folder(const char* path, size_t len) +{ + make_path(path, len); + make_abs_path(); +} + +/** Assignment operator +@param[in] folder folder string provided */ +class Folder& +Folder::operator=(const char* path) +{ + ut_free(m_folder); + make_path(path, strlen(path)); + make_abs_path(); + + return(*this); +} + +/** Determine if two folders are equal +@param[in] other folder to compare to +@return whether the folders are equal */ +bool Folder::operator==(const Folder& other) const +{ + return(m_abs_len == other.m_abs_len + && !memcmp(m_abs_path, other.m_abs_path, m_abs_len)); +} + +/** Determine if the left folder is the same or an ancestor of +(contains) the right folder. +@param[in] other folder to compare to +@return whether this is the same or an ancestor of the other folder. */ +bool Folder::operator>=(const Folder& other) const +{ + return(m_abs_len <= other.m_abs_len + && (!memcmp(other.m_abs_path, m_abs_path, m_abs_len))); +} + +/** Determine if the left folder is an ancestor of (contains) +the right folder. +@param[in] other folder to compare to +@return whether this is an ancestor of the other folder */ +bool Folder::operator>(const Folder& other) const +{ + return(m_abs_len < other.m_abs_len + && (!memcmp(other.m_abs_path, m_abs_path, m_abs_len))); +} + +/** Determine if the directory referenced by m_folder exists. +@return whether the directory exists */ +bool +Folder::exists() +{ + bool exists; + os_file_type_t type; + +#ifdef _WIN32 + /* Temporarily strip the trailing_separator since it will cause + _stat64() to fail on Windows unless the path is the root of some + drive; like "c:\". _stat64() will fail if it is "c:". 
*/ + size_t len = strlen(m_abs_path); + if (m_abs_path[m_abs_len - 1] == OS_PATH_SEPARATOR + && m_abs_path[m_abs_len - 2] != ':') { + m_abs_path[m_abs_len - 1] = '\0'; + } +#endif /* WIN32 */ + + bool ret = os_file_status(m_abs_path, &exists, &type); + +#ifdef _WIN32 + /* Put the separator back on. */ + if (m_abs_path[m_abs_len - 1] == '\0') { + m_abs_path[m_abs_len - 1] = OS_PATH_SEPARATOR; + } +#endif /* WIN32 */ + + return(ret && exists && type == OS_FILE_TYPE_DIR); +} + +/* Unit Tests */ +#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH +#define MF fil_make_filepath +#define DISPLAY ib::info() << path +void +test_make_filepath() +{ + char* path; + const char* long_path = + "this/is/a/very/long/path/including/a/very/" + "looooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooooo" + "oooooooooooooooooooooooooooooooooooooooooooooooong" + "/folder/name"; + path = MF("/this/is/a/path/with/a/filename", NULL, IBD, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename", NULL, ISL, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename", NULL, CFG, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename", NULL, CFP, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY; + path = MF("/this/is/a/path/with/a/filename.dat", NULL, IBD, false); DISPLAY; + path = MF(NULL, "tablespacename", NO_EXT, false); DISPLAY; + path = MF(NULL, "tablespacename", IBD, false); DISPLAY; + path = MF(NULL, "dbname/tablespacename", NO_EXT, 
false); DISPLAY; + path = MF(NULL, "dbname/tablespacename", IBD, false); DISPLAY; + path = MF(NULL, "dbname/tablespacename", ISL, false); DISPLAY; + path = MF(NULL, "dbname/tablespacename", CFG, false); DISPLAY; + path = MF(NULL, "dbname/tablespacename", CFP, false); DISPLAY; + path = MF(NULL, "dbname\\tablespacename", NO_EXT, false); DISPLAY; + path = MF(NULL, "dbname\\tablespacename", IBD, false); DISPLAY; + path = MF("/this/is/a/path", "dbname/tablespacename", IBD, false); DISPLAY; + path = MF("/this/is/a/path", "dbname/tablespacename", IBD, true); DISPLAY; + path = MF("./this/is/a/path", "dbname/tablespacename.ibd", IBD, true); DISPLAY; + path = MF("this\\is\\a\\path", "dbname/tablespacename", IBD, true); DISPLAY; + path = MF("/this/is/a/path", "dbname\\tablespacename", IBD, true); DISPLAY; + path = MF(long_path, NULL, IBD, false); DISPLAY; + path = MF(long_path, "tablespacename", IBD, false); DISPLAY; + path = MF(long_path, "tablespacename", IBD, true); DISPLAY; +} +#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */ +/* @} */ + +/** Release the reserved free extents. 
+@param[in] n_reserved number of reserved extents */ +void +fil_space_t::release_free_extents(ulint n_reserved) +{ + ut_ad(rw_lock_own(&latch, RW_LOCK_X)); + + ut_a(n_reserved_extents >= n_reserved); + n_reserved_extents -= n_reserved; +} + +/****************************************************************** +Get crypt data for a tablespace */ UNIV_INTERN -const char* -fil_read_first_page( -/*================*/ - os_file_t data_file, /*!< in: open data file */ - ibool one_read_already, /*!< in: TRUE if min and max - parameters below already - contain sensible data */ - ulint* flags, /*!< out: tablespace flags */ - ulint* space_id, /*!< out: tablespace ID */ -#ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /*!< out: min of archived - log numbers in data files */ - ulint* max_arch_log_no, /*!< out: max of archived - log numbers in data files */ -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t* min_flushed_lsn, /*!< out: min of flushed - lsn values in data files */ - lsn_t* max_flushed_lsn, /*!< out: max of flushed - lsn values in data files */ - fil_space_crypt_t** crypt_data) /*< out: crypt data */ +fil_space_crypt_t* +fil_space_get_crypt_data( +/*=====================*/ + ulint id) /*!< in: space id */ { - byte* buf; - byte* page; - lsn_t flushed_lsn; - const char* check_msg = NULL; - fil_space_crypt_t* cdata; + fil_space_t* space; + fil_space_crypt_t* crypt_data = NULL; + ut_ad(fil_system); - buf = static_cast(ut_malloc(2 * UNIV_PAGE_SIZE)); + mutex_enter(&fil_system->mutex); - /* Align the memory for a possible read from a raw device */ + space = fil_space_get_by_id(id); - page = static_cast(ut_align(buf, UNIV_PAGE_SIZE)); - - os_file_read(data_file, page, 0, UNIV_PAGE_SIZE); - - /* The FSP_HEADER on page 0 is only valid for the first file - in a tablespace. So if this is not the first datafile, leave - *flags and *space_id as they were read from the first file and - do not validate the first page. 
*/ - if (!one_read_already) { - *flags = fsp_header_get_flags(page); - *space_id = fsp_header_get_space_id(page); + if (space != NULL) { + crypt_data = space->crypt_data; } - if (!one_read_already) { - check_msg = fil_check_first_page(page); - } + mutex_exit(&fil_system->mutex); - flushed_lsn = mach_read_from_8(page + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - ulint space = fsp_header_get_space_id(page); - ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(*flags), NULL); - cdata = fil_space_read_crypt_data(space, page, offset); - - if (crypt_data) { - *crypt_data = cdata; - } - - /* If file space is encrypted we need to have at least some - encryption service available where to get keys */ - if ((cdata && cdata->encryption == FIL_SPACE_ENCRYPTION_ON) || - (srv_encrypt_tables && - cdata && cdata->encryption == FIL_SPACE_ENCRYPTION_DEFAULT)) { - - if (!encryption_key_id_exists(cdata->key_id)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace id %ld is encrypted but encryption service" - " or used key_id %u is not available. 
Can't continue opening tablespace.", - space, cdata->key_id); - - return ("table encrypted but encryption service not available."); - - } - } - - ut_free(buf); - - if (check_msg) { - return(check_msg); - } - - if (!one_read_already) { - *min_flushed_lsn = flushed_lsn; - *max_flushed_lsn = flushed_lsn; -#ifdef UNIV_LOG_ARCHIVE - *min_arch_log_no = arch_log_no; - *max_arch_log_no = arch_log_no; -#endif /* UNIV_LOG_ARCHIVE */ - return(NULL); - } - - if (*min_flushed_lsn > flushed_lsn) { - *min_flushed_lsn = flushed_lsn; - } - if (*max_flushed_lsn < flushed_lsn) { - *max_flushed_lsn = flushed_lsn; - } -#ifdef UNIV_LOG_ARCHIVE - if (*min_arch_log_no > arch_log_no) { - *min_arch_log_no = arch_log_no; - } - if (*max_arch_log_no < arch_log_no) { - *max_arch_log_no = arch_log_no; - } -#endif /* UNIV_LOG_ARCHIVE */ - - return(NULL); + return(crypt_data); } -/*================ SINGLE-TABLE TABLESPACES ==========================*/ - -#ifndef UNIV_HOTBACKUP /*******************************************************************//** Increments the count of pending operation, if space is not being deleted. @return TRUE if being deleted, and operation should be skipped */ @@ -2164,5076 +7698,9 @@ fil_decr_pending_ops( mutex_exit(&fil_system->mutex); } -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Creates the database directory for a table if it does not exist yet. 
*/ -static -void -fil_create_directory_for_tablename( -/*===============================*/ - const char* name) /*!< in: name in the standard - 'databasename/tablename' format */ -{ - const char* namend; - char* path; - ulint len; - - len = strlen(fil_path_to_mysql_datadir); - namend = strchr(name, '/'); - ut_a(namend); - path = static_cast(mem_alloc(len + (namend - name) + 2)); - - memcpy(path, fil_path_to_mysql_datadir, len); - path[len] = '/'; - memcpy(path + len + 1, name, namend - name); - path[len + (namend - name) + 1] = 0; - - srv_normalize_path_for_win(path); - - ut_a(os_file_create_directory(path, FALSE)); - mem_free(path); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Writes a log record about an .ibd file create/rename/delete. */ -static -void -fil_op_write_log( -/*=============*/ - ulint type, /*!< in: MLOG_FILE_CREATE, - MLOG_FILE_CREATE2, - MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id, /*!< in: space id */ - ulint log_flags, /*!< in: redo log flags (stored - in the page number field) */ - ulint flags, /*!< in: compressed page size - and file format - if type==MLOG_FILE_CREATE2, or 0 */ - const char* name, /*!< in: table name in the familiar - 'databasename/tablename' format, or - the file path in the case of - MLOG_FILE_DELETE */ - const char* new_name, /*!< in: if type is MLOG_FILE_RENAME, - the new table name in the - 'databasename/tablename' format */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - byte* log_ptr; - ulint len; - - log_ptr = mlog_open(mtr, 11 + 2 + 1); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_for_file_op( - type, space_id, log_flags, log_ptr, mtr); - if (type == MLOG_FILE_CREATE2) { - mach_write_to_4(log_ptr, flags); - log_ptr += 4; - } - /* Let us store the strings as null-terminated for easier readability - and handling */ - - 
len = strlen(name) + 1; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) name, len); - - if (type == MLOG_FILE_RENAME) { - len = strlen(new_name) + 1; - log_ptr = mlog_open(mtr, 2 + len); - ut_a(log_ptr); - mach_write_to_2(log_ptr, len); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) new_name, len); - } -} -#endif - -/*******************************************************************//** -Parses the body of a log record written about an .ibd file operation. That is, -the log record part after the standard (type, space id, page no) header of the -log record. - -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. - -Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to -the datadir that we should use in replaying the file operations. - -InnoDB recovery does not replay these fully since it always sets the space id -to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are -used, mysqlbackup will only create tables in the default directory since -MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path. 
- -@return end of log record, or NULL if the record was not completely -contained between ptr and end_ptr */ -UNIV_INTERN -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - byte* ptr, /*!< in: buffer containing the log record body, - or an initial segment of it, if the record does - not fir completely between ptr and end_ptr */ - byte* end_ptr, /*!< in: buffer end */ - ulint type, /*!< in: the type of this log record */ - ulint space_id, /*!< in: the space id of the tablespace in - question, or 0 if the log record should - only be parsed but not replayed */ - ulint log_flags) /*!< in: redo log flags - (stored in the page number parameter) */ -{ - ulint name_len; - ulint new_name_len; - const char* name; - const char* new_name = NULL; - ulint flags = 0; - - if (type == MLOG_FILE_CREATE2) { - if (end_ptr < ptr + 4) { - - return(NULL); - } - - flags = mach_read_from_4(ptr); - ptr += 4; - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + name_len) { - - return(NULL); - } - - name = (const char*) ptr; - - ptr += name_len; - - if (type == MLOG_FILE_RENAME) { - if (end_ptr < ptr + 2) { - - return(NULL); - } - - new_name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + new_name_len) { - - return(NULL); - } - - new_name = (const char*) ptr; - - ptr += new_name_len; - } - - /* We managed to parse a full log record body */ - /* - printf("Parsed log rec of type %lu space %lu\n" - "name %s\n", type, space_id, name); - - if (type == MLOG_FILE_RENAME) { - printf("new name %s\n", new_name); - } - */ - if (!space_id) { - return(ptr); - } - - /* Let us try to perform the file operation, if sensible. Note that - mysqlbackup has at this stage already read in all space id info to the - fil0fil.cc data structures. - - NOTE that our algorithm is not guaranteed to work correctly if there - were renames of tables during the backup. See mysqlbackup code for more - on the problem. 
*/ - - switch (type) { - case MLOG_FILE_DELETE: - if (fil_tablespace_exists_in_mem(space_id)) { - dberr_t err = fil_delete_tablespace( - space_id, BUF_REMOVE_FLUSH_NO_WRITE); - ut_a(err == DB_SUCCESS); - } - - break; - - case MLOG_FILE_RENAME: - /* In order to replay the rename, the following must hold: - * The new name is not already used. - * A tablespace is open in memory with the old name. - * The space ID for that tablepace matches this log entry. - This will prevent unintended renames during recovery. */ - - if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED - && space_id == fil_get_space_id_for_table(name)) { - /* Create the database directory for the new name, if - it does not exist yet */ - fil_create_directory_for_tablename(new_name); - - if (!fil_rename_tablespace(name, space_id, - new_name, NULL)) { - ut_error; - } - } - - break; - - case MLOG_FILE_CREATE: - case MLOG_FILE_CREATE2: - if (fil_tablespace_exists_in_mem(space_id)) { - /* Do nothing */ - } else if (fil_get_space_id_for_table(name) - != ULINT_UNDEFINED) { - /* Do nothing */ - } else if (log_flags & MLOG_FILE_FLAG_TEMP) { - /* Temporary table, do nothing */ - } else { - const char* path = NULL; - - /* Create the database directory for name, if it does - not exist yet */ - fil_create_directory_for_tablename(name); - - if (fil_create_new_single_table_tablespace( - space_id, name, path, flags, - DICT_TF2_USE_TABLESPACE, - FIL_IBD_FILE_INITIAL_SIZE, - FIL_SPACE_ENCRYPTION_DEFAULT, - FIL_DEFAULT_ENCRYPTION_KEY) != DB_SUCCESS) { - ut_error; - } - } - - break; - - default: - ut_error; - } - - return(ptr); -} - -/*******************************************************************//** -Allocates a file name for the EXPORT/IMPORT config file name. The -string must be freed by caller with mem_free(). 
-@return own: file name */ -static -char* -fil_make_cfg_name( -/*==============*/ - const char* filepath) /*!< in: .ibd file name */ -{ - char* cfg_name; - - /* Create a temporary file path by replacing the .ibd suffix - with .cfg. */ - - ut_ad(strlen(filepath) > 4); - - cfg_name = mem_strdup(filepath); - ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg"); - return(cfg_name); -} - -/*******************************************************************//** -Check for change buffer merges. -@return 0 if no merges else count + 1. */ -static -ulint -fil_ibuf_check_pending_ops( -/*=======================*/ - fil_space_t* space, /*!< in/out: Tablespace to check */ - ulint count) /*!< in: number of attempts so far */ -{ - ut_ad(mutex_own(&fil_system->mutex)); - - if (space != 0 && space->n_pending_ops != 0) { - - if (count > 5000) { - ib_logf(IB_LOG_LEVEL_WARN, - "Trying to close/delete tablespace " - "'%s' but there are %lu pending change " - "buffer merges on it.", - space->name, - (ulong) space->n_pending_ops); - } - - return(count + 1); - } - - return(0); -} - -/*******************************************************************//** -Check for pending IO. -@return 0 if no pending else count + 1. */ -static -ulint -fil_check_pending_io( -/*=================*/ - fil_space_t* space, /*!< in/out: Tablespace to check */ - fil_node_t** node, /*!< out: Node in space list */ - ulint count) /*!< in: number of attempts so far */ -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_a(space->n_pending_ops == 0); - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. 
*/ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - - *node = UT_LIST_GET_FIRST(space->chain); - - if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) { - - ut_a(!(*node)->being_extended); - - if (count > 1000) { - ib_logf(IB_LOG_LEVEL_WARN, - "Trying to close/delete tablespace '%s' " - "but there are %lu flushes " - " and %lu pending i/o's on it.", - space->name, - (ulong) space->n_pending_flushes, - (ulong) (*node)->n_pending); - } - - return(count + 1); - } - - return(0); -} - -/*******************************************************************//** -Check pending operations on a tablespace. -@return DB_SUCCESS or error failure. */ -static -dberr_t -fil_check_pending_operations( -/*=========================*/ - ulint id, /*!< in: space id */ - fil_space_t** space, /*!< out: tablespace instance in memory */ - char** path) /*!< out/own: tablespace path */ -{ - ulint count = 0; - - ut_a(id != TRX_SYS_SPACE); - ut_ad(space); - - *space = 0; - - /* Wait for crypt threads to stop accessing space */ - fil_space_crypt_close_tablespace(id); - - mutex_enter(&fil_system->mutex); - fil_space_t* sp = fil_space_get_by_id(id); - if (sp) { - sp->stop_new_ops = TRUE; - } - mutex_exit(&fil_system->mutex); - - /* Check for pending change buffer merges. */ - - do { - mutex_enter(&fil_system->mutex); - - sp = fil_space_get_by_id(id); - - count = fil_ibuf_check_pending_ops(sp, count); - - mutex_exit(&fil_system->mutex); - - if (count > 0) { - os_thread_sleep(20000); - } - - } while (count > 0); - - /* Check for pending IO. 
*/ - - *path = 0; - - do { - mutex_enter(&fil_system->mutex); - - sp = fil_space_get_by_id(id); - - if (sp == NULL) { - mutex_exit(&fil_system->mutex); - return(DB_TABLESPACE_NOT_FOUND); - } - - fil_node_t* node; - - count = fil_check_pending_io(sp, &node, count); - - if (count == 0) { - *path = mem_strdup(node->name); - } - - mutex_exit(&fil_system->mutex); - - if (count > 0) { - os_thread_sleep(20000); - } - - } while (count > 0); - - ut_ad(sp); - - *space = sp; - return(DB_SUCCESS); -} - -/*******************************************************************//** -Closes a single-table tablespace. The tablespace must be cached in the -memory cache. Free all pages used by the tablespace. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_close_tablespace( -/*=================*/ - trx_t* trx, /*!< in/out: Transaction covering the close */ - ulint id) /*!< in: space id */ -{ - char* path = 0; - fil_space_t* space = 0; - - ut_a(id != TRX_SYS_SPACE); - - dberr_t err = fil_check_pending_operations(id, &space, &path); - - if (err != DB_SUCCESS) { - return(err); - } - - ut_a(space); - ut_a(path != 0); - - rw_lock_x_lock(&space->latch); - -#ifndef UNIV_HOTBACKUP - /* Invalidate in the buffer pool all pages belonging to the - tablespace. Since we have set space->stop_new_ops = TRUE, readahead - or ibuf merge can no longer read more pages of this tablespace to the - buffer pool. Thus we can clean the tablespace out of the buffer pool - completely and permanently. The flag stop_new_ops also prevents - fil_flush() from being applied to this tablespace. */ - - buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx); -#endif - mutex_enter(&fil_system->mutex); - - /* If the free is successful, the X lock will be released before - the space memory data structure is freed. 
*/ - - if (!fil_space_free(id, TRUE)) { - rw_lock_x_unlock(&space->latch); - err = DB_TABLESPACE_NOT_FOUND; - } else { - err = DB_SUCCESS; - } - - mutex_exit(&fil_system->mutex); - - /* If it is a delete then also delete any generated files, otherwise - when we drop the database the remove directory will fail. */ - - char* cfg_name = fil_make_cfg_name(path); - - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - - mem_free(path); - mem_free(cfg_name); - - return(err); -} - -/*******************************************************************//** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_delete_tablespace( -/*==================*/ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove) /*!< in: specify the action to take - on the tables pages in the buffer - pool */ -{ - char* path = 0; - fil_space_t* space = 0; - - ut_a(id != TRX_SYS_SPACE); - - dberr_t err = fil_check_pending_operations(id, &space, &path); - - if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot delete tablespace %lu because it is not " - "found in the tablespace memory cache.", - (ulong) id); - - return(err); - } - - ut_a(space); - ut_a(path != 0); - - /* Important: We rely on the data dictionary mutex to ensure - that a race is not possible here. It should serialize the tablespace - drop/free. We acquire an X latch only to avoid a race condition - when accessing the tablespace instance via: - - fsp_get_available_space_in_free_extents(). - - There our main motivation is to reduce the contention on the - dictionary mutex. */ - - rw_lock_x_lock(&space->latch); - -#ifndef UNIV_HOTBACKUP - /* IMPORTANT: Because we have set space::stop_new_ops there - can't be any new ibuf merges, reads or flushes. We are here - because node::n_pending was zero above. 
However, it is still - possible to have pending read and write requests: - - A read request can happen because the reader thread has - gone through the ::stop_new_ops check in buf_page_init_for_read() - before the flag was set and has not yet incremented ::n_pending - when we checked it above. - - A write request can be issued any time because we don't check - the ::stop_new_ops flag when queueing a block for write. - - We deal with pending write requests in the following function - where we'd minimally evict all dirty pages belonging to this - space from the flush_list. Not that if a block is IO-fixed - we'll wait for IO to complete. - - To deal with potential read requests by checking the - ::stop_new_ops flag in fil_io() */ - - buf_LRU_flush_or_remove_pages(id, buf_remove, 0); - -#endif /* !UNIV_HOTBACKUP */ - - /* If it is a delete then also delete any generated files, otherwise - when we drop the database the remove directory will fail. */ - { - char* cfg_name = fil_make_cfg_name(path); - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - mem_free(cfg_name); - } - - /* Delete the link file pointing to the ibd file we are deleting. */ - if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) { - fil_delete_link_file(space->name); - } - - mutex_enter(&fil_system->mutex); - - /* Double check the sanity of pending ops after reacquiring - the fil_system::mutex. */ - if (fil_space_get_by_id(id)) { - ut_a(space->n_pending_ops == 0); - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - ut_a(node->n_pending == 0); - } - - if (!fil_space_free(id, TRUE)) { - err = DB_TABLESPACE_NOT_FOUND; - } - - mutex_exit(&fil_system->mutex); - - if (err != DB_SUCCESS) { - rw_lock_x_unlock(&space->latch); - } else if (!os_file_delete(innodb_file_data_key, path) - && !os_file_delete_if_exists(innodb_file_data_key, path)) { - - /* Note: This is because we have removed the - tablespace instance from the cache. 
*/ - - err = DB_IO_ERROR; - } - - if (err == DB_SUCCESS) { -#ifndef UNIV_HOTBACKUP - /* Write a log record about the deletion of the .ibd - file, so that mysqlbackup can replay it in the - --apply-log phase. We use a dummy mtr and the familiar - log write mechanism. */ - mtr_t mtr; - - /* When replaying the operation in mysqlbackup, do not try - to write any log record */ - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr); - mtr_commit(&mtr); -#endif - err = DB_SUCCESS; - } - - mem_free(path); - - return(err); -} - -/*******************************************************************//** -Returns TRUE if a single-table tablespace is being deleted. -@return TRUE if being deleted */ -UNIV_INTERN -ibool -fil_tablespace_is_being_deleted( -/*============================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ibool is_being_deleted; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space != NULL); - - is_being_deleted = space->stop_new_ops; - - mutex_exit(&fil_system->mutex); - - return(is_being_deleted); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Discards a single-table tablespace. The tablespace must be cached in the -memory cache. Discarding is like deleting a tablespace, but - - 1. We do not drop the table from the data dictionary; - - 2. We remove all insert buffer entries for the tablespace immediately; - in DROP TABLE they are only removed gradually in the background; - - 3. Free all the pages in use by the tablespace. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_discard_tablespace( -/*===================*/ - ulint id) /*!< in: space id */ -{ - dberr_t err; - - switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) { - case DB_SUCCESS: - break; - - case DB_IO_ERROR: - ib_logf(IB_LOG_LEVEL_WARN, - "While deleting tablespace %lu in DISCARD TABLESPACE." 
- " File rename/delete failed: %s", - (ulong) id, ut_strerr(err)); - break; - - case DB_TABLESPACE_NOT_FOUND: - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot delete tablespace %lu in DISCARD " - "TABLESPACE. %s", - (ulong) id, ut_strerr(err)); - break; - - default: - ut_error; - } - - /* Remove all insert buffer entries for the tablespace */ - - ibuf_delete_for_discarded_space(id); - - return(err); -} -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************************//** -Renames the memory cache structures of a single-table tablespace. -@return TRUE if success */ -static -ibool -fil_rename_tablespace_in_mem( -/*=========================*/ - fil_space_t* space, /*!< in: tablespace memory object */ - fil_node_t* node, /*!< in: file node of that tablespace */ - const char* new_name, /*!< in: new name */ - const char* new_path) /*!< in: new file path */ -{ - fil_space_t* space2; - const char* old_name = space->name; - - ut_ad(mutex_own(&fil_system->mutex)); - - space2 = fil_space_get_by_name(old_name); - if (space != space2) { - fputs("InnoDB: Error: cannot find ", stderr); - ut_print_filename(stderr, old_name); - fputs(" in tablespace memory cache\n", stderr); - - return(FALSE); - } - - space2 = fil_space_get_by_name(new_name); - if (space2 != NULL) { - fputs("InnoDB: Error: ", stderr); - ut_print_filename(stderr, new_name); - fputs(" is already in tablespace memory cache\n", stderr); - - return(FALSE); - } - - HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(space->name), space); - mem_free(space->name); - mem_free(node->name); - - space->name = mem_strdup(new_name); - node->name = mem_strdup(new_path); - - HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(new_name), space); - return(TRUE); -} - -/*******************************************************************//** -Allocates a file name for a single-table tablespace. The string must be freed -by caller with mem_free(). 
-@return own: file name */ -UNIV_INTERN -char* -fil_make_ibd_name( -/*==============*/ - const char* name, /*!< in: table name or a dir path */ - bool is_full_path) /*!< in: TRUE if it is a dir path */ -{ - char* filename; - ulint namelen = strlen(name); - ulint dirlen = strlen(fil_path_to_mysql_datadir); - ulint pathlen = dirlen + namelen + sizeof "/.ibd"; - - filename = static_cast(mem_alloc(pathlen)); - - if (is_full_path) { - memcpy(filename, name, namelen); - memcpy(filename + namelen, ".ibd", sizeof ".ibd"); - } else { - ut_snprintf(filename, pathlen, "%s/%s.ibd", - fil_path_to_mysql_datadir, name); - - } - - srv_normalize_path_for_win(filename); - - return(filename); -} - -/*******************************************************************//** -Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link). -The string must be freed by caller with mem_free(). -@return own: file name */ -UNIV_INTERN -char* -fil_make_isl_name( -/*==============*/ - const char* name) /*!< in: table name */ -{ - char* filename; - ulint namelen = strlen(name); - ulint dirlen = strlen(fil_path_to_mysql_datadir); - ulint pathlen = dirlen + namelen + sizeof "/.isl"; - - filename = static_cast(mem_alloc(pathlen)); - - ut_snprintf(filename, pathlen, "%s/%s.isl", - fil_path_to_mysql_datadir, name); - - srv_normalize_path_for_win(filename); - - return(filename); -} - -/** Test if a tablespace file can be renamed to a new filepath by checking -if that the old filepath exists and the new filepath does not exist. 
-@param[in] space_id tablespace id -@param[in] old_path old filepath -@param[in] new_path new filepath -@param[in] is_discarded whether the tablespace is discarded -@return innodb error code */ -dberr_t -fil_rename_tablespace_check( - ulint space_id, - const char* old_path, - const char* new_path, - bool is_discarded) -{ - ulint exists = false; - os_file_type_t ftype; - - if (!is_discarded - && os_file_status(old_path, &exists, &ftype) - && !exists) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename '%s' to '%s' for space ID %lu" - " because the source file does not exist.", - old_path, new_path, space_id); - - return(DB_TABLESPACE_NOT_FOUND); - } - - exists = false; - if (!os_file_status(new_path, &exists, &ftype) || exists) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename '%s' to '%s' for space ID %lu" - " because the target file exists." - " Remove the target file and try again.", - old_path, new_path, space_id); - - return(DB_TABLESPACE_EXISTS); - } - - return(DB_SUCCESS); -} - -/*******************************************************************//** -Renames a single-table tablespace. The tablespace must be cached in the -tablespace memory cache. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_rename_tablespace( -/*==================*/ - const char* old_name_in, /*!< in: old table name in the - standard databasename/tablename - format of InnoDB, or NULL if we - do the rename based on the space - id only */ - ulint id, /*!< in: space id */ - const char* new_name, /*!< in: new table name in the - standard databasename/tablename - format of InnoDB */ - const char* new_path_in) /*!< in: new full datafile path - if the tablespace is remotely - located, or NULL if it is located - in the normal data directory. 
*/ -{ - ibool success; - fil_space_t* space; - fil_node_t* node; - ulint count = 0; - char* new_path; - char* old_name; - char* old_path; - const char* not_given = "(name not specified)"; - - ut_a(id != 0); - -retry: - count++; - - if (!(count % 1000)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: problems renaming ", stderr); - ut_print_filename(stderr, - old_name_in ? old_name_in : not_given); - fputs(" to ", stderr); - ut_print_filename(stderr, new_name); - fprintf(stderr, ", %lu iterations\n", (ulong) count); - } - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; ); - - if (space == NULL) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot find space id %lu in the tablespace " - "memory cache, though the table '%s' in a " - "rename operation should have that id.", - (ulong) id, old_name_in ? old_name_in : not_given); - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - if (count > 25000) { - space->stop_ios = FALSE; - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - /* We temporarily close the .ibd file because we do not trust that - operating systems can rename an open file. For the closing we have to - wait until there are no pending i/o's or flushes on the file. */ - - space->stop_ios = TRUE; - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. 
*/ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - node = UT_LIST_GET_FIRST(space->chain); - - if (node->n_pending > 0 - || node->n_pending_flushes > 0 - || node->being_extended) { - /* There are pending i/o's or flushes or the file is - currently being extended, sleep for a while and - retry */ - - mutex_exit(&fil_system->mutex); - - os_thread_sleep(20000); - - goto retry; - - } else if (node->modification_counter > node->flush_counter) { - /* Flush the space */ - - mutex_exit(&fil_system->mutex); - - os_thread_sleep(20000); - - fil_flush(id); - - goto retry; - - } else if (node->open) { - /* Close the file */ - - fil_node_close_file(node, fil_system); - } - - /* Check that the old name in the space is right */ - - if (old_name_in) { - old_name = mem_strdup(old_name_in); - ut_a(strcmp(space->name, old_name) == 0); - } else { - old_name = mem_strdup(space->name); - } - old_path = mem_strdup(node->name); - - /* Rename the tablespace and the node in the memory cache */ - new_path = new_path_in ? 
mem_strdup(new_path_in) - : fil_make_ibd_name(new_name, false); - - success = fil_rename_tablespace_in_mem( - space, node, new_name, new_path); - - if (success) { - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", - goto skip_second_rename; ); - - success = os_file_rename( - innodb_file_data_key, old_path, new_path); - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", -skip_second_rename: - success = FALSE; ); - - if (!success) { - /* We have to revert the changes we made - to the tablespace memory cache */ - - ut_a(fil_rename_tablespace_in_mem( - space, node, old_name, old_path)); - } - } - - space->stop_ios = FALSE; - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - if (success && !recv_recovery_on) { - mtr_t mtr; - - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name, - &mtr); - mtr_commit(&mtr); - } -#endif /* !UNIV_HOTBACKUP */ - - mem_free(new_path); - mem_free(old_path); - mem_free(old_name); - - return(success); -} - -/*******************************************************************//** -Creates a new InnoDB Symbolic Link (ISL) file. It is always created -under the 'datadir' of MySQL. The datadir is the directory of a -running mysqld program. We can refer to it by simply using the path '.'. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_link_file( -/*=================*/ - const char* tablename, /*!< in: tablename */ - const char* filepath) /*!< in: pathname of tablespace */ -{ - dberr_t err = DB_SUCCESS; - char* link_filepath; - char* prev_filepath = fil_read_link_file(tablename); - - ut_ad(!srv_read_only_mode); - - if (prev_filepath) { - /* Truncate will call this with an existing - link file which contains the same filepath. */ - if (0 == strcmp(prev_filepath, filepath)) { - mem_free(prev_filepath); - return(DB_SUCCESS); - } - mem_free(prev_filepath); - } - - link_filepath = fil_make_isl_name(tablename); - - /** Check if the file already exists. 
*/ - FILE* file = NULL; - ibool exists; - os_file_type_t ftype; - - bool success = os_file_status(link_filepath, &exists, &ftype); - - ulint error = 0; - if (success && !exists) { - file = fopen(link_filepath, "w"); - if (file == NULL) { - /* This call will print its own error message */ - error = os_file_get_last_error(true); - } - } else { - error = OS_FILE_ALREADY_EXISTS; - } - if (error != 0) { - - ut_print_timestamp(stderr); - fputs(" InnoDB: Cannot create file ", stderr); - ut_print_filename(stderr, link_filepath); - fputs(".\n", stderr); - - if (error == OS_FILE_ALREADY_EXISTS) { - fputs("InnoDB: The link file: ", stderr); - ut_print_filename(stderr, filepath); - fputs(" already exists.\n", stderr); - err = DB_TABLESPACE_EXISTS; - } else if (error == OS_FILE_DISK_FULL) { - err = DB_OUT_OF_FILE_SPACE; - } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { - err = DB_UNSUPPORTED; - } else { - err = DB_ERROR; - } - - /* file is not open, no need to close it. */ - mem_free(link_filepath); - return(err); - } - - ulint rbytes = fwrite(filepath, 1, strlen(filepath), file); - if (rbytes != strlen(filepath)) { - os_file_get_last_error(true); - ib_logf(IB_LOG_LEVEL_ERROR, - "cannot write link file " - "%s",filepath); - err = DB_ERROR; - } - - /* Close the file, we only need it at startup */ - fclose(file); - - mem_free(link_filepath); - - return(err); -} - -/*******************************************************************//** -Deletes an InnoDB Symbolic Link (ISL) file. */ -UNIV_INTERN -void -fil_delete_link_file( -/*=================*/ - const char* tablename) /*!< in: name of table */ -{ - char* link_filepath = fil_make_isl_name(tablename); - - os_file_delete_if_exists(innodb_file_data_key, link_filepath); - - mem_free(link_filepath); -} - -/*******************************************************************//** -Reads an InnoDB Symbolic Link (ISL) file. -It is always created under the 'datadir' of MySQL. The name is of the -form {databasename}/{tablename}. 
and the isl file is expected to be in a -'{databasename}' directory called '{tablename}.isl'. The caller must free -the memory of the null-terminated path returned if it is not null. -@return own: filepath found in link file, NULL if not found. */ -UNIV_INTERN -char* -fil_read_link_file( -/*===============*/ - const char* name) /*!< in: tablespace name */ -{ - char* filepath = NULL; - char* link_filepath; - FILE* file = NULL; - - /* The .isl file is in the 'normal' tablespace location. */ - link_filepath = fil_make_isl_name(name); - - file = fopen(link_filepath, "r+b"); - - mem_free(link_filepath); - - if (file) { - filepath = static_cast(mem_alloc(OS_FILE_MAX_PATH)); - - os_file_read_string(file, filepath, OS_FILE_MAX_PATH); - fclose(file); - - if (strlen(filepath)) { - /* Trim whitespace from end of filepath */ - ulint lastch = strlen(filepath) - 1; - while (lastch > 4 && filepath[lastch] <= 0x20) { - filepath[lastch--] = 0x00; - } - srv_normalize_path_for_win(filepath); - } - } - - return(filepath); -} - -/*******************************************************************//** -Opens a handle to the file linked to in an InnoDB Symbolic Link file. -@return TRUE if remote linked tablespace file is found and opened. */ -UNIV_INTERN -ibool -fil_open_linked_file( -/*===============*/ - const char* tablename, /*!< in: database/tablename */ - char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file, /*!< out: remote file handle */ - ulint atomic_writes) /*!< in: atomic writes table option - value */ -{ - ibool success; - - *remote_filepath = fil_read_link_file(tablename); - if (*remote_filepath == NULL) { - return(FALSE); - } - - /* The filepath provided is different from what was - found in the link file. 
*/ - *remote_file = os_file_create_simple_no_error_handling( - innodb_file_data_key, *remote_filepath, - OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success, atomic_writes); - - if (!success) { - char* link_filepath = fil_make_isl_name(tablename); - - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "A link file was found named '%s' " - "but the linked tablespace '%s' " - "could not be opened.", - link_filepath, *remote_filepath); - - mem_free(link_filepath); - mem_free(*remote_filepath); - *remote_filepath = NULL; - } - - return(success); -} - -/*******************************************************************//** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. 
- -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_new_single_table_tablespace( -/*===================================*/ - ulint space_id, /*!< in: space id */ - const char* tablename, /*!< in: the table name in the usual - databasename/tablename format - of InnoDB */ - const char* dir_path, /*!< in: NULL or a dir path */ - ulint flags, /*!< in: tablespace flags */ - ulint flags2, /*!< in: table flags2 */ - ulint size, /*!< in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ -{ - os_file_t file; - ibool ret; - dberr_t err; - byte* buf2; - byte* page; - char* path; - ibool success; - /* TRUE if a table is created with CREATE TEMPORARY TABLE */ - bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); - bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); - ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); - fil_space_crypt_t *crypt_data = NULL; - - ut_a(space_id > 0); - ut_ad(!srv_read_only_mode); - ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); - ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); - ut_a(fsp_flags_is_valid(flags)); - - if (is_temp) { - /* Temporary table filepath */ - ut_ad(dir_path); - path = fil_make_ibd_name(dir_path, true); - } else if (has_data_dir) { - ut_ad(dir_path); - path = os_file_make_remote_pathname(dir_path, tablename, "ibd"); - - /* Since this tablespace file will be created in a - remote directory, let's create the subdirectories - in the path, if they are not there already. 
*/ - success = os_file_create_subdirs_if_needed(path); - if (!success) { - err = DB_ERROR; - goto error_exit_3; - } - } else { - path = fil_make_ibd_name(tablename, false); - } - - file = os_file_create( - innodb_file_data_key, path, - OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, - OS_FILE_NORMAL, - OS_DATA_FILE, - &ret, - atomic_writes); - - if (ret == FALSE) { - /* The following call will print an error message */ - ulint error = os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create file '%s'\n", path); - - if (error == OS_FILE_ALREADY_EXISTS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "The file '%s' already exists though the " - "corresponding table did not exist " - "in the InnoDB data dictionary. " - "Have you moved InnoDB .ibd files " - "around without using the SQL commands " - "DISCARD TABLESPACE and IMPORT TABLESPACE, " - "or did mysqld crash in the middle of " - "CREATE TABLE? " - "You can resolve the problem by removing " - "the file '%s' under the 'datadir' of MySQL.", - path, path); - - err = DB_TABLESPACE_EXISTS; - goto error_exit_3; - } - - if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { - err = DB_UNSUPPORTED; - goto error_exit_3; - } - - if (error == OS_FILE_DISK_FULL) { - err = DB_OUT_OF_FILE_SPACE; - goto error_exit_3; - } - - err = DB_ERROR; - goto error_exit_3; - } - - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE); - - if (!ret) { - err = DB_OUT_OF_FILE_SPACE; - goto error_exit_2; - } - - /* printf("Creating tablespace %s id %lu\n", path, space_id); */ - - /* We have to write the space id to the file immediately and flush the - file to disk. This is because in crash recovery we must be aware what - tablespaces exist and what are their space id's, so that we can apply - the log records to the right file. It may take quite a while until - buffer pool flush algorithms write anything to the file and flush it to - disk. 
If we would not write here anything, the file would be filled - with zeros from the call of os_file_set_size(), until a buffer pool - flush would write to it. */ - - buf2 = static_cast(ut_malloc(3 * UNIV_PAGE_SIZE)); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = static_cast(ut_align(buf2, UNIV_PAGE_SIZE)); - - memset(page, '\0', UNIV_PAGE_SIZE); - - /* Add the UNIV_PAGE_SIZE to the table flags and write them to the - tablespace header. */ - flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); - fsp_header_init_fields(page, space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); - ut_ad(fsp_flags_is_valid(flags)); - - if (!(fsp_flags_is_compressed(flags))) { - buf_flush_init_for_writing(page, NULL, 0); - ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE); - } else { - page_zip_des_t page_zip; - ulint zip_size; - - zip_size = fsp_flags_get_zip_size(flags); - - page_zip_set_size(&page_zip, zip_size); - page_zip.data = page + UNIV_PAGE_SIZE; -#ifdef UNIV_DEBUG - page_zip.m_start = -#endif /* UNIV_DEBUG */ - page_zip.m_end = page_zip.m_nonempty = - page_zip.n_blobs = 0; - buf_flush_init_for_writing(page, &page_zip, 0); - ret = os_file_write(path, file, page_zip.data, 0, zip_size); - } - - ut_free(buf2); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Could not write the first page to tablespace " - "'%s'", path); - - err = DB_ERROR; - goto error_exit_2; - } - - ret = os_file_flush(file); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "File flush of tablespace '%s' failed", path); - err = DB_ERROR; - goto error_exit_2; - } - - if (has_data_dir) { - /* Now that the IBD file is created, make the ISL file. */ - err = fil_create_link_file(tablename, path); - if (err != DB_SUCCESS) { - goto error_exit_2; - } - } - - /* Create crypt data if the tablespace is either encrypted or user has - requested it to remain unencrypted. 
*/ - if (mode == FIL_SPACE_ENCRYPTION_ON || mode == FIL_SPACE_ENCRYPTION_OFF || - srv_encrypt_tables) { - crypt_data = fil_space_create_crypt_data(mode, key_id); - } - - success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, - crypt_data); - - if (!success || !fil_node_create(path, size, space_id, FALSE)) { - err = DB_ERROR; - goto error_exit_1; - } - -#ifndef UNIV_HOTBACKUP - { - mtr_t mtr; - ulint mlog_file_flag = 0; - - if (is_temp) { - mlog_file_flag |= MLOG_FILE_FLAG_TEMP; - } - - mtr_start(&mtr); - - fil_op_write_log(flags - ? MLOG_FILE_CREATE2 - : MLOG_FILE_CREATE, - space_id, mlog_file_flag, flags, - tablename, NULL, &mtr); - - mtr_commit(&mtr); - } -#endif - err = DB_SUCCESS; - - /* Error code is set. Cleanup the various variables used. - These labels reflect the order in which variables are assigned or - actions are done. */ -error_exit_1: - if (has_data_dir && err != DB_SUCCESS) { - fil_delete_link_file(tablename); - } -error_exit_2: - os_file_close(file); - if (err != DB_SUCCESS) { - os_file_delete(innodb_file_data_key, path); - } -error_exit_3: - mem_free(path); - - return(err); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Report information about a bad tablespace. */ -static -void -fil_report_bad_tablespace( -/*======================*/ - const char* filepath, /*!< in: filepath */ - const char* check_msg, /*!< in: fil_check_first_page() */ - ulint found_id, /*!< in: found space ID */ - ulint found_flags, /*!< in: found flags */ - ulint expected_id, /*!< in: expected space id */ - ulint expected_flags) /*!< in: expected flags */ -{ - if (check_msg) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error %s in file '%s'," - "tablespace id=%lu, flags=%lu. 
" - "Please refer to " - REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - check_msg, filepath, - (ulong) expected_id, (ulong) expected_flags); - return; - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "In file '%s', tablespace id and flags are %lu and %lu, " - "but in the InnoDB data dictionary they are %lu and %lu. " - "Have you moved InnoDB .ibd files around without using the " - "commands DISCARD TABLESPACE and IMPORT TABLESPACE? " - "Please refer to " - REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - filepath, (ulong) found_id, (ulong) found_flags, - (ulong) expected_id, (ulong) expected_flags); -} - -/********************************************************************//** -Tries to open a single-table tablespace and optionally checks that the -space id in it is correct. If this does not succeed, print an error message -to the .err log. This function is used to open a tablespace when we start -mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE. - -NOTE that we assume this operation is used either at the database startup -or under the protection of the dictionary mutex, so that two users cannot -race here. This operation does not leave the file associated with the -tablespace open, but closes it after we have looked at the space id in it. - -If the validate boolean is set, we read the first page of the file and -check that the space id in the file is what we expect. We assume that -this function runs much faster if no check is made, since accessing the -file inode probably is much faster (the OS caches them) than accessing -the first page of the file. This boolean may be initially FALSE, but if -a remote tablespace is found it will be changed to true. - -If the fix_dict boolean is set, then it is safe to use an internal SQL -statement to update the dictionary tables if they are incorrect. 
- -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_open_single_table_tablespace( -/*=============================*/ - bool validate, /*!< in: Do we validate tablespace? */ - bool fix_dict, /*!< in: Can we fix the dictionary? */ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: tablespace flags */ - const char* tablename, /*!< in: table name in the - databasename/tablename format */ - const char* path_in, /*!< in: tablespace filepath */ - dict_table_t* table) /*!< in: table */ -{ - dberr_t err = DB_SUCCESS; - bool dict_filepath_same_as_default = false; - bool link_file_found = false; - bool link_file_is_bad = false; - fsp_open_info def; - fsp_open_info dict; - fsp_open_info remote; - ulint tablespaces_found = 0; - ulint valid_tablespaces_found = 0; - ulint atomic_writes = 0; - fil_space_crypt_t* crypt_data = NULL; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex))); - - /* Table flags can be ULINT_UNDEFINED if - dict_tf_to_fsp_flags_failure is set. */ - if (flags != ULINT_UNDEFINED) { - if (!fsp_flags_is_valid(flags)) { - return(DB_CORRUPTION); - } - } else { - return(DB_CORRUPTION); - } - - atomic_writes = fsp_flags_get_atomic_writes(flags); - - /* If the tablespace was relocated, we do not - compare the DATA_DIR flag */ - ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; - - memset(&def, 0, sizeof(def)); - memset(&dict, 0, sizeof(dict)); - memset(&remote, 0, sizeof(remote)); - - /* Discover the correct filepath. We will always look for an ibd - in the default location. If it is remote, it should not be here. */ - def.filepath = fil_make_ibd_name(tablename, false); - - /* The path_in was read from SYS_DATAFILES. */ - if (path_in) { - if (strcmp(def.filepath, path_in)) { - dict.filepath = mem_strdup(path_in); - /* possibility of multiple files. 
*/ - validate = true; - } else { - dict_filepath_same_as_default = true; - } - } - - link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file, atomic_writes); - remote.success = link_file_found; - if (remote.success) { - /* possibility of multiple files. */ - validate = true; - tablespaces_found++; - - /* A link file was found. MySQL does not allow a DATA - DIRECTORY to be be the same as the default filepath. */ - ut_a(strcmp(def.filepath, remote.filepath)); - - /* If there was a filepath found in SYS_DATAFILES, - we hope it was the same as this remote.filepath found - in the ISL file. */ - if (dict.filepath - && (0 == strcmp(dict.filepath, remote.filepath))) { - remote.success = FALSE; - os_file_close(remote.file); - mem_free(remote.filepath); - remote.filepath = NULL; - tablespaces_found--; - } - } - - /* Attempt to open the tablespace at other possible filepaths. */ - if (dict.filepath) { - dict.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success, atomic_writes); - if (dict.success) { - /* possibility of multiple files. */ - validate = true; - tablespaces_found++; - } - } - - /* Always look for a file at the default location. */ - ut_a(def.filepath); - def.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success, atomic_writes); - if (def.success) { - tablespaces_found++; - } - - /* We have now checked all possible tablespace locations and - have a count of how many we found. If things are normal, we - only found 1. */ - if (!validate && tablespaces_found == 1) { - goto skip_validate; - } - - /* Read the first page of the datadir tablespace, if found. 
*/ - if (def.success) { - def.check_msg = fil_read_first_page( - def.file, FALSE, &def.flags, &def.id, -#ifdef UNIV_LOG_ARCHIVE - &space_arch_log_no, &space_arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &def.lsn, &def.lsn, &def.crypt_data); - def.valid = !def.check_msg; - - if (table) { - table->crypt_data = def.crypt_data; - } - - /* Validate this single-table-tablespace with SYS_TABLES, - but do not compare the DATA_DIR flag, in case the - tablespace was relocated. */ - - ulint newf = def.flags; - if (newf != mod_flags) { - if (FSP_FLAGS_HAS_DATA_DIR(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); - } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); - } - } - - if (def.valid && def.id == id - && newf == mod_flags) { - valid_tablespaces_found++; - } else { - def.valid = false; - /* Do not use this tablespace. */ - fil_report_bad_tablespace( - def.filepath, def.check_msg, def.id, - def.flags, id, flags); - } - } - - /* Read the first page of the remote tablespace */ - if (remote.success) { - remote.check_msg = fil_read_first_page( - remote.file, FALSE, &remote.flags, &remote.id, -#ifdef UNIV_LOG_ARCHIVE - &remote.arch_log_no, &remote.arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &remote.lsn, &remote.lsn, &remote.crypt_data); - remote.valid = !remote.check_msg; - - if (table) { - table->crypt_data = remote.crypt_data; - } - - /* Validate this single-table-tablespace with SYS_TABLES, - but do not compare the DATA_DIR flag, in case the - tablespace was relocated. */ - ulint newf = remote.flags; - if (newf != mod_flags) { - if (FSP_FLAGS_HAS_DATA_DIR(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); - } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); - } - } - - if (remote.valid && remote.id == id - && newf == mod_flags) { - valid_tablespaces_found++; - } else { - remote.valid = false; - /* Do not use this linked tablespace. 
*/ - fil_report_bad_tablespace( - remote.filepath, remote.check_msg, remote.id, - remote.flags, id, flags); - link_file_is_bad = true; - } - } - - /* Read the first page of the datadir tablespace, if found. */ - if (dict.success) { - dict.check_msg = fil_read_first_page( - dict.file, FALSE, &dict.flags, &dict.id, -#ifdef UNIV_LOG_ARCHIVE - &dict.arch_log_no, &dict.arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &dict.lsn, &dict.lsn, &dict.crypt_data); - dict.valid = !dict.check_msg; - - if (table) { - table->crypt_data = dict.crypt_data; - } - - /* Validate this single-table-tablespace with SYS_TABLES, - but do not compare the DATA_DIR flag, in case the - tablespace was relocated. */ - ulint newf = dict.flags; - if (newf != mod_flags) { - if (FSP_FLAGS_HAS_DATA_DIR(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); - } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { - newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); - } - } - - if (dict.valid && dict.id == id - && newf == mod_flags) { - valid_tablespaces_found++; - } else { - dict.valid = false; - /* Do not use this tablespace. */ - fil_report_bad_tablespace( - dict.filepath, dict.check_msg, dict.id, - dict.flags, id, flags); - } - } - - /* Make sense of these three possible locations. - First, bail out if no tablespace files were found. */ - if (valid_tablespaces_found == 0) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Could not find a valid tablespace file for '%s'. " - "See " REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - tablename); - - err = DB_CORRUPTION; - - goto cleanup_and_exit; - } - - /* Do not open any tablespaces if more than one tablespace with - the correct space ID and flags were found. 
*/ - if (tablespaces_found > 1) { - ib_logf(IB_LOG_LEVEL_ERROR, - "A tablespace for %s has been found in " - "multiple places;", tablename); - if (def.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Default location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - def.filepath, def.lsn, - (ulong) def.id, (ulong) def.flags); - } - if (remote.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Remote location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - remote.filepath, remote.lsn, - (ulong) remote.id, (ulong) remote.flags); - } - if (dict.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Dictionary location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - dict.filepath, dict.lsn, - (ulong) dict.id, (ulong) dict.flags); - } - - /* Force-recovery will allow some tablespaces to be - skipped by REDO if there was more than one file found. - Unlike during the REDO phase of recovery, we now know - if the tablespace is valid according to the dictionary, - which was not available then. So if we did not force - recovery and there is only one good tablespace, ignore - any bad tablespaces. */ - if (valid_tablespaces_found > 1 || srv_force_recovery > 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Will not open the tablespace for '%s'", - tablename); - - if (def.success != def.valid - || dict.success != dict.valid - || remote.success != remote.valid) { - err = DB_CORRUPTION; - } else { - err = DB_ERROR; - } - goto cleanup_and_exit; - } - - /* There is only one valid tablespace found and we did - not use srv_force_recovery during REDO. Use this one - tablespace and clean up invalid tablespace pointers */ - if (def.success && !def.valid) { - def.success = false; - os_file_close(def.file); - tablespaces_found--; - } - if (dict.success && !dict.valid) { - dict.success = false; - os_file_close(dict.file); - /* Leave dict.filepath so that SYS_DATAFILES - can be corrected below. 
*/ - tablespaces_found--; - } - if (remote.success && !remote.valid) { - remote.success = false; - os_file_close(remote.file); - mem_free(remote.filepath); - remote.filepath = NULL; - tablespaces_found--; - } - } - - /* At this point, there should be only one filepath. */ - ut_a(tablespaces_found == 1); - ut_a(valid_tablespaces_found == 1); - - /* Only fix the dictionary at startup when there is only one thread. - Calls to dict_load_table() can be done while holding other latches. */ - if (!fix_dict) { - goto skip_validate; - } - - /* We may need to change what is stored in SYS_DATAFILES or - SYS_TABLESPACES or adjust the link file. - Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does - not prevent opening and using the single_table_tablespace either - this time or the next, we do not check the return code or fail - to open the tablespace. But dict_update_filepath() will issue a - warning to the log. */ - if (dict.filepath) { - if (remote.success) { - dict_update_filepath(id, remote.filepath); - } else if (def.success) { - dict_update_filepath(id, def.filepath); - if (link_file_is_bad) { - fil_delete_link_file(tablename); - } - } else if (!link_file_found || link_file_is_bad) { - ut_ad(dict.success); - /* Fix the link file if we got our filepath - from the dictionary but a link file did not - exist or it did not point to a valid file. */ - fil_delete_link_file(tablename); - fil_create_link_file(tablename, dict.filepath); - } - - } else if (remote.success && dict_filepath_same_as_default) { - dict_update_filepath(id, remote.filepath); - - } else if (remote.success && path_in == NULL) { - /* SYS_DATAFILES record for this space ID was not found. 
*/ - dict_insert_tablespace_and_filepath( - id, tablename, remote.filepath, flags); - } - -skip_validate: - if (remote.success) - crypt_data = remote.crypt_data; - else if (dict.success) - crypt_data = dict.crypt_data; - else if (def.success) - crypt_data = def.crypt_data; - - if (err != DB_SUCCESS) { - ; // Don't load the tablespace into the cache - } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE, - crypt_data)) { - err = DB_ERROR; - } else { - /* We do not measure the size of the file, that is why - we pass the 0 below */ - - if (!fil_node_create(remote.success ? remote.filepath : - dict.success ? dict.filepath : - def.filepath, 0, id, FALSE)) { - err = DB_ERROR; - } - } - -cleanup_and_exit: - if (remote.success) { - os_file_close(remote.file); - } - if (remote.filepath) { - mem_free(remote.filepath); - } - if (remote.crypt_data && remote.crypt_data != crypt_data) { - if (err == DB_SUCCESS) { - fil_space_destroy_crypt_data(&remote.crypt_data); - } - } - if (dict.success) { - os_file_close(dict.file); - } - if (dict.filepath) { - mem_free(dict.filepath); - } - if (dict.crypt_data && dict.crypt_data != crypt_data) { - fil_space_destroy_crypt_data(&dict.crypt_data); - } - if (def.success) { - os_file_close(def.file); - } - if (def.crypt_data && def.crypt_data != crypt_data) { - if (err == DB_SUCCESS) { - fil_space_destroy_crypt_data(&def.crypt_data); - } - } - - mem_free(def.filepath); - - return(err); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_HOTBACKUP -/*******************************************************************//** -Allocates a file name for an old version of a single-table tablespace. -The string must be freed by caller with mem_free()! 
-@return own: file name */ -static -char* -fil_make_ibbackup_old_name( -/*=======================*/ - const char* name) /*!< in: original file name */ -{ - static const char suffix[] = "_ibbackup_old_vers_"; - char* path; - ulint len = strlen(name); - - path = static_cast(mem_alloc(len + (15 + sizeof suffix))); - - memcpy(path, name, len); - memcpy(path + len, suffix, (sizeof suffix) - 1); - ut_sprintf_timestamp_without_extra_chars( - path + len + ((sizeof suffix) - 1)); - return(path); -} -#endif /* UNIV_HOTBACKUP */ - - -/*******************************************************************//** -Determine the space id of the given file descriptor by reading a few -pages from the beginning of the .ibd file. -@return true if space id was successfully identified, or false. */ -static -bool -fil_user_tablespace_find_space_id( -/*==============================*/ - fsp_open_info* fsp) /* in/out: contains file descriptor, which is - used as input. contains space_id, which is - the output */ -{ - bool st; - os_offset_t file_size; - - file_size = os_file_get_size(fsp->file); - - if (file_size == (os_offset_t) -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s", - fsp->filepath); - return(false); - } - - /* Assuming a page size, read the space_id from each page and store it - in a map. Find out which space_id is agreed on by majority of the - pages. Choose that space_id. 
*/ - for (ulint page_size = UNIV_ZIP_SIZE_MIN; - page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) { - - /* map[space_id] = count of pages */ - std::map verify; - - ulint page_count = 64; - ulint valid_pages = 0; - - /* Adjust the number of pages to analyze based on file size */ - while ((page_count * page_size) > file_size) { - --page_count; - } - - ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:" - "%lu", page_size, page_count); - - byte* buf = static_cast(ut_malloc(2*page_size)); - byte* page = static_cast(ut_align(buf, page_size)); - - for (ulint j = 0; j < page_count; ++j) { - - st = os_file_read(fsp->file, page, (j* page_size), page_size); - - if (!st) { - ib_logf(IB_LOG_LEVEL_INFO, - "READ FAIL: page_no:%lu", j); - continue; - } - - bool uncompressed_ok = false; - - /* For uncompressed pages, the page size must be equal - to UNIV_PAGE_SIZE. */ - if (page_size == UNIV_PAGE_SIZE) { - uncompressed_ok = !buf_page_is_corrupted( - false, page, 0); - } - - bool compressed_ok = false; - if (page_size <= UNIV_PAGE_SIZE_DEF) { - compressed_ok = !buf_page_is_corrupted( - false, page, page_size); - } - - if (uncompressed_ok || compressed_ok) { - - ulint space_id = mach_read_from_4(page - + FIL_PAGE_SPACE_ID); - - if (space_id > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "VALID: space:%lu " - "page_no:%lu page_size:%lu", - space_id, j, page_size); - verify[space_id]++; - ++valid_pages; - } - } - } - - ut_free(buf); - - ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id " - "count:%lu", page_size, (ulint) verify.size()); - - const ulint pages_corrupted = 3; - for (ulint missed = 0; missed <= pages_corrupted; ++missed) { - - for (std::map::iterator - m = verify.begin(); m != verify.end(); ++m ) { - - ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, " - "Number of pages matched: %lu/%lu " - "(%lu)", m->first, m->second, - valid_pages, page_size); - - if (m->second == (valid_pages - missed)) { - - ib_logf(IB_LOG_LEVEL_INFO, - "Chosen space:%lu\n", m->first); - - 
fsp->id = m->first; - return(true); - } - } - - } - } - - return(false); -} - -/*******************************************************************//** -Finds the given page_no of the given space id from the double write buffer, -and copies it to the corresponding .ibd file. -@return true if copy was successful, or false. */ -bool -fil_user_tablespace_restore_page( -/*==============================*/ - fsp_open_info* fsp, /* in: contains space id and .ibd - file information */ - ulint page_no) /* in: page_no to obtain from double - write buffer */ -{ - bool err; - ulint flags; - ulint zip_size; - ulint page_size; - ulint buflen; - byte* page; - - ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu", - page_no, fsp->id); - - // find if double write buffer has page_no of given space id - page = recv_sys->dblwr.find_page(fsp->id, page_no); - - if (!page) { - ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have " - "page_no=%lu of space: %lu", page_no, fsp->id); - err = false; - goto out; - } - - flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - zip_size = fsp_flags_get_zip_size(flags); - page_size = fsp_flags_get_page_size(flags); - - ut_ad(page_no == page_get_page_no(page)); - - buflen = zip_size ? zip_size: page_size; - - ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s", - buflen, fsp->filepath); - - err = os_file_write(fsp->filepath, fsp->file, page, - (zip_size ? zip_size : page_size) * page_no, - buflen); - - os_file_flush(fsp->file); -out: - return(err); -} - -/********************************************************************//** -Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.cc data structures. -Set fsp->success to TRUE if tablespace is valid, FALSE if not. 
*/ -static -void -fil_validate_single_table_tablespace( -/*=================================*/ - const char* tablename, /*!< in: database/tablename */ - fsp_open_info* fsp) /*!< in/out: tablespace info */ -{ - bool restore_attempted = false; - -check_first_page: - fsp->success = TRUE; - fsp->encryption_error = 0; - if (const char* check_msg = fil_read_first_page( - fsp->file, FALSE, &fsp->flags, &fsp->id, -#ifdef UNIV_LOG_ARCHIVE - &fsp->arch_log_no, &fsp->arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &fsp->lsn, &fsp->lsn, &fsp->crypt_data)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "%s in tablespace %s (table %s)", - check_msg, fsp->filepath, tablename); - fsp->success = FALSE; - } - - if (!fsp->success) { - if (!restore_attempted) { - if (!fil_user_tablespace_find_space_id(fsp)) { - return; - } - restore_attempted = true; - - if (fsp->id > 0 - && !fil_user_tablespace_restore_page(fsp, 0)) { - return; - } - goto check_first_page; - } - return; - } - - if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace is not sensible;" - " Table: %s Space ID: %lu Filepath: %s\n", - tablename, (ulong) fsp->id, fsp->filepath); - fsp->success = FALSE; - return; - } - - mutex_enter(&fil_system->mutex); - fil_space_t* space = fil_space_get_by_id(fsp->id); - mutex_exit(&fil_system->mutex); - if (space != NULL) { - char* prev_filepath = fil_space_get_first_path(fsp->id); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Attempted to open a previously opened tablespace. " - "Previous tablespace %s uses space ID: %lu at " - "filepath: %s. 
Cannot open tablespace %s which uses " - "space ID: %lu at filepath: %s", - space->name, (ulong) space->id, prev_filepath, - tablename, (ulong) fsp->id, fsp->filepath); - - mem_free(prev_filepath); - fsp->success = FALSE; - return; - } - - fsp->success = TRUE; -} - - -/********************************************************************//** -Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.cc data structures. */ -static -void -fil_load_single_table_tablespace( -/*=============================*/ - const char* dbname, /*!< in: database name */ - const char* filename) /*!< in: file name (not a path), - including the .ibd or .isl extension */ -{ - char* tablename; - ulint tablename_len; - ulint dbname_len = strlen(dbname); - ulint filename_len = strlen(filename); - fsp_open_info def; - fsp_open_info remote; - os_offset_t size; - fil_space_t* space; - - memset(&def, 0, sizeof(def)); - memset(&remote, 0, sizeof(remote)); - - /* The caller assured that the extension is ".ibd" or ".isl". */ - ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4) - || 0 == memcmp(filename + filename_len - 4, ".isl", 4)); - - /* Build up the tablename in the standard form database/table. */ - tablename = static_cast( - mem_alloc(dbname_len + filename_len + 2)); - - /* When lower_case_table_names = 2 it is possible that the - dbname is in upper case ,but while storing it in fil_space_t - we must convert it into lower case */ - sprintf(tablename, "%s" , dbname); - tablename[dbname_len] = '\0'; - - if (lower_case_file_system) { - dict_casedn_str(tablename); - } - - sprintf(tablename+dbname_len,"/%s",filename); - tablename_len = strlen(tablename) - strlen(".ibd"); - tablename[tablename_len] = '\0'; - - /* There may be both .ibd and .isl file in the directory. - And it is possible that the .isl file refers to a different - .ibd file. If so, we open and compare them the first time - one of them is sent to this function. 
So if this table has - already been loaded, there is nothing to do.*/ - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_name(tablename); - if (space) { - mem_free(tablename); - mutex_exit(&fil_system->mutex); - return; - } - mutex_exit(&fil_system->mutex); - - /* Build up the filepath of the .ibd tablespace in the datadir. - This must be freed independent of def.success. */ - def.filepath = fil_make_ibd_name(tablename, false); - -#ifdef __WIN__ -# ifndef UNIV_HOTBACKUP - /* If lower_case_table_names is 0 or 2, then MySQL allows database - directory names with upper case letters. On Windows, all table and - database names in InnoDB are internally always in lower case. Put the - file path to lower case, so that we are consistent with InnoDB's - internal data dictionary. */ - - dict_casedn_str(def.filepath); -# endif /* !UNIV_HOTBACKUP */ -#endif - - /* Check for a link file which locates a remote tablespace. */ - remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file, FALSE); - - /* Read the first page of the remote tablespace */ - if (remote.success) { - fil_validate_single_table_tablespace(tablename, &remote); - if (!remote.success) { - os_file_close(remote.file); - mem_free(remote.filepath); - } - } - - - /* Try to open the tablespace in the datadir. */ - def.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &def.success, FALSE); - - /* Read the first page of the remote tablespace */ - if (def.success) { - fil_validate_single_table_tablespace(tablename, &def); - if (!def.success) { - os_file_close(def.file); - } - } - - if (!def.success && !remote.success) { - - if (def.encryption_error || remote.encryption_error) { - fprintf(stderr, - "InnoDB: Error: could not open single-table" - " tablespace file %s. 
Encryption error!\n", def.filepath); - return; - } - - /* The following call prints an error message */ - os_file_get_last_error(true); - fprintf(stderr, - "InnoDB: Error: could not open single-table" - " tablespace file %s\n", def.filepath); - - if (!strncmp(filename, - tmp_file_prefix, tmp_file_prefix_length)) { - /* Ignore errors for #sql tablespaces. */ - mem_free(tablename); - if (remote.filepath) { - mem_free(remote.filepath); - } - if (def.filepath) { - mem_free(def.filepath); - } - return; - } -no_good_file: - fprintf(stderr, - "InnoDB: We do not continue the crash recovery," - " because the table may become\n" - "InnoDB: corrupt if we cannot apply the log" - " records in the InnoDB log to it.\n" - "InnoDB: To fix the problem and start mysqld:\n" - "InnoDB: 1) If there is a permission problem" - " in the file and mysqld cannot\n" - "InnoDB: open the file, you should" - " modify the permissions.\n" - "InnoDB: 2) If the table is not needed, or you" - " can restore it from a backup,\n" - "InnoDB: then you can remove the .ibd file," - " and InnoDB will do a normal\n" - "InnoDB: crash recovery and ignore that table.\n" - "InnoDB: 3) If the file system or the" - " disk is broken, and you cannot remove\n" - "InnoDB: the .ibd file, you can set" - " innodb_force_recovery > 0 in my.cnf\n" - "InnoDB: and force InnoDB to continue crash" - " recovery here.\n"); -will_not_choose: - mem_free(tablename); - if (remote.filepath) { - mem_free(remote.filepath); - } - if (def.filepath) { - mem_free(def.filepath); - } - - if (srv_force_recovery > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "innodb_force_recovery was set to %lu. 
" - "Continuing crash recovery even though we " - "cannot access the .ibd file of this table.", - srv_force_recovery); - return; - } - - exit(1); - } - - if (def.success && remote.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespaces for %s have been found in two places;\n" - "Location 1: SpaceID: %lu LSN: %lu File: %s\n" - "Location 2: SpaceID: %lu LSN: %lu File: %s\n" - "You must delete one of them.", - tablename, (ulong) def.id, (ulong) def.lsn, - def.filepath, (ulong) remote.id, (ulong) remote.lsn, - remote.filepath); - - def.success = FALSE; - os_file_close(def.file); - os_file_close(remote.file); - goto will_not_choose; - } - - /* At this point, only one tablespace is open */ - ut_a(def.success == !remote.success); - - fsp_open_info* fsp = def.success ? &def : &remote; - - /* Get and test the file size. */ - size = os_file_get_size(fsp->file); - - if (size == (os_offset_t) -1) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "could not measure the size of single-table " - "tablespace file %s", fsp->filepath); - - os_file_close(fsp->file); - goto no_good_file; - } - - /* Every .ibd file is created >= 4 pages in size. Smaller files - cannot be ok. 
*/ - ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE; - if (size < minimum_size) { -#ifndef UNIV_HOTBACKUP - ib_logf(IB_LOG_LEVEL_ERROR, - "The size of single-table tablespace file %s " - "is only " UINT64PF ", should be at least %lu!", - fsp->filepath, size, minimum_size); - os_file_close(fsp->file); - goto no_good_file; -#else - fsp->id = ULINT_UNDEFINED; - fsp->flags = 0; -#endif /* !UNIV_HOTBACKUP */ - } - -#ifdef UNIV_HOTBACKUP - if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_\n" - "InnoDB: because its size %" PRId64 " is too small" - " (< 4 pages 16 kB each),\n" - "InnoDB: or the space id in the file header" - " is not sensible.\n" - "InnoDB: This can happen in an mysqlbackup run," - " and is not dangerous.\n", - fsp->filepath, fsp->id, fsp->filepath, size); - os_file_close(fsp->file); - - new_path = fil_make_ibbackup_old_name(fsp->filepath); - - bool success = os_file_rename( - innodb_file_data_key, fsp->filepath, new_path); - - ut_a(success); - - mem_free(new_path); - - goto func_exit_after_close; - } - - /* A backup may contain the same space several times, if the space got - renamed at a sensitive time. Since it is enough to have one version of - the space, we rename the file if a space with the same space id - already exists in the tablespace memory cache. We rather rename the - file than delete it, because if there is a bug, we do not want to - destroy valuable data. */ - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(fsp->id); - - if (space) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_\n" - "InnoDB: because space %s with the same id\n" - "InnoDB: was scanned earlier. 
This can happen" - " if you have renamed tables\n" - "InnoDB: during an mysqlbackup run.\n", - fsp->filepath, fsp->id, fsp->filepath, - space->name); - os_file_close(fsp->file); - - new_path = fil_make_ibbackup_old_name(fsp->filepath); - - mutex_exit(&fil_system->mutex); - - bool success = os_file_rename( - innodb_file_data_key, fsp->filepath, new_path); - - ut_a(success); - - mem_free(new_path); - - goto func_exit_after_close; - } - mutex_exit(&fil_system->mutex); -#endif /* UNIV_HOTBACKUP */ - ibool file_space_create_success = fil_space_create( - tablename, fsp->id, fsp->flags, FIL_TABLESPACE, - fsp->crypt_data); - - if (!file_space_create_success) { - if (srv_force_recovery > 0) { - fprintf(stderr, - "InnoDB: innodb_force_recovery was set" - " to %lu. Continuing crash recovery\n" - "InnoDB: even though the tablespace" - " creation of this table failed.\n", - srv_force_recovery); - goto func_exit; - } - - /* Exit here with a core dump, stack, etc. */ - ut_a(file_space_create_success); - } - - /* We do not use the size information we have about the file, because - the rounding formula for extents and pages is somewhat complex; we - let fil_node_open() do that task. */ - - if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) { - ut_error; - } - -func_exit: - os_file_close(fsp->file); - -#ifdef UNIV_HOTBACKUP -func_exit_after_close: -#else - ut_ad(!mutex_own(&fil_system->mutex)); -#endif - mem_free(tablename); - if (remote.success) { - mem_free(remote.filepath); - } - mem_free(def.filepath); -} - -/***********************************************************************//** -A fault-tolerant function that tries to read the next file name in the -directory. We retry 100 times if os_file_readdir_next_file() returns -1. The -idea is to read as much good data as we can and jump over bad data. 
-@return 0 if ok, -1 if error even after the retries, 1 if at the end -of the directory */ -static -int -fil_file_readdir_next_file( -/*=======================*/ - dberr_t* err, /*!< out: this is set to DB_ERROR if an error - was encountered, otherwise not changed */ - const char* dirname,/*!< in: directory name or path */ - os_file_dir_t dir, /*!< in: directory stream */ - os_file_stat_t* info) /*!< in/out: buffer where the - info is returned */ -{ - for (ulint i = 0; i < 100; i++) { - int ret = os_file_readdir_next_file(dirname, dir, info); - - if (ret != -1) { - - return(ret); - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "os_file_readdir_next_file() returned -1 in " - "directory %s, crash recovery may have failed " - "for some .ibd files!", dirname); - - *err = DB_ERROR; - } - - return(-1); -} - -/********************************************************************//** -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fil_load_single_table_tablespaces(void) -/*===================================*/ -{ - int ret; - char* dbpath = NULL; - ulint dbpath_len = 100; - os_file_dir_t dir; - os_file_dir_t dbdir; - os_file_stat_t dbinfo; - os_file_stat_t fileinfo; - dberr_t err = DB_SUCCESS; - - /* The datadir of MySQL is always the default directory of mysqld */ - - dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE); - - if (dir == NULL) { - - return(DB_ERROR); - } - - dbpath = static_cast(mem_alloc(dbpath_len)); - - /* Scan all directories under the datadir. They are the database - directories of MySQL. 
*/ - - ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir, - &dbinfo); - while (ret == 0) { - ulint len; - /* printf("Looking at %s in datadir\n", dbinfo.name); */ - - if (dbinfo.type == OS_FILE_TYPE_FILE - || dbinfo.type == OS_FILE_TYPE_UNKNOWN) { - - goto next_datadir_item; - } - - /* We found a symlink or a directory; try opening it to see - if a symlink is a directory */ - - len = strlen(fil_path_to_mysql_datadir) - + strlen (dbinfo.name) + 2; - if (len > dbpath_len) { - dbpath_len = len; - - if (dbpath) { - mem_free(dbpath); - } - - dbpath = static_cast(mem_alloc(dbpath_len)); - } - ut_snprintf(dbpath, dbpath_len, - "%s/%s", fil_path_to_mysql_datadir, dbinfo.name); - srv_normalize_path_for_win(dbpath); - - dbdir = os_file_opendir(dbpath, FALSE); - - if (dbdir != NULL) { - - /* We found a database directory; loop through it, - looking for possible .ibd files in it */ - - ret = fil_file_readdir_next_file(&err, dbpath, dbdir, - &fileinfo); - while (ret == 0) { - - if (fileinfo.type == OS_FILE_TYPE_DIR) { - - goto next_file_item; - } - - /* We found a symlink or a file */ - if (strlen(fileinfo.name) > 4 - && (0 == strcmp(fileinfo.name - + strlen(fileinfo.name) - 4, - ".ibd") - || 0 == strcmp(fileinfo.name - + strlen(fileinfo.name) - 4, - ".isl"))) { - /* The name ends in .ibd or .isl; - try opening the file */ - fil_load_single_table_tablespace( - dbinfo.name, fileinfo.name); - } -next_file_item: - ret = fil_file_readdir_next_file(&err, - dbpath, dbdir, - &fileinfo); - } - - if (0 != os_file_closedir(dbdir)) { - fputs("InnoDB: Warning: could not" - " close database directory ", stderr); - ut_print_filename(stderr, dbpath); - putc('\n', stderr); - - err = DB_ERROR; - } - } - -next_datadir_item: - ret = fil_file_readdir_next_file(&err, - fil_path_to_mysql_datadir, - dir, &dbinfo); - } - - mem_free(dbpath); - - if (0 != os_file_closedir(dir)) { - fprintf(stderr, - "InnoDB: Error: could not close MySQL datadir\n"); - - return(DB_ERROR); - } - - 
return(err); -} - -/*******************************************************************//** -Returns TRUE if a single-table tablespace does not exist in the memory cache, -or is being deleted there. -@return TRUE if does not exist or is being deleted */ -UNIV_INTERN -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - ulint id, /*!< in: space id */ - ib_int64_t version)/*!< in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL || space->stop_new_ops) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - if (version != ((ib_int64_t)-1) - && space->tablespace_version != version) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - mutex_exit(&fil_system->mutex); - - return(FALSE); -} - -/*******************************************************************//** -Returns TRUE if a single-table tablespace exists in the memory cache. -@return TRUE if exists */ -UNIV_INTERN -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - mutex_exit(&fil_system->mutex); - - return(space != NULL); -} - -/*******************************************************************//** -Report that a tablespace for a table was not found. 
*/ -static -void -fil_report_missing_tablespace( -/*===========================*/ - const char* name, /*!< in: table name */ - ulint space_id) /*!< in: table's space id */ -{ - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name(index_name, sizeof(index_name), name, TRUE); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Table %s in the InnoDB data dictionary has tablespace id %lu, " - "but tablespace with that id or name does not exist. Have " - "you deleted or moved .ibd files? This may also be a table " - "created with CREATE TEMPORARY TABLE whose .ibd and .frm " - "files MySQL automatically removed, but the table still " - "exists in the InnoDB internal data dictionary.", - name, space_id); -} - -/*******************************************************************//** -Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory -cache. Note that if we have not done a crash recovery at the database startup, -there may be many tablespaces which are not yet in the memory cache. -@return TRUE if a matching tablespace exists in the memory cache */ -UNIV_INTERN -ibool -fil_space_for_table_exists_in_mem( -/*==============================*/ - ulint id, /*!< in: space id */ - const char* name, /*!< in: table name used in - fil_space_create(). 
Either the - standard 'dbname/tablename' format - or table->dir_path_of_temp_table */ - ibool mark_space, /*!< in: in crash recovery, at database - startup we mark all spaces which have - an associated table in the InnoDB - data dictionary, so that - we can print a warning about orphaned - tablespaces */ - ibool print_error_if_does_not_exist, - /*!< in: print detailed error - information to the .err log if a - matching tablespace is not found from - memory */ - bool adjust_space, /*!< in: whether to adjust space id - when find table space mismatch */ - mem_heap_t* heap, /*!< in: heap memory */ - table_id_t table_id) /*!< in: table id */ -{ - fil_space_t* fnamespace; - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - /* Look if there is a space with the same id */ - - space = fil_space_get_by_id(id); - - /* Look if there is a space with the same name; the name is the - directory path from the datadir to the file */ - - fnamespace = fil_space_get_by_name(name); - if (space && space == fnamespace) { - /* Found */ - - if (mark_space) { - space->mark = TRUE; - } - - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - /* Info from "fnamespace" comes from the ibd file itself, it can - be different from data obtained from System tables since it is - not transactional. 
If adjust_space is set, and the mismatching - space are between a user table and its temp table, we shall - adjust the ibd file name according to system table info */ - if (adjust_space - && space != NULL - && row_is_mysql_tmp_table_name(space->name) - && !row_is_mysql_tmp_table_name(name)) { - - mutex_exit(&fil_system->mutex); - - DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space", - DBUG_SUICIDE();); - - if (fnamespace) { - char* tmp_name; - - tmp_name = dict_mem_create_temporary_tablename( - heap, name, table_id); - - fil_rename_tablespace(fnamespace->name, fnamespace->id, - tmp_name, NULL); - } - - DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", - DBUG_SUICIDE();); - - fil_rename_tablespace(space->name, id, name, NULL); - - DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", - DBUG_SUICIDE();); - - mutex_enter(&fil_system->mutex); - fnamespace = fil_space_get_by_name(name); - ut_ad(space == fnamespace); - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - if (!print_error_if_does_not_exist) { - - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - if (space == NULL) { - if (fnamespace == NULL) { - if (print_error_if_does_not_exist) { - fil_report_missing_tablespace(name, id); - } - } else { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary has" - " tablespace id %lu,\n" - "InnoDB: but a tablespace with that id" - " does not exist. There is\n" - "InnoDB: a tablespace of name %s and id %lu," - " though. 
Have\n" - "InnoDB: you deleted or moved .ibd files?\n", - (ulong) id, fnamespace->name, - (ulong) fnamespace->id); - } -error_exit: - fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n" - "InnoDB: for how to resolve the issue.\n", stderr); - - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - if (0 != strcmp(space->name, name)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary has" - " tablespace id %lu,\n" - "InnoDB: but the tablespace with that id" - " has name %s.\n" - "InnoDB: Have you deleted or moved .ibd files?\n", - (ulong) id, space->name); - - if (fnamespace != NULL) { - fputs("InnoDB: There is a tablespace" - " with the right name\n" - "InnoDB: ", stderr); - ut_print_filename(stderr, fnamespace->name); - fprintf(stderr, ", but its id is %lu.\n", - (ulong) fnamespace->id); - } - - goto error_exit; - } - - mutex_exit(&fil_system->mutex); - - return(FALSE); -} - -/*******************************************************************//** -Checks if a single-table tablespace for a given table name exists in the -tablespace memory cache. -@return space id, ULINT_UNDEFINED if not found */ -UNIV_INTERN -ulint -fil_get_space_id_for_table( -/*=======================*/ - const char* tablename) /*!< in: table name in the standard - 'databasename/tablename' format */ -{ - fil_space_t* fnamespace; - ulint id = ULINT_UNDEFINED; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - /* Look if there is a space with the same name. */ - - fnamespace = fil_space_get_by_name(tablename); - - if (fnamespace) { - id = fnamespace->id; - } - - mutex_exit(&fil_system->mutex); - - return(id); -} - -/**********************************************************************//** -Tries to extend a data file so that it would accommodate the number of pages -given. 
The tablespace must be cached in the memory cache. If the space is big -enough already, does nothing. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_extend_space_to_desired_size( -/*=============================*/ - ulint* actual_size, /*!< out: size of the space after extension; - if we ran out of disk space this may be lower - than the desired size */ - ulint space_id, /*!< in: space id */ - ulint size_after_extend)/*!< in: desired size in pages after the - extension; if the current space size is bigger - than this already, the function does nothing */ -{ - fil_node_t* node; - fil_space_t* space; - byte* buf2; - byte* buf; - ulint buf_size; - ulint start_page_no; - ulint file_start_page_no; - ulint page_size; - ulint pages_added; - ibool success; - - ut_ad(!srv_read_only_mode); - -retry: - pages_added = 0; - success = TRUE; - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - ut_a(space); - - if (space->size >= size_after_extend) { - /* Space already big enough */ - - *actual_size = space->size; - - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - page_size = fsp_flags_get_zip_size(space->flags); - - if (!page_size) { - page_size = UNIV_PAGE_SIZE; - } - - node = UT_LIST_GET_LAST(space->chain); - - if (!node->being_extended) { - /* Mark this node as undergoing extension. This flag - is used by other threads to wait for the extension - opereation to finish. */ - node->being_extended = TRUE; - } else { - /* Another thread is currently extending the file. Wait - for it to finish. - It'd have been better to use event driven mechanism but - the entire module is peppered with polling stuff. 
*/ - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - goto retry; - } - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - mutex_exit(&fil_system->mutex); - - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. */ - mutex_exit(&fil_system->mutex); - - start_page_no = space->size; - file_start_page_no = space->size - node->size; - - /* Determine correct file block size */ - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - } - -#ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF "\n", - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); - success = FALSE; - } else { - success = TRUE; - } - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - mutex_enter(&fil_system->mutex); - - if (success) { - node->size += (size_after_extend - start_page_no); - space->size += (size_after_extend - start_page_no); - - os_has_said_disk_full = FALSE; - } - - /* If posix_fallocate was used to extent the file space - we need to complete the io. Because no actual writes were - dispatched read operation is enough here. Without this - there will be assertion at shutdown indicating that - all IO is not completed. 
*/ - fil_node_complete_io(node, fil_system, OS_FILE_READ); - goto file_extended; - } -#endif - - /* Extend at most 64 pages at a time */ - buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast(mem_alloc(buf_size + page_size)); - buf = static_cast(ut_align(buf2, page_size)); - - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { - ulint n_pages - = ut_min(buf_size / page_size, - size_after_extend - start_page_no); - - os_offset_t offset - = ((os_offset_t) (start_page_no - file_start_page_no)) - * page_size; - - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, - offset, page_size * n_pages); -#else - success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - name, node->handle, buf, - offset, page_size * n_pages, page_size, - node, NULL, 0); -#endif /* UNIV_HOTBACKUP */ - - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); - - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; - - pages_added += n_pages; - break; - } - - start_page_no += n_pages; - pages_added += n_pages; - } - - mem_free(buf2); - - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - - space->size += pages_added; - node->size += pages_added; - - fil_node_complete_io(node, fil_system, OS_FILE_WRITE); - - /* At this point file has been extended */ -file_extended: - - node->being_extended = FALSE; - *actual_size = space->size; - -#ifndef UNIV_HOTBACKUP - if (space_id == 0) { - ulint pages_per_mb = (1024 * 1024) / page_size; - - /* Keep the last data file size info up to date, rounded to - full megabytes */ - - 
srv_data_file_sizes[srv_n_data_files - 1] - = (node->size / pages_per_mb) * pages_per_mb; - } -#endif /* !UNIV_HOTBACKUP */ - - /* - printf("Extended %s to %lu, actual size %lu pages\n", space->name, - size_after_extend, *actual_size); */ - mutex_exit(&fil_system->mutex); - - fil_flush(space_id); - - return(success); -} - -#ifdef UNIV_HOTBACKUP -/********************************************************************//** -Extends all tablespaces to the size stored in the space header. During the -mysqlbackup --apply-log phase we extended the spaces on-demand so that log -records could be applied, but that may have left spaces still too small -compared to the size stored in the space header. */ -UNIV_INTERN -void -fil_extend_tablespaces_to_stored_len(void) -/*======================================*/ -{ - fil_space_t* space; - byte* buf; - ulint actual_size; - ulint size_in_header; - dberr_t error; - ibool success; - - buf = mem_alloc(UNIV_PAGE_SIZE); - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - ut_a(space->purpose == FIL_TABLESPACE); - - mutex_exit(&fil_system->mutex); /* no need to protect with a - mutex, because this is a - single-threaded operation */ - error = fil_read(TRUE, space->id, - fsp_flags_get_zip_size(space->flags), - 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0); - ut_a(error == DB_SUCCESS); - - size_in_header = fsp_get_size_low(buf); - - success = fil_extend_space_to_desired_size( - &actual_size, space->id, size_in_header); - if (!success) { - fprintf(stderr, - "InnoDB: Error: could not extend the" - " tablespace of %s\n" - "InnoDB: to the size stored in header," - " %lu pages;\n" - "InnoDB: size after extension %lu pages\n" - "InnoDB: Check that you have free disk space" - " and retry!\n", - space->name, size_in_header, actual_size); - ut_a(success); - } - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); - - 
mem_free(buf); -} -#endif - -/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ - -/*******************************************************************//** -Tries to reserve free extents in a file space. -@return TRUE if succeed */ -UNIV_INTERN -ibool -fil_space_reserve_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_free_now, /*!< in: number of free extents now */ - ulint n_to_reserve) /*!< in: how many one wants to reserve */ -{ - fil_space_t* space; - ibool success; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - if (space->n_reserved_extents + n_to_reserve > n_free_now) { - success = FALSE; - } else { - space->n_reserved_extents += n_to_reserve; - success = TRUE; - } - - mutex_exit(&fil_system->mutex); - - return(success); -} - -/*******************************************************************//** -Releases free extents in a file space. */ -UNIV_INTERN -void -fil_space_release_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_reserved) /*!< in: how many one reserved */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - ut_a(space->n_reserved_extents >= n_reserved); - - space->n_reserved_extents -= n_reserved; - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Gets the number of reserved extents. If the database is silent, this number -should be zero. 
*/ -UNIV_INTERN -ulint -fil_space_get_n_reserved_extents( -/*=============================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ulint n; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - n = space->n_reserved_extents; - - mutex_exit(&fil_system->mutex); - - return(n); -} - -/*============================ FILE I/O ================================*/ - -/********************************************************************//** -NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! - -Prepares a file node for i/o. Opens the file if it is closed. Updates the -pending i/o's field in the node and the system appropriately. Takes the node -off the LRU list if it is in the LRU list. The caller must hold the fil_sys -mutex. -@return false if the file can't be opened, otherwise true */ -static -bool -fil_node_prepare_for_io( -/*====================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space) /*!< in: space */ -{ - ut_ad(node && system && space); - ut_ad(mutex_own(&(system->mutex))); - - if (system->n_open > system->max_n_open + 5) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: open files %lu" - " exceeds the limit %lu\n", - (ulong) system->n_open, - (ulong) system->max_n_open); - } - - if (node->open == FALSE) { - /* File is closed: open it */ - ut_a(node->n_pending == 0); - - if (!fil_node_open_file(node, system, space)) { - return(false); - } - } - - if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { - /* The node is in the LRU list, remove it */ - - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); - - UT_LIST_REMOVE(LRU, system->LRU, node); - } - - node->n_pending++; - - return(true); -} - -/********************************************************************//** -Updates the data structures when an i/o operation finishes. 
Updates the -pending i/o's field in the node appropriately. */ -static -void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -{ - ut_ad(node); - ut_ad(system); - ut_ad(mutex_own(&(system->mutex))); - - ut_a(node->n_pending > 0); - - node->n_pending--; - - if (type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - system->modification_counter++; - node->modification_counter = system->modification_counter; - - if (fil_buffering_disabled(node->space)) { - - /* We don't need to keep track of unflushed - changes as user has explicitly disabled - buffering. */ - ut_ad(!node->space->is_in_unflushed_spaces); - node->flush_counter = node->modification_counter; - - } else if (!node->space->is_in_unflushed_spaces) { - - node->space->is_in_unflushed_spaces = true; - UT_LIST_ADD_FIRST(unflushed_spaces, - system->unflushed_spaces, - node->space); - } - } - - if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { - - /* The node must be put back to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); - } -} - -/********************************************************************//** -Report information about an invalid page access. 
*/ -static -void -fil_report_invalid_page_access( -/*===========================*/ - ulint block_offset, /*!< in: block offset */ - ulint space_id, /*!< in: space id */ - const char* space_name, /*!< in: space name */ - ulint byte_offset, /*!< in: byte offset */ - ulint len, /*!< in: I/O length */ - ulint type) /*!< in: I/O type */ -{ - fprintf(stderr, - "InnoDB: Error: trying to access page number %lu" - " in space %lu,\n" - "InnoDB: space name %s,\n" - "InnoDB: which is outside the tablespace bounds.\n" - "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n" - "InnoDB: If you get this error at mysqld startup," - " please check that\n" - "InnoDB: your my.cnf matches the ibdata files" - " that you have in the\n" - "InnoDB: MySQL server.\n", - (ulong) block_offset, (ulong) space_id, space_name, - (ulong) byte_offset, (ulong) len, (ulong) type); -} - -/********************************************************************//** -Find correct node from file space -@return node */ -static -fil_node_t* -fil_space_get_node( - fil_space_t* space, /*!< in: file spage */ - ulint space_id, /*!< in: space id */ - ulint* block_offset, /*!< in/out: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len) /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ -{ - fil_node_t* node; - ut_ad(mutex_own(&fil_system->mutex)); - - node = UT_LIST_GET_FIRST(space->chain); - - for (;;) { - if (node == NULL) { - return(NULL); - } else if (fil_is_user_tablespace_id(space->id) - && node->size == 0) { - - /* We do not know the size of a single-table tablespace - before we open the file */ - break; - } else if (node->size > *block_offset) { - /* Found! 
*/ - break; - } else { - *block_offset -= node->size; - node = UT_LIST_GET_NEXT(chain, node); - } - } - - return (node); -} -/********************************************************************//** -Return block size of node in file space -@return file block size */ -UNIV_INTERN -ulint -fil_space_get_block_size( -/*=====================*/ - ulint space_id, - ulint block_offset, - ulint len) -{ - ulint block_size = 512; - ut_ad(!mutex_own(&fil_system->mutex)); - - mutex_enter(&fil_system->mutex); - fil_space_t* space = fil_space_get_space(space_id); - - if (space) { - fil_node_t* node = fil_space_get_node(space, space_id, &block_offset, 0, len); - - if (node) { - block_size = node->file_block_size; - } - } - mutex_exit(&fil_system->mutex); - - return block_size; -} - -/********************************************************************//** -Reads or writes data. This operation is asynchronous (aio). -@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do -i/o on a tablespace which does not exist */ -UNIV_INTERN -dberr_t -fil_io( -/*===*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, - ORed to OS_FILE_LOG, if a log i/o - and ORed to OS_AIO_SIMULATED_WAKE_LATER - if simulated aio and we want to post a - batch of i/os; NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len, /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ - void* buf, /*!< in/out: buffer where to store read data - or from where to write; in aio this must be - appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ -{ - ulint mode; - fil_space_t* space; - fil_node_t* node; - ibool ret; - ulint is_log; - ulint wake_later; - os_offset_t offset; - bool ignore_nonexistent_pages; - - is_log = type & OS_FILE_LOG; - type = type & ~OS_FILE_LOG; - - wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; - type = type & ~OS_AIO_SIMULATED_WAKE_LATER; - - ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES; - type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; - - ut_ad(byte_offset < UNIV_PAGE_SIZE); - ut_ad(!zip_size || !byte_offset); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(buf); - ut_ad(len > 0); - ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT)); -#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX -# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX" -#endif -#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN -# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN" -#endif - ut_ad(fil_validate_skip()); -#ifndef UNIV_HOTBACKUP -# ifndef UNIV_LOG_DEBUG - /* ibuf bitmap pages must be read in the sync aio mode: */ - 
ut_ad(recv_no_ibuf_operations - || type == OS_FILE_WRITE - || !ibuf_bitmap_page(zip_size, block_offset) - || sync - || is_log); -# endif /* UNIV_LOG_DEBUG */ - if (sync) { - mode = OS_AIO_SYNC; - } else if (is_log) { - mode = OS_AIO_LOG; - } else if (type == OS_FILE_READ - && !recv_no_ibuf_operations - && ibuf_page(space_id, zip_size, block_offset, NULL)) { - mode = OS_AIO_IBUF; - } else { - mode = OS_AIO_NORMAL; - } -#else /* !UNIV_HOTBACKUP */ - ut_a(sync); - mode = OS_AIO_SYNC; -#endif /* !UNIV_HOTBACKUP */ - - if (type == OS_FILE_READ) { - srv_stats.data_read.add(len); - } else if (type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - srv_stats.data_written.add(len); - if (fil_page_is_index_page((byte *)buf)) { - srv_stats.index_pages_written.inc(); - } else { - srv_stats.non_index_pages_written.inc(); - } - } - - /* Reserve the fil_system mutex and make sure that we can open at - least one file while holding it, if the file is not already open */ - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - - /* If we are deleting a tablespace we don't allow async read operations - on that. However, we do allow write and sync read operations */ - if (space == 0 - || (type == OS_FILE_READ && !sync && space->stop_new_ops)) { - mutex_exit(&fil_system->mutex); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to do i/o to a tablespace which does " - "not exist. i/o type %lu, space id %lu, " - "page no. 
%lu, i/o length %lu bytes", - (ulong) type, (ulong) space_id, (ulong) block_offset, - (ulong) len); - - return(DB_TABLESPACE_DELETED); - } - - ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE); - - node = fil_space_get_node(space, space_id, &block_offset, byte_offset, len); - - if (!node) { - if (ignore_nonexistent_pages) { - mutex_exit(&fil_system->mutex); - return(DB_ERROR); - } - fil_report_invalid_page_access( - block_offset, space_id, space->name, - byte_offset, len, type); - - ut_error; - } - - /* Open file if closed */ - if (!fil_node_prepare_for_io(node, fil_system, space)) { - if (space->purpose == FIL_TABLESPACE - && fil_is_user_tablespace_id(space->id)) { - mutex_exit(&fil_system->mutex); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to do i/o to a tablespace which " - "exists without .ibd data file. " - "i/o type %lu, space id %lu, page no %lu, " - "i/o length %lu bytes", - (ulong) type, (ulong) space_id, - (ulong) block_offset, (ulong) len); - - return(DB_TABLESPACE_DELETED); - } - - /* The tablespace is for log. Currently, we just assert here - to prevent handling errors along the way fil_io returns. - Also, if the log files are missing, it would be hard to - promise the server can continue running. */ - ut_a(0); - } - - /* Check that at least the start offset is within the bounds of a - single-table tablespace, including rollback tablespaces. 
*/ - if (UNIV_UNLIKELY(node->size <= block_offset) - && space->id != 0 && space->purpose == FIL_TABLESPACE) { - - fil_report_invalid_page_access( - block_offset, space_id, space->name, byte_offset, - len, type); - - ut_error; - } - - /* Now we have made the changes in the data structures of fil_system */ - mutex_exit(&fil_system->mutex); - - /* Calculate the low 32 bits and the high 32 bits of the file offset */ - - if (!zip_size) { - offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT) - + byte_offset; - - ut_a(node->size - block_offset - >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) - / UNIV_PAGE_SIZE)); - } else { - ulint zip_size_shift; - switch (zip_size) { - case 1024: zip_size_shift = 10; break; - case 2048: zip_size_shift = 11; break; - case 4096: zip_size_shift = 12; break; - case 8192: zip_size_shift = 13; break; - case 16384: zip_size_shift = 14; break; - case 32768: zip_size_shift = 15; break; - case 65536: zip_size_shift = 16; break; - default: ut_error; - } - offset = ((os_offset_t) block_offset << zip_size_shift) - + byte_offset; - ut_a(node->size - block_offset - >= (len + (zip_size - 1)) / zip_size); - } - - /* Do aio */ - - ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); - - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_HOTBACKUP - /* In mysqlbackup do normal i/o, not aio */ - if (type == OS_FILE_READ) { - ret = os_file_read(node->handle, buf, offset, len); - } else { - ut_ad(!srv_read_only_mode); - ret = os_file_write(name, node->handle, buf, - offset, len); - } -#else - /* Queue the aio request */ - ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf, - offset, len, zip_size ? 
zip_size : UNIV_PAGE_SIZE, node, - message, write_size); - -#endif /* UNIV_HOTBACKUP */ - - - if (mode == OS_AIO_SYNC) { - /* The i/o operation is already completed when we return from - os_aio: */ - - mutex_enter(&fil_system->mutex); - - fil_node_complete_io(node, fil_system, type); - - mutex_exit(&fil_system->mutex); - - ut_ad(fil_validate_skip()); - } - - if (!ret) { - return(DB_OUT_OF_FILE_SPACE); - } - - return(DB_SUCCESS); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Waits for an aio operation to complete. This function is used to write the -handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.cc for more info). The thread specifies which -segment it wants to wait for. */ -UNIV_INTERN -void -fil_aio_wait( -/*=========*/ - ulint segment) /*!< in: the number of the segment in the aio - array to wait for */ -{ - ibool ret; - fil_node_t* fil_node; - void* message; - ulint type; - - ut_ad(fil_validate_skip()); - - if (srv_use_native_aio) { - srv_set_io_thread_op_info(segment, "native aio handle"); -#ifdef WIN_ASYNC_IO - ret = os_aio_windows_handle( - segment, 0, &fil_node, &message, &type); -#elif defined(LINUX_NATIVE_AIO) - ret = os_aio_linux_handle( - segment, &fil_node, &message, &type); -#else - ut_error; - ret = 0; /* Eliminate compiler warning */ -#endif /* WIN_ASYNC_IO */ - } else { - srv_set_io_thread_op_info(segment, "simulated aio handle"); - - ret = os_aio_simulated_handle( - segment, &fil_node, &message, &type); - } - - ut_a(ret); - if (fil_node == NULL) { - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); - return; - } - - srv_set_io_thread_op_info(segment, "complete io for fil node"); - - mutex_enter(&fil_system->mutex); - - fil_node_complete_io(fil_node, fil_system, type); - - mutex_exit(&fil_system->mutex); - - ut_ad(fil_validate_skip()); - - /* Do the i/o handling */ - /* IMPORTANT: since i/o handling for reads will read also 
the insert - buffer in tablespace 0, you have to be very careful not to introduce - deadlocks in the i/o system. We keep tablespace 0 data files always - open, and use a special i/o thread to serve insert buffer requests. */ - - if (fil_node->space->purpose == FIL_TABLESPACE) { - srv_set_io_thread_op_info(segment, "complete io for buf page"); - buf_page_io_complete(static_cast(message)); - } else { - srv_set_io_thread_op_info(segment, "complete io for log"); - log_io_complete(static_cast(message)); - } -} -#endif /* UNIV_HOTBACKUP */ - -/**********************************************************************//** -Flushes to disk possible writes cached by the OS. If the space does not exist -or is being dropped, does not do anything. */ -UNIV_INTERN -void -fil_flush( -/*======*/ - ulint space_id) /*!< in: file space id (this can be a group of - log files or a tablespace of the database) */ -{ - fil_space_t* space; - fil_node_t* node; - os_file_t file; - - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(space_id); - - if (!space || space->stop_new_ops) { - mutex_exit(&fil_system->mutex); - - return; - } - - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. 
*/ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(&fil_system->mutex); - return; - } - - space->n_pending_flushes++; /*!< prevent dropping of the space while - we are flushing */ - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - ib_int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->open); - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes++; - } else { - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - } -#ifdef __WIN__ - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* __WIN__ */ -retry: - if (node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - - ib_int64_t sig_count = - os_event_reset(node->sync_event); - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->open); - file = node->handle; - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(file); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; -skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - unflushed_spaces, - 
fil_system->unflushed_spaces, - space); - } - } - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes--; - } else { - fil_n_pending_log_flushes--; - } - } - - space->n_pending_flushes--; - - mutex_exit(&fil_system->mutex); -} - -/**********************************************************************//** -Flushes to disk the writes in file spaces of the given type possibly cached by -the OS. */ -UNIV_INTERN -void -fil_flush_file_spaces( -/*==================*/ - ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */ -{ - fil_space_t* space; - ulint* space_ids; - ulint n_space_ids; - ulint i; - - mutex_enter(&fil_system->mutex); - - n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces); - if (n_space_ids == 0) { - - mutex_exit(&fil_system->mutex); - return; - } - - /* Assemble a list of space ids to flush. Previously, we - traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT() - on a space that was just removed from the list by fil_flush(). - Thus, the space could be dropped and the memory overwritten. */ - space_ids = static_cast( - mem_alloc(n_space_ids * sizeof *space_ids)); - - n_space_ids = 0; - - for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces); - space; - space = UT_LIST_GET_NEXT(unflushed_spaces, space)) { - - if (space->purpose == purpose && !space->stop_new_ops) { - - space_ids[n_space_ids++] = space->id; - } - } - - mutex_exit(&fil_system->mutex); - - /* Flush the spaces. It will not hurt to call fil_flush() on - a non-existing space id. */ - for (i = 0; i < n_space_ids; i++) { - - fil_flush(space_ids[i]); - } - - mem_free(space_ids); -} - -/** Functor to validate the space list. */ -struct Check { - void operator()(const fil_node_t* elem) - { - ut_a(elem->open || !elem->n_pending); - } -}; - -/******************************************************************//** -Checks the consistency of the tablespace cache. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -fil_validate(void) -/*==============*/ -{ - fil_space_t* space; - fil_node_t* fil_node; - ulint n_open = 0; - ulint i; - - mutex_enter(&fil_system->mutex); - - /* Look for spaces in the hash table */ - - for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) { - - for (space = static_cast( - HASH_GET_FIRST(fil_system->spaces, i)); - space != 0; - space = static_cast( - HASH_GET_NEXT(hash, space))) { - - UT_LIST_VALIDATE( - chain, fil_node_t, space->chain, Check()); - - for (fil_node = UT_LIST_GET_FIRST(space->chain); - fil_node != 0; - fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { - - if (fil_node->n_pending > 0) { - ut_a(fil_node->open); - } - - if (fil_node->open) { - n_open++; - } - } - } - } - - ut_a(fil_system->n_open == n_open); - - UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU); - - for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU); - fil_node != 0; - fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { - - ut_a(fil_node->n_pending == 0); - ut_a(!fil_node->being_extended); - ut_a(fil_node->open); - ut_a(fil_space_belongs_in_lru(fil_node->space)); - } - - mutex_exit(&fil_system->mutex); - - return(TRUE); -} - -/********************************************************************//** -Returns TRUE if file address is undefined. -@return TRUE if undefined */ -UNIV_INTERN -ibool -fil_addr_is_null( -/*=============*/ - fil_addr_t addr) /*!< in: address */ -{ - return(addr.page == FIL_NULL); -} - -/********************************************************************//** -Get the predecessor of a file page. -@return FIL_PAGE_PREV */ -UNIV_INTERN -ulint -fil_page_get_prev( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -/********************************************************************//** -Get the successor of a file page. 
-@return FIL_PAGE_NEXT */ -UNIV_INTERN -ulint -fil_page_get_next( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/*********************************************************************//** -Sets the file page type. */ -UNIV_INTERN -void -fil_page_set_type( -/*==============*/ - byte* page, /*!< in/out: file page */ - ulint type) /*!< in: type */ -{ - ut_ad(page); - - mach_write_to_2(page + FIL_PAGE_TYPE, type); -} - -/*********************************************************************//** -Gets the file page type. -@return type; NOTE that if the type has not been written to page, the -return value not defined */ -UNIV_INTERN -ulint -fil_page_get_type( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - ut_ad(page); - - return(mach_read_from_2(page + FIL_PAGE_TYPE)); -} - -/****************************************************************//** -Closes the tablespace memory cache. */ -UNIV_INTERN -void -fil_close(void) -/*===========*/ -{ - fil_space_crypt_cleanup(); - -#ifndef UNIV_HOTBACKUP - /* The mutex should already have been freed. */ - ut_ad(fil_system->mutex.magic_n == 0); -#endif /* !UNIV_HOTBACKUP */ - - hash_table_free(fil_system->spaces); - - hash_table_free(fil_system->name_hash); - - ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0); - ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0); - ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0); - - mem_free(fil_system); - - fil_system = NULL; -} - -/********************************************************************//** -Initializes a buffer control block when the buf_pool is created. */ -static -void -fil_buf_block_init( -/*===============*/ - buf_block_t* block, /*!< in: pointer to control block */ - byte* frame) /*!< in: pointer to buffer frame */ -{ - UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); - - block->frame = frame; - - block->page.io_fix = BUF_IO_NONE; - /* There are assertions that check for this. 
*/ - block->page.buf_fix_count = 1; - block->page.state = BUF_BLOCK_READY_FOR_USE; - - page_zip_des_init(&block->page.zip); -} - -struct fil_iterator_t { - os_file_t file; /*!< File handle */ - const char* filepath; /*!< File path name */ - os_offset_t start; /*!< From where to start */ - os_offset_t end; /*!< Where to stop */ - os_offset_t file_size; /*!< File size in bytes */ - ulint page_size; /*!< Page size */ - ulint n_io_buffers; /*!< Number of pages to use - for IO */ - byte* io_buffer; /*!< Buffer to use for IO */ - fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */ - byte* crypt_io_buffer; /*!< IO buffer when encrypted */ -}; - -/********************************************************************//** -TODO: This can be made parallel trivially by chunking up the file and creating -a callback per thread. . Main benefit will be to use multiple CPUs for -checksums and compressed tables. We have to do compressed tables block by -block right now. Secondly we need to decompress/compress and copy too much -of data. These are CPU intensive. - -Iterate over all the pages in the tablespace. -@param iter - Tablespace iterator -@param block - block to use for IO -@param callback - Callback to inspect and update page contents -@retval DB_SUCCESS or error code */ -static -dberr_t -fil_iterate( -/*========*/ - const fil_iterator_t& iter, - buf_block_t* block, - PageCallback& callback) -{ - os_offset_t offset; - ulint page_no = 0; - ulint space_id = callback.get_space_id(); - ulint n_bytes = iter.n_io_buffers * iter.page_size; - - ut_ad(!srv_read_only_mode); - - /* TODO: For compressed tables we do a lot of useless - copying for non-index pages. 
Unfortunately, it is - required by buf_zip_decompress() */ - - for (offset = iter.start; offset < iter.end; offset += n_bytes) { - - byte* io_buffer = iter.io_buffer; - - block->frame = io_buffer; - - if (callback.get_zip_size() > 0) { - page_zip_des_init(&block->page.zip); - page_zip_set_size(&block->page.zip, iter.page_size); - block->page.zip.data = block->frame + UNIV_PAGE_SIZE; - ut_d(block->page.zip.m_external = true); - ut_ad(iter.page_size == callback.get_zip_size()); - - /* Zip IO is done in the compressed page buffer. */ - io_buffer = block->page.zip.data; - } else { - io_buffer = iter.io_buffer; - } - - /* We have to read the exact number of bytes. Otherwise the - InnoDB IO functions croak on failed reads. */ - - n_bytes = static_cast( - ut_min(static_cast(n_bytes), - iter.end - offset)); - - ut_ad(n_bytes > 0); - ut_ad(!(n_bytes % iter.page_size)); - - byte* readptr = io_buffer; - byte* writeptr = io_buffer; - bool encrypted = false; - - /* Use additional crypt io buffer if tablespace is encrypted */ - if ((iter.crypt_data != NULL && iter.crypt_data->encryption == FIL_SPACE_ENCRYPTION_ON) || - (srv_encrypt_tables && - iter.crypt_data && iter.crypt_data->encryption == FIL_SPACE_ENCRYPTION_DEFAULT)) { - - encrypted = true; - readptr = iter.crypt_io_buffer; - writeptr = iter.crypt_io_buffer; - } - - if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) { - - ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); - - return(DB_IO_ERROR); - } - - bool updated = false; - os_offset_t page_off = offset; - ulint n_pages_read = (ulint) n_bytes / iter.page_size; - bool decrypted = false; - - for (ulint i = 0; i < n_pages_read; ++i) { - ulint size = iter.page_size; - dberr_t err = DB_SUCCESS; - byte* src = (readptr + (i * size)); - byte* dst = (io_buffer + (i * size)); - - ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); - - bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED || - page_type == FIL_PAGE_PAGE_COMPRESSED); - - /* If 
tablespace is encrypted, we need to decrypt - the page. */ - if (encrypted) { - decrypted = fil_space_decrypt( - iter.crypt_data, - dst, //dst - iter.page_size, - src, // src - &err); // src - - if (err != DB_SUCCESS) { - return(err); - } - - if (decrypted) { - updated = true; - } else { - /* TODO: remove unnecessary memcpy's */ - memcpy(dst, src, size); - } - } - - /* If the original page is page_compressed, we need - to decompress page before we can update it. */ - if (page_compressed) { - fil_decompress_page(NULL, dst, size, NULL); - updated = true; - } - - buf_block_set_file_page(block, space_id, page_no++); - - if ((err = callback(page_off, block)) != DB_SUCCESS) { - - return(err); - - } else if (!updated) { - updated = buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE; - } - - buf_block_set_state(block, BUF_BLOCK_NOT_USED); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); - - src = (io_buffer + (i * size)); - - if (page_compressed) { - ulint len = 0; - fil_compress_page(space_id, - src, - NULL, - size, - fil_space_get_page_compression_level(space_id), - fil_space_get_block_size(space_id, offset, size), - encrypted, - &len, - NULL); - - updated = true; - } - - /* If tablespace is encrypted, encrypt page before we - write it back. Note that we should not encrypt the - buffer that is in buffer pool. */ - if (decrypted && encrypted) { - unsigned char *dest = (writeptr + (i * size)); - ulint space = mach_read_from_4( - src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET); - ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN); - - byte* tmp = fil_encrypt_buf( - iter.crypt_data, - space, - offset, - lsn, - src, - iter.page_size == UNIV_PAGE_SIZE ? 
0 : iter.page_size, - dest); - - if (tmp == src) { - /* TODO: remove unnecessary memcpy's */ - memcpy(dest, src, size); - } - - updated = true; - } - - page_off += iter.page_size; - block->frame += iter.page_size; - } - - /* A page was updated in the set, write back to disk. */ - if (updated - && !os_file_write( - iter.filepath, iter.file, writeptr, - offset, (ulint) n_bytes)) { - - ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed"); - - return(DB_IO_ERROR); - } - } - - return(DB_SUCCESS); -} - -/********************************************************************//** -Iterate over all the pages in the tablespace. -@param table - the table definiton in the server -@param n_io_buffers - number of blocks to read and write together -@param callback - functor that will do the page updates -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_tablespace_iterate( -/*===================*/ - dict_table_t* table, - ulint n_io_buffers, - PageCallback& callback) -{ - dberr_t err; - os_file_t file; - char* filepath; - - ut_a(n_io_buffers > 0); - ut_ad(!srv_read_only_mode); - - DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", - return(DB_CORRUPTION);); - - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - dict_get_and_save_data_dir_path(table, false); - ut_a(table->data_dir_path); - - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); - } else { - filepath = fil_make_ibd_name(table->name, false); - } - - { - ibool success; - - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); - - DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", - { - static bool once; - - if (!once || ut_rnd_interval(0, 10) == 5) { - once = true; - success = FALSE; - os_file_close(file); - } - }); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to import a tablespace, but could not " - 
"open the tablespace file %s", filepath); - - mem_free(filepath); - - return(DB_TABLESPACE_NOT_FOUND); - - } else { - err = DB_SUCCESS; - } - } - - callback.set_file(filepath, file); - - os_offset_t file_size = os_file_get_size(file); - ut_a(file_size != (os_offset_t) -1); - - /* The block we will use for every physical page */ - buf_block_t block; - - memset(&block, 0x0, sizeof(block)); - - /* Allocate a page to read in the tablespace header, so that we - can determine the page size and zip_size (if it is compressed). - We allocate an extra page in case it is a compressed table. One - page is to ensure alignement. */ - - void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE); - byte* page = static_cast(ut_align(page_ptr, UNIV_PAGE_SIZE)); - - fil_buf_block_init(&block, page); - - /* Read the first page and determine the page and zip size. */ - - if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) { - - err = DB_IO_ERROR; - - } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) { - fil_iterator_t iter; - - iter.file = file; - iter.start = 0; - iter.end = file_size; - iter.filepath = filepath; - iter.file_size = file_size; - iter.n_io_buffers = n_io_buffers; - iter.page_size = callback.get_page_size(); - - ulint crypt_data_offset = fsp_header_get_crypt_offset( - callback.get_zip_size(), 0); - - /* read (optional) crypt data */ - iter.crypt_data = fil_space_read_crypt_data( - 0, page, crypt_data_offset); - - /* Compressed pages can't be optimised for block IO for now. - We do the IMPORT page by page. */ - - if (callback.get_zip_size() > 0) { - iter.n_io_buffers = 1; - ut_a(iter.page_size == callback.get_zip_size()); - } - - /** If tablespace is encrypted, it needs extra buffers */ - if (iter.crypt_data != NULL) { - /* decrease io buffers so that memory - * consumption doesnt double - * note: the +1 is to avoid n_io_buffers getting down to 0 */ - iter.n_io_buffers = (iter.n_io_buffers + 1) / 2; - } - - /** Add an extra page for compressed page scratch area. 
*/ - - void* io_buffer = mem_alloc( - (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE); - - iter.io_buffer = static_cast( - ut_align(io_buffer, UNIV_PAGE_SIZE)); - - void* crypt_io_buffer = NULL; - if (iter.crypt_data != NULL) { - crypt_io_buffer = mem_alloc( - iter.n_io_buffers * UNIV_PAGE_SIZE); - iter.crypt_io_buffer = static_cast( - crypt_io_buffer); - } - - err = fil_iterate(iter, &block, callback); - - mem_free(io_buffer); - - if (iter.crypt_data != NULL) { - mem_free(crypt_io_buffer); - iter.crypt_io_buffer = NULL; - fil_space_destroy_crypt_data(&iter.crypt_data); - } - } - - if (err == DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk"); - - if (!os_file_flush(file)) { - ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!"); - err = DB_IO_ERROR; - } else { - ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!"); - } - } - - os_file_close(file); - - mem_free(page_ptr); - mem_free(filepath); - - return(err); -} - -/** -Set the tablespace compressed table size. -@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */ -dberr_t -PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW -{ - m_zip_size = fsp_header_get_zip_size(page); - - if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) { - return(DB_CORRUPTION); - } - - return(DB_SUCCESS); -} - -/********************************************************************//** -Delete the tablespace file and any related files like .cfg. -This should not be called for temporary tables. */ -UNIV_INTERN -void -fil_delete_file( -/*============*/ - const char* ibd_name) /*!< in: filepath of the ibd - tablespace */ -{ - /* Force a delete of any stale .ibd files that are lying around. 
*/ - - ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name); - - os_file_delete_if_exists(innodb_file_data_key, ibd_name); - - char* cfg_name = fil_make_cfg_name(ibd_name); - - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - - mem_free(cfg_name); -} - -/** -Iterate over all the spaces in the space list and fetch the -tablespace names. It will return a copy of the name that must be -freed by the caller using: delete[]. -@return DB_SUCCESS if all OK. */ -UNIV_INTERN -dberr_t -fil_get_space_names( -/*================*/ - space_name_list_t& space_name_list) - /*!< in/out: List to append to */ -{ - fil_space_t* space; - dberr_t err = DB_SUCCESS; - - mutex_enter(&fil_system->mutex); - - for (space = UT_LIST_GET_FIRST(fil_system->space_list); - space != NULL; - space = UT_LIST_GET_NEXT(space_list, space)) { - - if (space->purpose == FIL_TABLESPACE) { - ulint len; - char* name; - - len = strlen(space->name); - name = new(std::nothrow) char[len + 1]; - - if (name == 0) { - /* Caller to free elements allocated so far. */ - err = DB_OUT_OF_MEMORY; - break; - } - - memcpy(name, space->name, len); - name[len] = 0; - - space_name_list.push_back(name); - } - } - - mutex_exit(&fil_system->mutex); - - return(err); -} - -/** Generate redo log for swapping two .ibd files -@param[in] old_table old table -@param[in] new_table new table -@param[in] tmp_name temporary table name -@param[in,out] mtr mini-transaction -@return innodb error code */ -UNIV_INTERN -dberr_t -fil_mtr_rename_log( - const dict_table_t* old_table, - const dict_table_t* new_table, - const char* tmp_name, - mtr_t* mtr) -{ - dberr_t err = DB_SUCCESS; - char* old_path; - - /* If neither table is file-per-table, - there will be no renaming of files. 
*/ - if (old_table->space == TRX_SYS_SPACE - && new_table->space == TRX_SYS_SPACE) { - return(DB_SUCCESS); - } - - if (DICT_TF_HAS_DATA_DIR(old_table->flags)) { - old_path = os_file_make_remote_pathname( - old_table->data_dir_path, old_table->name, "ibd"); - } else { - old_path = fil_make_ibd_name(old_table->name, false); - } - if (old_path == NULL) { - return(DB_OUT_OF_MEMORY); - } - - if (old_table->space != TRX_SYS_SPACE) { - char* tmp_path; - - if (DICT_TF_HAS_DATA_DIR(old_table->flags)) { - tmp_path = os_file_make_remote_pathname( - old_table->data_dir_path, tmp_name, "ibd"); - } - else { - tmp_path = fil_make_ibd_name(tmp_name, false); - } - - if (tmp_path == NULL) { - mem_free(old_path); - return(DB_OUT_OF_MEMORY); - } - - /* Temp filepath must not exist. */ - err = fil_rename_tablespace_check( - old_table->space, old_path, tmp_path, - dict_table_is_discarded(old_table)); - mem_free(tmp_path); - if (err != DB_SUCCESS) { - mem_free(old_path); - return(err); - } - - fil_op_write_log(MLOG_FILE_RENAME, old_table->space, - 0, 0, old_table->name, tmp_name, mtr); - } - - if (new_table->space != TRX_SYS_SPACE) { - - /* Destination filepath must not exist unless this ALTER - TABLE starts and ends with a file_per-table tablespace. 
*/ - if (old_table->space == TRX_SYS_SPACE) { - char* new_path = NULL; - - if (DICT_TF_HAS_DATA_DIR(new_table->flags)) { - new_path = os_file_make_remote_pathname( - new_table->data_dir_path, - new_table->name, "ibd"); - } - else { - new_path = fil_make_ibd_name( - new_table->name, false); - } - - if (new_path == NULL) { - mem_free(old_path); - return(DB_OUT_OF_MEMORY); - } - - err = fil_rename_tablespace_check( - new_table->space, new_path, old_path, - dict_table_is_discarded(new_table)); - mem_free(new_path); - if (err != DB_SUCCESS) { - mem_free(old_path); - return(err); - } - } - - fil_op_write_log(MLOG_FILE_RENAME, new_table->space, - 0, 0, new_table->name, old_table->name, mtr); - - } - - mem_free(old_path); - - return(err); -} - -/****************************************************************//** -Acquire fil_system mutex */ -void -fil_system_enter(void) -/*==================*/ -{ - ut_ad(!mutex_own(&fil_system->mutex)); - mutex_enter(&fil_system->mutex); -} - -/****************************************************************//** -Release fil_system mutex */ -void -fil_system_exit(void) -/*=================*/ -{ - ut_ad(mutex_own(&fil_system->mutex)); - mutex_exit(&fil_system->mutex); -} - /****************************************************************** -Get id of first tablespace or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space() -/*=================*/ -{ - ulint out_id = ULINT_UNDEFINED; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - if (space != NULL) { - do - { - if (!space->stop_new_ops) { - out_id = space->id; - break; - } - space = UT_LIST_GET_NEXT(space_list, space); - } while (space != NULL); - } - - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get id of first tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space_safe() 
-/*======================*/ -{ - ulint out_id = ULINT_UNDEFINED; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - if (space != NULL) { - do - { - if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { - out_id = space->id; - break; - } - - space = UT_LIST_GET_NEXT(space_list, space); - } while (space != NULL); - } - - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get id of next tablespace or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space( -/*===============*/ - ulint id) /*!< in: previous space id */ -{ - bool found; - fil_space_t* space; - ulint out_id = ULINT_UNDEFINED; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - if (space == NULL) { - /* we didn't find it...search for space with space->id > id */ - found = false; - space = UT_LIST_GET_FIRST(fil_system->space_list); - } else { - /* we found it, take next available space */ - found = true; - } - - while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { - - if (!found && space->id <= id) - continue; - - if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { - /* inc reference to prevent drop */ - out_id = space->id; - break; - } - } - - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get id of next tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space_safe( -/*====================*/ - ulint id) /*!< in: previous space id */ -{ - bool found; - fil_space_t* space; - ulint out_id = ULINT_UNDEFINED; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - if (space == NULL) { - /* we didn't find it...search for space with space->id > id */ - found = false; - space = UT_LIST_GET_FIRST(fil_system->space_list); - } else { - /* we found it, take next available 
space */ - found = true; - } - - while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { - - if (!found && space->id <= id) - continue; - - if (!space->stop_new_ops) { - /* inc reference to prevent drop */ - out_id = space->id; - break; - } - } - - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_get_crypt_data( -/*=====================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - fil_space_crypt_t* crypt_data = NULL; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space != NULL) { - crypt_data = space->crypt_data; - } - - mutex_exit(&fil_system->mutex); - - return(crypt_data); -} - -/****************************************************************** -Get crypt data for a tablespace */ +Set crypt data for a tablespace */ UNIV_INTERN fil_space_crypt_t* fil_space_set_crypt_data( @@ -7285,3 +7752,193 @@ fil_space_set_crypt_data( return ret_crypt_data; } + +/****************************************************************** +Get id of first tablespace that has node or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_first_space_safe() +/*======================*/ +{ + ulint out_id = ULINT_UNDEFINED; + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_FIRST(fil_system->space_list); + if (space != NULL) { + do + { + if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { + out_id = space->id; + break; + } + + space = UT_LIST_GET_NEXT(space_list, space); + } while (space != NULL); + } + + mutex_exit(&fil_system->mutex); + + return out_id; +} + +/****************************************************************** +Get id of next tablespace that has node or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_next_space_safe( +/*====================*/ + ulint id) /*!< in: previous space id */ 
+{ + bool found; + fil_space_t* space; + ulint out_id = ULINT_UNDEFINED; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space == NULL) { + /* we didn't find it...search for space with space->id > id */ + found = false; + space = UT_LIST_GET_FIRST(fil_system->space_list); + } else { + /* we found it, take next available space */ + found = true; + } + + while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { + + if (!found && space->id <= id) + continue; + + if (!space->stop_new_ops) { + /* inc reference to prevent drop */ + out_id = space->id; + break; + } + } + + mutex_exit(&fil_system->mutex); + + return out_id; +} + + +/********************************************************************//** +Find correct node from file space +@return node */ +static +fil_node_t* +fil_space_get_node( + fil_space_t* space, /*!< in: file spage */ + ulint space_id, /*!< in: space id */ + ulint* block_offset, /*!< in/out: offset in number of blocks */ + ulint byte_offset, /*!< in: remainder of offset in bytes; in + aio this must be divisible by the OS block + size */ + ulint len) /*!< in: how many bytes to read or write; this + must not cross a file boundary; in aio this + must be a block size multiple */ +{ + fil_node_t* node; + ut_ad(mutex_own(&fil_system->mutex)); + + node = UT_LIST_GET_FIRST(space->chain); + + for (;;) { + if (node == NULL) { + return(NULL); + } else if (fil_is_user_tablespace_id(space->id) + && node->size == 0) { + + /* We do not know the size of a single-table tablespace + before we open the file */ + break; + } else if (node->size > *block_offset) { + /* Found! 
*/ + break; + } else { + *block_offset -= node->size; + node = UT_LIST_GET_NEXT(chain, node); + } + } + + return (node); +} + +/********************************************************************//** +Return block size of node in file space +@return file block size */ +UNIV_INTERN +ulint +fil_space_get_block_size( +/*=====================*/ + ulint space_id, + ulint block_offset, + ulint len) +{ + ulint block_size = 512; + ut_ad(!mutex_own(&fil_system->mutex)); + + mutex_enter(&fil_system->mutex); + fil_space_t* space = fil_space_get_space(space_id); + + if (space) { + fil_node_t* node = fil_space_get_node(space, space_id, &block_offset, 0, len); + + if (node) { + block_size = node->block_size; + } + } + + /* Currently supporting block size up to 4K, + fall back to default if bigger requested. */ + if (block_size > 4096) { + block_size = 512; + } + + mutex_exit(&fil_system->mutex); + + return block_size; +} + +/*******************************************************************//** +Returns the table space by a given id, NULL if not found. 
*/ +fil_space_t* +fil_space_found_by_id( +/*==================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space = NULL; + mutex_enter(&fil_system->mutex); + space = fil_space_get_by_id(id); + + /* Not found if space is being deleted */ + if (space && space->stop_new_ops) { + space = NULL; + } + + mutex_exit(&fil_system->mutex); + return space; +} + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void) +/*==================*/ +{ + ut_ad(!mutex_own(&fil_system->mutex)); + mutex_enter(&fil_system->mutex); +} + +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void) +/*=================*/ +{ + ut_ad(mutex_own(&fil_system->mutex)); + mutex_exit(&fil_system->mutex); +} diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 5c6ef3bfd0d..4479c06f1b2 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -52,7 +52,6 @@ Updated 14/02/2015 # include "buf0lru.h" # include "ibuf0ibuf.h" # include "sync0sync.h" -# include "os0sync.h" #else /* !UNIV_HOTBACKUP */ # include "srv0srv.h" static ulint srv_data_read, srv_data_written; @@ -119,10 +118,10 @@ fil_compress_page( if (!out_buf) { allocated = true; - out_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + out_buf = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); #ifdef HAVE_LZO if (comp_method == PAGE_LZO_ALGORITHM) { - lzo_mem = static_cast(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); + lzo_mem = static_cast(ut_malloc_nokey(LZO1X_1_15_MEM_COMPRESS)); memset(lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); } #endif @@ -173,20 +172,6 @@ fil_compress_page( write_size = err; if (err == 0) { - /* If error we leave the actual page as it was */ - -#ifndef UNIV_PAGECOMPRESS_DEBUG - if (space->printed_compression_failure == false) { -#endif - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu 
name %s len %lu rt %d write %lu.", - space_id, fil_space_name(space), len, err, write_size); - space->printed_compression_failure = true; -#ifndef UNIV_PAGECOMPRESS_DEBUG - } -#endif - srv_stats.pages_page_compression_error.inc(); - *out_len = len; goto err_exit; } break; @@ -197,15 +182,6 @@ fil_compress_page( buf, len, out_buf+header_len, &write_size, lzo_mem); if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space->printed_compression_failure == false) { - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu name %s len %lu err %d write_size %lu.", - space_id, fil_space_name(space), len, err, write_size); - space->printed_compression_failure = true; - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; goto err_exit; } @@ -226,15 +202,7 @@ fil_compress_page( (size_t)write_size); if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) { - if (space->printed_compression_failure == false) { - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu name %s len %lu err %d write_size %lu", - space_id, fil_space_name(space), len, err, out_pos); - space->printed_compression_failure = true; - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; + write_size = out_pos; goto err_exit; } @@ -257,15 +225,6 @@ fil_compress_page( 0); if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space->printed_compression_failure == false) { - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu name %s len %lu err %d write_size %lu.", - space_id, fil_space_name(space), len, err, write_size); - space->printed_compression_failure = true; - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; goto err_exit; } break; @@ -284,15 +243,7 @@ fil_compress_page( (size_t*)&write_size); if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space->printed_compression_failure == false) { - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu name %s 
len %lu err %d write_size %lu.", - space_id, fil_space_name(space), len, (int)cstatus, write_size); - space->printed_compression_failure = true; - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; + err = (int)cstatus; goto err_exit; } break; @@ -303,17 +254,6 @@ fil_compress_page( err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, comp_level); if (err != Z_OK) { - /* If error we leave the actual page as it was */ - - if (space->printed_compression_failure == false) { - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space %lu name %s len %lu rt %d write %lu.", - space_id, fil_space_name(space), len, err, write_size); - space->printed_compression_failure = true; - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; goto err_exit; } break; @@ -360,14 +300,16 @@ fil_compress_page( byte *comp_page; byte *uncomp_page; - comp_page = static_cast(ut_malloc(UNIV_PAGE_SIZE)); - uncomp_page = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + comp_page = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); + uncomp_page = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); memcpy(comp_page, out_buf, UNIV_PAGE_SIZE); + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(space_id, &tsfound); fil_decompress_page(uncomp_page, comp_page, len, NULL); - if(buf_page_is_corrupted(false, uncomp_page, 0)) { - buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH); + if(buf_page_is_corrupted(false, uncomp_page, page_size, false)) { + buf_page_print(uncomp_page, page_size, BUF_PAGE_PRINT_NO_CRASH); ut_error; } @@ -423,6 +365,26 @@ fil_compress_page( } err_exit: + /* If error we leave the actual page as it was */ + +#ifndef UNIV_PAGECOMPRESS_DEBUG + if (space && space->printed_compression_failure == false) { +#endif + ib::warn() << "Compression failed for space: " + << space_id << " name: " + << fil_space_name(space) << " len: " + << len << " err: " << err << " write_size: " + << write_size + << " compression method: " + << 
fil_get_compression_alg_name(comp_method) + << "."; + space->printed_compression_failure = true; +#ifndef UNIV_PAGECOMPRESS_DEBUG + } +#endif + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + if (allocated) { ut_free(out_buf); #ifdef HAVE_LZO @@ -472,13 +434,13 @@ fil_decompress_page( /* Do not try to uncompressed pages that are not compressed */ if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && - ptype != FIL_PAGE_TYPE_COMPRESSED) { + ptype != FIL_PAGE_COMPRESSED) { return; } // If no buffer was given, we need to allocate temporal buffer if (page_buf == NULL) { - in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + in_buf = static_cast(ut_malloc_nokey(UNIV_PAGE_SIZE)); memset(in_buf, 0, UNIV_PAGE_SIZE); } else { in_buf = page_buf; @@ -489,13 +451,13 @@ fil_decompress_page( if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " CRC %lu type %lu len %lu.", - mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), - mach_read_from_2(buf+FIL_PAGE_TYPE), len); + ib::error() << "Corruption: We try to uncompress corrupted page:" + << " CRC " + << mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) + << " page_type " + << mach_read_from_2(buf+FIL_PAGE_TYPE) + << " page len " << len << "."; - fflush(stderr); if (return_error) { goto error_return; } @@ -513,11 +475,11 @@ fil_decompress_page( actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); /* Check if payload size is corrupted */ if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " actual size %lu compression %s.", - actual_size, fil_get_compression_alg_name(compression_alg)); - fflush(stderr); + ib::error() << "Corruption: We try to uncompress corrupted page" + << " actual size: " << actual_size + << 
" compression method: " + << fil_get_compression_alg_name(compression_alg) + << "."; if (return_error) { goto error_return; } @@ -531,31 +493,20 @@ fil_decompress_page( } #ifdef UNIV_PAGECOMPRESS_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "Preparing for decompress for len %lu\n", - actual_size); + ib::info() << "Preparing for decompress for len " + << actual_size << "."; #endif /* UNIV_PAGECOMPRESS_DEBUG */ - switch(compression_alg) { case PAGE_ZLIB_ALGORITHM: err= uncompress(in_buf, &len, buf+header_len, (unsigned long)actual_size); /* If uncompress fails it means that page is corrupted */ if (err != Z_OK) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size %lu len %lu.", - err, actual_size, len); - - fflush(stderr); - + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; @@ -564,17 +515,10 @@ fil_decompress_page( err = LZ4_decompress_fast((const char *)buf+header_len, (char *)in_buf, len); if (err != (int)actual_size) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %d bytes " - " size %lu len %lu.", - err, actual_size, len); - fflush(stderr); - + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; #endif /* HAVE_LZ4 */ @@ -585,17 +529,11 @@ fil_decompress_page( actual_size,(unsigned char *)in_buf, &olen, NULL); if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " size %lu len %lu.", - olen, actual_size, len); - fflush(stderr); - + len = olen; + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; } @@ -621,17 +559,11 @@ fil_decompress_page( if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " 
size %lu len %lu.", - dst_pos, actual_size, len); - fflush(stderr); - + len = dst_pos; + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; @@ -650,17 +582,11 @@ fil_decompress_page( 0); if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %du bytes" - " size %lu len %lu err %d.", - dst_pos, actual_size, len, err); - fflush(stderr); - + len = dst_pos; + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; } @@ -678,33 +604,21 @@ fil_decompress_page( (size_t*)&olen); if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %lu bytes" - " size %lu len %lu err %d.", - olen, actual_size, len, (int)cstatus); - fflush(stderr); - + err = (int)cstatus; + len = olen; + goto err_exit; if (return_error) { goto error_return; } - ut_error; } break; } #endif /* HAVE_SNAPPY */ default: - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but compression algorithm %s" - " is not known." - ,fil_get_compression_alg_name(compression_alg)); - - fflush(stderr); + goto err_exit; if (return_error) { goto error_return; } - ut_error; break; } @@ -719,4 +633,30 @@ error_return: if (page_buf == NULL) { ut_free(in_buf); } + + return; + +err_exit: + /* Note that as we have found the page is corrupted, so + all this could be incorrect. */ + ulint space_id = mach_read_from_4(buf+FIL_PAGE_SPACE_ID); + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(space_id); + fil_system_exit(); + + bool tsfound; + const page_size_t page_size = fil_space_get_page_size(space_id, &tsfound); + + ib::error() << "Corruption: Page is marked as compressed" + << " space: " << space_id << " name: " + << (space ? 
fil_space_name(space) : "NULL") + << " but uncompress failed with error: " << err + << " size: " << actual_size + << " len: " << len + << " compression method: " + << fil_get_compression_alg_name(compression_alg) << "."; + + buf_page_print(buf, page_size, BUF_PAGE_PRINT_NO_CRASH); + + ut_error; } diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc new file mode 100644 index 00000000000..501a8e58622 --- /dev/null +++ b/storage/innobase/fsp/fsp0file.cc @@ -0,0 +1,1232 @@ +/***************************************************************************** + +Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fsp/fsp0file.cc +Tablespace data file implementation + +Created 2013-7-26 by Kevin Lewis +*******************************************************/ + +#include "ha_prototypes.h" + +#include "fil0fil.h" +#include "fsp0types.h" +#include "fsp0sysspace.h" +#include "os0file.h" +#include "page0page.h" +#include "srv0start.h" +#include "ut0new.h" +#include "fil0crypt.h" +#ifdef UNIV_HOTBACKUP +#include "my_sys.h" +#endif /* UNIV_HOTBACKUP */ + +/** Initialize the name, size and order of this datafile +@param[in] name tablespace name, will be copied +@param[in] flags tablespace flags */ +void +Datafile::init( + const char* name, + ulint flags) +{ + ut_ad(m_name == NULL); + ut_ad(name != NULL); + + m_name = mem_strdup(name); + m_flags = flags; + m_encryption_key = NULL; + m_encryption_iv = NULL; +} + +/** Release the resources. */ +void +Datafile::shutdown() +{ + close(); + + ut_free(m_name); + m_name = NULL; + + free_filepath(); + + if (m_encryption_key != NULL) { + ut_free(m_encryption_key); + m_encryption_key = NULL; + } + + if (m_crypt_info) { + fil_space_destroy_crypt_data(&m_crypt_info); + } + + if (m_encryption_iv != NULL) { + ut_free(m_encryption_iv); + m_encryption_iv = NULL; + } + + free_first_page(); +} + +/** Create/open a data file. +@param[in] read_only_mode if true, then readonly mode checks are enforced. 
+@return DB_SUCCESS or error code */ +dberr_t +Datafile::open_or_create(bool read_only_mode) +{ + bool success; + ut_a(m_filepath != NULL); + ut_ad(m_handle == OS_FILE_CLOSED); + + m_handle = os_file_create( + innodb_data_file_key, m_filepath, m_open_flags, + OS_FILE_NORMAL, OS_DATA_FILE, read_only_mode, &success); + + if (!success) { + m_last_os_error = os_file_get_last_error(true); + ib::error() << "Cannot open datafile '" << m_filepath << "'"; + return(DB_CANNOT_OPEN_FILE); + } + + return(DB_SUCCESS); +} + +/** Open a data file in read-only mode to check if it exists so that it +can be validated. +@param[in] strict whether to issue error messages +@return DB_SUCCESS or error code */ +dberr_t +Datafile::open_read_only(bool strict) +{ + bool success = false; + ut_ad(m_handle == OS_FILE_CLOSED); + + /* This function can be called for file objects that do not need + to be opened, which is the case when the m_filepath is NULL */ + if (m_filepath == NULL) { + return(DB_ERROR); + } + + set_open_flags(OS_FILE_OPEN); + m_handle = os_file_create_simple_no_error_handling( + innodb_data_file_key, m_filepath, m_open_flags, + OS_FILE_READ_ONLY, true, &success); + + if (success) { + m_exists = true; + init_file_info(); + + return(DB_SUCCESS); + } + + if (strict) { + m_last_os_error = os_file_get_last_error(true); + ib::error() << "Cannot open datafile for read-only: '" + << m_filepath << "' OS error: " << m_last_os_error; + } + + return(DB_CANNOT_OPEN_FILE); +} + +/** Open a data file in read-write mode during start-up so that +doublewrite pages can be restored and then it can be validated.* +@param[in] read_only_mode if true, then readonly mode checks are enforced. 
+@return DB_SUCCESS or error code */ +dberr_t +Datafile::open_read_write(bool read_only_mode) +{ + bool success = false; + ut_ad(m_handle == OS_FILE_CLOSED); + + /* This function can be called for file objects that do not need + to be opened, which is the case when the m_filepath is NULL */ + if (m_filepath == NULL) { + return(DB_ERROR); + } + + set_open_flags(OS_FILE_OPEN); + m_handle = os_file_create_simple_no_error_handling( + innodb_data_file_key, m_filepath, m_open_flags, + OS_FILE_READ_WRITE, read_only_mode, &success); + + if (!success) { + m_last_os_error = os_file_get_last_error(true); + ib::error() << "Cannot open datafile for read-write: '" + << m_filepath << "'"; + return(DB_CANNOT_OPEN_FILE); + } + + m_exists = true; + + init_file_info(); + + return(DB_SUCCESS); +} + +/** Initialize OS specific file info. */ +void +Datafile::init_file_info() +{ +#ifdef _WIN32 + GetFileInformationByHandle(m_handle, &m_file_info); +#else + fstat(m_handle, &m_file_info); +#endif /* WIN32 */ +} + +/** Close a data file. +@return DB_SUCCESS or error code */ +dberr_t +Datafile::close() +{ + if (m_handle != OS_FILE_CLOSED) { + ibool success = os_file_close(m_handle); + ut_a(success); + + m_handle = OS_FILE_CLOSED; + } + + return(DB_SUCCESS); +} + +/** Make a full filepath from a directory path and a filename. +Prepend the dirpath to filename using the extension given. +If dirpath is NULL, prepend the default datadir to filepath. +Store the result in m_filepath. +@param[in] dirpath directory path +@param[in] filename filename or filepath +@param[in] ext filename extension */ +void +Datafile::make_filepath( + const char* dirpath, + const char* filename, + ib_extention ext) +{ + ut_ad(dirpath != NULL || filename != NULL); + + free_filepath(); + + m_filepath = fil_make_filepath(dirpath, filename, ext, false); + + ut_ad(m_filepath != NULL); + + set_filename(); +} + +/** Set the filepath by duplicating the filepath sent in. 
This is the +name of the file with its extension and absolute or relative path. +@param[in] filepath filepath to set */ +void +Datafile::set_filepath(const char* filepath) +{ + free_filepath(); + m_filepath = static_cast(ut_malloc_nokey(strlen(filepath) + 1)); + ::strcpy(m_filepath, filepath); + set_filename(); +} + +/** Free the filepath buffer. */ +void +Datafile::free_filepath() +{ + if (m_filepath != NULL) { + ut_free(m_filepath); + m_filepath = NULL; + m_filename = NULL; + } +} + +/** Do a quick test if the filepath provided looks the same as this filepath +byte by byte. If they are two different looking paths to the same file, +same_as() will be used to show that after the files are opened. +@param[in] other filepath to compare with +@retval true if it is the same filename by byte comparison +@retval false if it looks different */ +bool +Datafile::same_filepath_as( + const char* other) const +{ + return(0 == strcmp(m_filepath, other)); +} + +/** Test if another opened datafile is the same file as this object. +@param[in] other Datafile to compare with +@return true if it is the same file, else false */ +bool +Datafile::same_as( + const Datafile& other) const +{ +#ifdef _WIN32 + return(m_file_info.dwVolumeSerialNumber + == other.m_file_info.dwVolumeSerialNumber + && m_file_info.nFileIndexHigh + == other.m_file_info.nFileIndexHigh + && m_file_info.nFileIndexLow + == other.m_file_info.nFileIndexLow); +#else + return(m_file_info.st_ino == other.m_file_info.st_ino + && m_file_info.st_dev == other.m_file_info.st_dev); +#endif /* WIN32 */ +} + +/** Allocate and set the datafile or tablespace name in m_name. +If a name is provided, use it; else if the datafile is file-per-table, +extract a file-per-table tablespace name from m_filepath; else it is a +general tablespace, so just call it that for now. The value of m_name +will be freed in the destructor. 
+@param[in] name tablespace name if known, NULL if not */ +void +Datafile::set_name(const char* name) +{ + ut_free(m_name); + + if (name != NULL) { + m_name = mem_strdup(name); + } else if (fsp_is_file_per_table(m_space_id, m_flags)) { + m_name = fil_path_to_space_name(m_filepath); + } else { + /* Give this general tablespace a temporary name. */ + m_name = static_cast( + ut_malloc_nokey(strlen(general_space_name) + 20)); + + sprintf(m_name, "%s_" ULINTPF, general_space_name, m_space_id); + } +} + +/** Reads a few significant fields from the first page of the first +datafile. The Datafile must already be open. +@param[in] read_only_mode If true, then readonly mode checks are enforced. +@return DB_SUCCESS or DB_IO_ERROR if page cannot be read */ +dberr_t +Datafile::read_first_page(bool read_only_mode) +{ + if (m_handle == OS_FILE_CLOSED) { + + dberr_t err = open_or_create(read_only_mode); + + if (err != DB_SUCCESS) { + return(err); + } + } + + m_first_page_buf = static_cast( + ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX)); + + /* Align the memory for a possible read from a raw device */ + + m_first_page = static_cast( + ut_align(m_first_page_buf, UNIV_PAGE_SIZE)); + + IORequest request; + dberr_t err = DB_ERROR; + size_t page_size = UNIV_PAGE_SIZE_MAX; + + /* Don't want unnecessary complaints about partial reads. 
*/ + + request.disable_partial_io_warnings(); + + while (page_size >= UNIV_PAGE_SIZE_MIN) { + + ulint n_read = 0; + + err = os_file_read_no_error_handling( + request, m_handle, m_first_page, 0, page_size, &n_read); + + if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) { + + page_size >>= 1; + + } else if (err == DB_SUCCESS) { + + ut_a(n_read == page_size); + + break; + + } else { + + ib::error() + << "Cannot read first page of '" + << m_filepath << "' " + << ut_strerr(err); + break; + } + } + + if (err == DB_SUCCESS && m_order == 0) { + + m_flags = fsp_header_get_flags(m_first_page); + + m_space_id = fsp_header_get_space_id(m_first_page); + } + + const page_size_t page_sz = fsp_header_get_page_size(m_first_page); + ulint offset = fsp_header_get_crypt_offset(page_sz, NULL); + m_crypt_info = fil_space_read_crypt_data(m_space_id, m_first_page, offset); + + return(err); +} + +/** Free the first page from memory when it is no longer needed. */ +void +Datafile::free_first_page() +{ + if (m_first_page_buf) { + ut_free(m_first_page_buf); + m_first_page_buf = NULL; + m_first_page = NULL; + } +} + +/** Validates the datafile and checks that it conforms with the expected +space ID and flags. The file should exist and be successfully opened +in order for this function to validate it. +@param[in] space_id The expected tablespace ID. +@param[in] flags The expected tablespace flags. +@param[in] for_import if it is for importing +@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not. +m_is_valid is also set true on success, else false. */ +dberr_t +Datafile::validate_to_dd( + ulint space_id, + ulint flags, + bool for_import) +{ + dberr_t err; + + if (!is_open()) { + return DB_ERROR; + } + + /* Validate this single-table-tablespace with the data dictionary, + but do not compare the DATA_DIR flag, in case the tablespace was + remotely located. 
*/ + err = validate_first_page(0, for_import); + if (err != DB_SUCCESS) { + return(err); + } + + /* Make sure the datafile we found matched the space ID. + If the datafile is a file-per-table tablespace then also match + the row format and zip page size. */ + if (m_space_id == space_id + && (m_flags & FSP_FLAGS_MASK_SHARED + || (m_flags & ~FSP_FLAGS_MASK_DATA_DIR) + == (flags & ~FSP_FLAGS_MASK_DATA_DIR))) { + /* Datafile matches the tablespace expected. */ + return(DB_SUCCESS); + } + + /* else do not use this tablespace. */ + m_is_valid = false; + + ib::error() << "In file '" << m_filepath << "', tablespace id and" + " flags are " << m_space_id << " and " << m_flags << ", but in" + " the InnoDB data dictionary they are " << space_id << " and " + << flags << ". Have you moved InnoDB .ibd files around without" + " using the commands DISCARD TABLESPACE and IMPORT TABLESPACE?" + " " << TROUBLESHOOT_DATADICT_MSG; + + return(DB_ERROR); +} + +/** Validates this datafile for the purpose of recovery. The file should +exist and be successfully opened. We initially open it in read-only mode +because we just want to read the SpaceID. However, if the first page is +corrupt and needs to be restored from the doublewrite buffer, we will +reopen it in write mode and ry to restore that page. +@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not. +m_is_valid is also set true on success, else false. */ +dberr_t +Datafile::validate_for_recovery() +{ + dberr_t err; + + ut_ad(is_open()); + ut_ad(!srv_read_only_mode); + + err = validate_first_page(0, false); + + switch (err) { + case DB_SUCCESS: + case DB_TABLESPACE_EXISTS: +#ifdef UNIV_HOTBACKUP + err = restore_from_doublewrite(0); + if (err != DB_SUCCESS) { + return(err); + } + /* Free the previously read first page and then re-validate. 
*/ + free_first_page(); + err = validate_first_page(0, false); + if (err == DB_SUCCESS) { + std::string filepath = fil_space_get_first_path( + m_space_id); + if (is_intermediate_file(filepath.c_str())) { + /* Existing intermediate file with same space + id is obsolete.*/ + if (fil_space_free(m_space_id, FALSE)) { + err = DB_SUCCESS; + } + } else { + filepath.assign(m_filepath); + if (is_intermediate_file(filepath.c_str())) { + /* New intermediate file with same space id + shall be ignored.*/ + err = DB_TABLESPACE_EXISTS; + /* Set all bits of 'flags' as a special + indicator for "ignore tablespace". Hopefully + InnoDB will never use all bits or at least all + bits set will not be a meaningful setting + otherwise.*/ + m_flags = ~0; + } + } + } +#endif /* UNIV_HOTBACKUP */ + break; + + default: + /* For encryption tablespace, we skip the retry step, + since it is only because the keyring is not ready. */ + if (FSP_FLAGS_GET_ENCRYPTION(m_flags)) { + return(err); + } + /* Re-open the file in read-write mode Attempt to restore + page 0 from doublewrite and read the space ID from a survey + of the first few pages. */ + close(); + err = open_read_write(srv_read_only_mode); + if (err != DB_SUCCESS) { + ib::error() << "Datafile '" << m_filepath << "' could not" + " be opened in read-write mode so that the" + " doublewrite pages could be restored."; + return(err); + }; + + err = find_space_id(); + if (err != DB_SUCCESS || m_space_id == 0) { + ib::error() << "Datafile '" << m_filepath << "' is" + " corrupted. Cannot determine the space ID from" + " the first 64 pages."; + return(err); + } + + err = restore_from_doublewrite(0); + if (err != DB_SUCCESS) { + return(err); + } + + /* Free the previously read first page and then re-validate. */ + free_first_page(); + err = validate_first_page(0, false); + } + + if (err == DB_SUCCESS) { + set_name(NULL); + } + + return(err); +} + +/** Check the consistency of the first page of a datafile when the +tablespace is opened. 
This occurs before the fil_space_t is created +so the Space ID found here must not already be open. +m_is_valid is set true on success, else false. +@param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN +@param[in] for_import if it is for importing +(only valid for the first file of the system tablespace) +@retval DB_SUCCESS on if the datafile is valid +@retval DB_CORRUPTION if the datafile is not readable +@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */ +dberr_t +Datafile::validate_first_page(lsn_t* flush_lsn, + bool for_import) +{ + char* prev_name; + char* prev_filepath; + const char* error_txt = NULL; + + m_is_valid = true; + + if (m_first_page == NULL + && read_first_page(srv_read_only_mode) != DB_SUCCESS) { + + error_txt = "Cannot read first page"; + } else { + ut_ad(m_first_page_buf); + ut_ad(m_first_page); + + if (flush_lsn != NULL) { + + *flush_lsn = mach_read_from_8( + m_first_page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + } + } + + /* Check if the whole page is blank. */ + if (error_txt == NULL + && m_space_id == srv_sys_space.space_id() + && !m_flags) { + const byte* b = m_first_page; + ulint nonzero_bytes = UNIV_PAGE_SIZE; + + while (*b == '\0' && --nonzero_bytes != 0) { + + b++; + } + + if (nonzero_bytes == 0) { + error_txt = "Header page consists of zero bytes"; + } + } + + const page_size_t page_size(m_flags); + + if (error_txt != NULL) { + + /* skip the next few tests */ + } else if (univ_page_size.logical() != page_size.logical()) { + + /* Page size must be univ_page_size. 
*/ + + ib::error() + << "Data file '" << m_filepath << "' uses page size " + << page_size.logical() << ", but the innodb_page_size" + " start-up parameter is " + << univ_page_size.logical(); + + free_first_page(); + + return(DB_ERROR); + + } else if (page_get_page_no(m_first_page) != 0) { + + /* First page must be number 0 */ + error_txt = "Header page contains inconsistent data"; + + } else if (m_space_id == ULINT_UNDEFINED) { + + /* The space_id can be most anything, except -1. */ + error_txt = "A bad Space ID was found"; + + } else if (buf_page_is_corrupted( + false, m_first_page, page_size, + fsp_is_checksum_disabled(m_space_id))) { + + /* Look for checksum and other corruptions. */ + error_txt = "Checksum mismatch"; + } + + if (error_txt != NULL) { + ib::error() << error_txt << " in datafile: " << m_filepath + << ", Space ID:" << m_space_id << ", Flags: " + << m_flags << ". " << TROUBLESHOOT_DATADICT_MSG; + m_is_valid = false; + + free_first_page(); + + return(DB_CORRUPTION); + + } + +#ifdef MYSQL_ENCRYPTION + /* For encrypted tablespace, check the encryption info in the + first page can be decrypt by master key, otherwise, this table + can't be open. And for importing, we skip checking it. 
*/ + if (FSP_FLAGS_GET_ENCRYPTION(m_flags) && !for_import) { + m_encryption_key = static_cast( + ut_zalloc_nokey(ENCRYPTION_KEY_LEN)); + m_encryption_iv = static_cast( + ut_zalloc_nokey(ENCRYPTION_KEY_LEN)); +#ifdef UNIV_ENCRYPT_DEBUG + fprintf(stderr, "Got from file %lu:", m_space_id); +#endif + if (!fsp_header_get_encryption_key(m_flags, + m_encryption_key, + m_encryption_iv, + m_first_page)) { + ib::error() + << "Encryption information in" + << " datafile: " << m_filepath + << " can't be decrypted" + << " , please confirm the keyfile" + << " is match and keyring plugin" + << " is loaded."; + + m_is_valid = false; + free_first_page(); + ut_free(m_encryption_key); + ut_free(m_encryption_iv); + m_encryption_key = NULL; + m_encryption_iv = NULL; + return(DB_CORRUPTION); + } + + if (recv_recovery_is_on() + && memcmp(m_encryption_key, + m_encryption_iv, + ENCRYPTION_KEY_LEN) == 0) { + ut_free(m_encryption_key); + ut_free(m_encryption_iv); + m_encryption_key = NULL; + m_encryption_iv = NULL; + } + } +#endif /* MYSQL_ENCRYPTION */ + + if (fil_space_read_name_and_filepath( + m_space_id, &prev_name, &prev_filepath)) { + + if (0 == strcmp(m_filepath, prev_filepath)) { + ut_free(prev_name); + ut_free(prev_filepath); + return(DB_SUCCESS); + } + + /* Make sure the space_id has not already been opened. */ + ib::error() << "Attempted to open a previously opened" + " tablespace. Previous tablespace " << prev_name + << " at filepath: " << prev_filepath + << " uses space ID: " << m_space_id + << ". Cannot open filepath: " << m_filepath + << " which uses the same space ID."; + + ut_free(prev_name); + ut_free(prev_filepath); + + m_is_valid = false; + + free_first_page(); + + return(is_predefined_tablespace(m_space_id) + ? DB_CORRUPTION + : DB_TABLESPACE_EXISTS); + } + + return(DB_SUCCESS); +} + +/** Determine the space id of the given file descriptor by reading a few +pages from the beginning of the .ibd file. 
+@return DB_SUCCESS if space id was successfully identified, else DB_ERROR. */ +dberr_t +Datafile::find_space_id() +{ + os_offset_t file_size; + + ut_ad(m_handle != OS_FILE_CLOSED); + + file_size = os_file_get_size(m_handle); + + if (file_size == (os_offset_t) -1) { + ib::error() << "Could not get file size of datafile '" + << m_filepath << "'"; + return(DB_CORRUPTION); + } + + /* Assuming a page size, read the space_id from each page and store it + in a map. Find out which space_id is agreed on by majority of the + pages. Choose that space_id. */ + for (ulint page_size = UNIV_ZIP_SIZE_MIN; + page_size <= UNIV_PAGE_SIZE_MAX; + page_size <<= 1) { + + /* map[space_id] = count of pages */ + typedef std::map< + ulint, + ulint, + std::less, + ut_allocator > > + Pages; + + Pages verify; + ulint page_count = 64; + ulint valid_pages = 0; + + /* Adjust the number of pages to analyze based on file size */ + while ((page_count * page_size) > file_size) { + --page_count; + } + + ib::info() + << "Page size:" << page_size + << ". 
Pages to analyze:" << page_count; + + byte* buf = static_cast( + ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX)); + + byte* page = static_cast( + ut_align(buf, UNIV_SECTOR_SIZE)); + + for (ulint j = 0; j < page_count; ++j) { + + dberr_t err; + ulint n_bytes = j * page_size; + IORequest request(IORequest::READ); + + err = os_file_read( + request, m_handle, page, n_bytes, page_size); + + if (err == DB_IO_DECOMPRESS_FAIL) { + + /* If the page was compressed on the fly then + try and decompress the page */ + + n_bytes = os_file_compressed_page_size(page); + + if (n_bytes != ULINT_UNDEFINED) { + + err = os_file_read( + request, + m_handle, page, page_size, + UNIV_PAGE_SIZE_MAX); + + if (err != DB_SUCCESS) { + + ib::info() + << "READ FAIL: " + << "page_no:" << j; + continue; + } + } + + } else if (err != DB_SUCCESS) { + + ib::info() + << "READ FAIL: page_no:" << j; + + continue; + } + + bool noncompressed_ok = false; + + /* For noncompressed pages, the page size must be + equal to univ_page_size.physical(). */ + if (page_size == univ_page_size.physical()) { + noncompressed_ok = !buf_page_is_corrupted( + false, page, univ_page_size, false); + } + + bool compressed_ok = false; + + /* file-per-table tablespaces can be compressed with + the same physical and logical page size. General + tablespaces must have different physical and logical + page sizes in order to be compressed. For this check, + assume the page is compressed if univ_page_size. + logical() is equal to or less than 16k and the + page_size we are checking is equal to or less than + univ_page_size.logical(). 
*/ + if (univ_page_size.logical() <= UNIV_PAGE_SIZE_DEF + && page_size <= univ_page_size.logical()) { + const page_size_t compr_page_size( + page_size, univ_page_size.logical(), + true); + + compressed_ok = !buf_page_is_corrupted( + false, page, compr_page_size, false); + } + + if (noncompressed_ok || compressed_ok) { + + ulint space_id = mach_read_from_4(page + + FIL_PAGE_SPACE_ID); + + if (space_id > 0) { + + ib::info() + << "VALID: space:" + << space_id << " page_no:" << j + << " page_size:" << page_size; + + ++valid_pages; + + ++verify[space_id]; + } + } + } + + ut_free(buf); + + ib::info() + << "Page size: " << page_size + << ". Possible space_id count:" << verify.size(); + + const ulint pages_corrupted = 3; + + for (ulint missed = 0; missed <= pages_corrupted; ++missed) { + + for (Pages::const_iterator it = verify.begin(); + it != verify.end(); + ++it) { + + ib::info() << "space_id:" << it->first + << ", Number of pages matched: " + << it->second << "/" << valid_pages + << " (" << page_size << ")"; + + if (it->second == (valid_pages - missed)) { + ib::info() << "Chosen space:" + << it->first; + + m_space_id = it->first; + return(DB_SUCCESS); + } + } + + } + } + + return(DB_CORRUPTION); +} + + +/** Finds a given page of the given space id from the double write buffer +and copies it to the corresponding .ibd file. +@param[in] page_no Page number to restore +@return DB_SUCCESS if page was restored from doublewrite, else DB_ERROR */ +dberr_t +Datafile::restore_from_doublewrite( + ulint restore_page_no) +{ + /* Find if double write buffer contains page_no of given space id. */ + const byte* page = recv_sys->dblwr.find_page( + m_space_id, restore_page_no); + + if (page == NULL) { + /* If the first page of the given user tablespace is not there + in the doublewrite buffer, then the recovery is going to fail + now. Hence this is treated as an error. 
*/ + + ib::error() + << "Corrupted page " + << page_id_t(m_space_id, restore_page_no) + << " of datafile '" << m_filepath + << "' could not be found in the doublewrite buffer."; + + return(DB_CORRUPTION); + } + + const ulint flags = mach_read_from_4( + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + + const page_size_t page_size(flags); + + ut_a(page_get_page_no(page) == restore_page_no); + + ib::info() << "Restoring page " + << page_id_t(m_space_id, restore_page_no) + << " of datafile '" << m_filepath + << "' from the doublewrite buffer. Writing " + << page_size.physical() << " bytes into file '" + << m_filepath << "'"; + + IORequest request(IORequest::WRITE); + + /* Note: The pages are written out as uncompressed because we don't + have the compression algorithm information at this point. */ + + request.disable_compression(); + + return(os_file_write( + request, + m_filepath, m_handle, page, 0, page_size.physical())); +} + +/** Create a link filename based on the contents of m_name, +open that file, and read the contents into m_filepath. +@retval DB_SUCCESS if remote linked tablespace file is opened and read. +@retval DB_CANNOT_OPEN_FILE if the link file does not exist. */ +dberr_t +RemoteDatafile::open_link_file() +{ + set_link_filepath(NULL); + m_filepath = read_link_file(m_link_filepath); + + return(m_filepath == NULL ? DB_CANNOT_OPEN_FILE : DB_SUCCESS); +} + +/** Opens a handle to the file linked to in an InnoDB Symbolic Link file +in read-only mode so that it can be validated. +@param[in] strict whether to issue error messages +@return DB_SUCCESS if remote linked tablespace file is found and opened. 
*/ +dberr_t +RemoteDatafile::open_read_only(bool strict) +{ + if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) { + return(DB_ERROR); + } + + dberr_t err = Datafile::open_read_only(strict); + + if (err != DB_SUCCESS && strict) { + /* The following call prints an error message */ + os_file_get_last_error(true); + ib::error() << "A link file was found named '" + << m_link_filepath << "' but the linked tablespace '" + << m_filepath << "' could not be opened read-only."; + } + + return(err); +} + +/** Opens a handle to the file linked to in an InnoDB Symbolic Link file +in read-write mode so that it can be restored from doublewrite and validated. +@param[in] read_only_mode If true, then readonly mode checks are enforced. +@return DB_SUCCESS if remote linked tablespace file is found and opened. */ +dberr_t +RemoteDatafile::open_read_write(bool read_only_mode) +{ + if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) { + return(DB_ERROR); + } + + dberr_t err = Datafile::open_read_write(read_only_mode); + + if (err != DB_SUCCESS) { + /* The following call prints an error message */ + m_last_os_error = os_file_get_last_error(true); + ib::error() << "A link file was found named '" + << m_link_filepath << "' but the linked data file '" + << m_filepath << "' could not be opened for writing."; + } + + return(err); +} + +/** Release the resources. */ +void +RemoteDatafile::shutdown() +{ + Datafile::shutdown(); + + if (m_link_filepath != 0) { + ut_free(m_link_filepath); + m_link_filepath = 0; + } +} + +/** Set the link filepath. Use default datadir, the base name of +the path provided without its suffix, plus DOT_ISL. +@param[in] path filepath which contains a basename to use. + If NULL, use m_name as the basename. */ +void +RemoteDatafile::set_link_filepath(const char* path) +{ + if (m_link_filepath != NULL) { + return; + } + + if (path != NULL && FSP_FLAGS_GET_SHARED(flags())) { + /* Make the link_filepath based on the basename. 
*/ + ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], + DOT_IBD) == 0); + + m_link_filepath = fil_make_filepath(NULL, base_name(path), + ISL, false); + } else { + /* Make the link_filepath based on the m_name. */ + m_link_filepath = fil_make_filepath(NULL, name(), ISL, false); + } +} + +/** Creates a new InnoDB Symbolic Link (ISL) file. It is always created +under the 'datadir' of MySQL. The datadir is the directory of a +running mysqld program. We can refer to it by simply using the path ".". +@param[in] name tablespace name +@param[in] filepath remote filepath of tablespace datafile +@param[in] is_shared true for general tablespace, + false for file-per-table +@return DB_SUCCESS or error code */ +dberr_t +RemoteDatafile::create_link_file( + const char* name, + const char* filepath, + bool is_shared) +{ + bool success; + dberr_t err = DB_SUCCESS; + char* link_filepath = NULL; + char* prev_filepath = NULL; + + ut_ad(!srv_read_only_mode); + ut_ad(0 == strcmp(&filepath[strlen(filepath) - 4], DOT_IBD)); + + if (is_shared) { + /* The default location for a shared tablespace is the + datadir. We previously made sure that this filepath is + not under the datadir. If it is in the datadir there + is no need for a link file. */ + + size_t len = dirname_length(filepath); + if (len == 0) { + /* File is in the datadir. */ + return(DB_SUCCESS); + } + + Folder folder(filepath, len); + + if (folder_mysql_datadir == folder) { + /* File is in the datadir. */ + return(DB_SUCCESS); + } + + /* Use the file basename to build the ISL filepath. */ + link_filepath = fil_make_filepath(NULL, base_name(filepath), + ISL, false); + } else { + link_filepath = fil_make_filepath(NULL, name, ISL, false); + } + if (link_filepath == NULL) { + return(DB_ERROR); + } + + prev_filepath = read_link_file(link_filepath); + if (prev_filepath) { + /* Truncate will call this with an existing + link file which contains the same filepath. 
*/ + bool same = !strcmp(prev_filepath, filepath); + ut_free(prev_filepath); + if (same) { + ut_free(link_filepath); + return(DB_SUCCESS); + } + } + + /** Check if the file already exists. */ + FILE* file = NULL; + bool exists; + os_file_type_t ftype; + + success = os_file_status(link_filepath, &exists, &ftype); + ulint error = 0; + + if (success && !exists) { + + file = fopen(link_filepath, "w"); + if (file == NULL) { + /* This call will print its own error message */ + error = os_file_get_last_error(true); + } + } else { + error = OS_FILE_ALREADY_EXISTS; + } + + if (error != 0) { + + ib::error() << "Cannot create file " << link_filepath << "."; + + if (error == OS_FILE_ALREADY_EXISTS) { + ib::error() << "The link file: " << link_filepath + << " already exists."; + err = DB_TABLESPACE_EXISTS; + + } else if (error == OS_FILE_DISK_FULL) { + err = DB_OUT_OF_FILE_SPACE; + + } else { + err = DB_ERROR; + } + + /* file is not open, no need to close it. */ + ut_free(link_filepath); + return(err); + } + + ulint rbytes = fwrite(filepath, 1, strlen(filepath), file); + + if (rbytes != strlen(filepath)) { + error = os_file_get_last_error(true); + ib::error() << + "Cannot write link file: " + << link_filepath << " filepath: " << filepath; + err = DB_ERROR; + } + + /* Close the file, we only need it at startup */ + fclose(file); + + ut_free(link_filepath); + + return(err); +} + +/** Delete an InnoDB Symbolic Link (ISL) file. */ +void +RemoteDatafile::delete_link_file(void) +{ + ut_ad(m_link_filepath != NULL); + + if (m_link_filepath != NULL) { + os_file_delete_if_exists(innodb_data_file_key, + m_link_filepath, NULL); + } +} + +/** Delete an InnoDB Symbolic Link (ISL) file by name. 
+@param[in] name tablespace name */ +void +RemoteDatafile::delete_link_file( + const char* name) +{ + char* link_filepath = fil_make_filepath(NULL, name, ISL, false); + + if (link_filepath != NULL) { + os_file_delete_if_exists( + innodb_data_file_key, link_filepath, NULL); + + ut_free(link_filepath); + } +} + +/** Read an InnoDB Symbolic Link (ISL) file by name. +It is always created under the datadir of MySQL. +For file-per-table tablespaces, the isl file is expected to be +in a 'database' directory and called 'tablename.isl'. +For general tablespaces, there will be no 'database' directory. +The 'basename.isl' will be in the datadir. +The caller must free the memory returned if it is not null. +@param[in] link_filepath filepath of the ISL file +@return Filepath of the IBD file read from the ISL file */ +char* +RemoteDatafile::read_link_file( + const char* link_filepath) +{ + char* filepath = NULL; + FILE* file = NULL; + + file = fopen(link_filepath, "r+b"); + if (file == NULL) { + return(NULL); + } + + filepath = static_cast( + ut_malloc_nokey(OS_FILE_MAX_PATH)); + + os_file_read_string(file, filepath, OS_FILE_MAX_PATH); + fclose(file); + + if (filepath[0] != '\0') { + /* Trim whitespace from end of filepath */ + ulint last_ch = strlen(filepath) - 1; + while (last_ch > 4 && filepath[last_ch] <= 0x20) { + filepath[last_ch--] = 0x00; + } + os_normalize_path(filepath); + } + + return(filepath); +} diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index d51e36bc0ba..9dc99f3f09d 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -23,74 +23,49 @@ File space management Created 11/29/1995 Heikki Tuuri ***********************************************************************/ +#include "ha_prototypes.h" + #include "fsp0fsp.h" #ifdef UNIV_NONINL #include "fsp0fsp.ic" #endif +#ifdef UNIV_HOTBACKUP +# include "fut0lst.h" +#else /* UNIV_HOTBACKUP */ #include "buf0buf.h" #include "fil0fil.h" #include "fil0crypt.h" 
#include "mtr0log.h" #include "ut0byte.h" #include "page0page.h" -#include "page0zip.h" -#ifdef UNIV_HOTBACKUP -# include "fut0lst.h" -#else /* UNIV_HOTBACKUP */ -# include "sync0sync.h" -# include "fut0fut.h" -# include "srv0srv.h" -# include "ibuf0ibuf.h" -# include "btr0btr.h" -# include "btr0sea.h" -# include "dict0boot.h" -# include "log0log.h" -#endif /* UNIV_HOTBACKUP */ -#include "dict0mem.h" +#include "fut0fut.h" +#include "srv0srv.h" #include "srv0start.h" +#include "ibuf0ibuf.h" +#include "btr0btr.h" +#include "btr0sea.h" +#include "dict0boot.h" +#include "log0log.h" +#include "fsp0sysspace.h" +#include "dict0mem.h" +#include "fsp0types.h" +// JAN: MySQL 5.7 Encryption +// #include -#ifndef UNIV_HOTBACKUP -/** Flag to indicate if we have printed the tablespace full error. */ -static ibool fsp_tbs_full_error_printed = FALSE; - -/**********************************************************************//** -Returns an extent to the free list of a space. */ +/** Returns an extent to the free list of a space. +@param[in] page_id page id in the extent +@param[in] page_size page size +@param[in,out] mtr mini-transaction */ static void fsp_free_extent( -/*============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Frees an extent of a segment to the space free list. 
*/ -static -void -fseg_free_extent( -/*=============*/ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Calculates the number of pages reserved by a segment, and how -many pages are currently used. -@return number of reserved pages */ -static -ulint -fseg_n_reserved_pages_low( -/*======================*/ - fseg_inode_t* header, /*!< in: segment inode */ - ulint* used, /*!< out: number of pages used (not - more than reserved) */ - mtr_t* mtr); /*!< in/out: mini-transaction */ + const page_id_t& page_id, + const page_size_t& page_size, + mtr_t* mtr); + /********************************************************************//** Marks a page used. The page must reside within the extents of the given segment. */ @@ -102,115 +77,320 @@ fseg_mark_page_used( ulint page, /*!< in: page offset */ xdes_t* descr, /*!< in: extent descriptor */ mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Returns the first extent descriptor for a segment. We think of the extent -lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL --> FSEG_FREE. -@return the first extent descriptor, or NULL if none */ + +/** Returns the first extent descriptor for a segment. +We think of the extent lists of the segment catenated in the order +FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE. 
+@param[in] inode segment inode +@param[in] space_id space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return the first extent descriptor, or NULL if none */ static xdes_t* fseg_get_first_extent( -/*==================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Puts new extents to the free list if -there are free extents above the free limit. If an extent happens -to contain an extent descriptor page, the extent is put to -the FSP_FREE_FRAG list with the page marked as used. */ -static + fseg_inode_t* inode, + ulint space_id, + const page_size_t& page_size, + mtr_t* mtr); + +/** Put new extents to the free list if there are free extents above the free +limit. If an extent happens to contain an extent descriptor page, the extent +is put to the FSP_FREE_FRAG list with the page marked as used. +@param[in] init_space true if this is a single-table tablespace +and we are only initializing the first extent and the first bitmap pages; +then we will not allocate more extents +@param[in,out] space tablespace +@param[in,out] header tablespace header +@param[in,out] mtr mini-transaction */ +static UNIV_COLD void fsp_fill_free_list( -/*===============*/ - ibool init_space, /*!< in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Allocates a single free page from a segment. 
This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded + bool init_space, + fil_space_t* space, + fsp_header_t* header, + mtr_t* mtr); + +/** Allocates a single free page from a segment. +This function implements the intelligent allocation strategy which tries +to minimize file space fragmentation. +@param[in,out] space tablespace +@param[in] page_size page size +@param[in,out] seg_inode segment inode +@param[in] hint hint of which page would be desirable +@param[in] direction if the new page is needed because of +an index page split, and records are inserted there in order, into which +direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR +@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH +@param[in,out] mtr mini-transaction +@param[in,out] init_mtr mtr or another mini-transaction in +which the page should be initialized. 
If init_mtr != mtr, but the page is +already latched in mtr, do not initialize the page +@param[in] has_done_reservation TRUE if the space has already been +reserved, in this case we will never return NULL +@retval NULL if no page could be allocated +@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ +@retval block (not allocated or initialized) otherwise */ static buf_block_t* fseg_alloc_free_page_low( -/*=====================*/ - ulint space, /*!< in: space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in/out: segment inode */ - ulint hint, /*!< in: hint of which page would be - desirable */ - byte direction, /*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized. - If init_mtr!=mtr, but the page is already - latched in mtr, do not initialize the page. */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -#endif /* !UNIV_HOTBACKUP */ + fil_space_t* space, + const page_size_t& page_size, + fseg_inode_t* seg_inode, + ulint hint, + byte direction, + rw_lock_type_t rw_latch, + mtr_t* mtr, + mtr_t* init_mtr +#ifdef UNIV_DEBUG + , ibool has_done_reservation +#endif /* UNIV_DEBUG */ +) + MY_ATTRIBUTE((warn_unused_result)); -/**********************************************************************//** -Reads the file space size stored in the header page. 
-@return tablespace size stored in the space header */ -UNIV_INTERN -ulint -fsp_get_size_low( -/*=============*/ - page_t* page) /*!< in: header page (page 0 in the tablespace) */ -{ - return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE)); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Gets a pointer to the space header and x-locks its page. -@return pointer to the space header, page x-locked */ +/** Gets a pointer to the space header and x-locks its page. +@param[in] id space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return pointer to the space header, page x-locked */ UNIV_INLINE fsp_header_t* fsp_get_space_header( -/*=================*/ - ulint id, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint id, + const page_size_t& page_size, + mtr_t* mtr) { buf_block_t* block; fsp_header_t* header; - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_ad(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); - ut_ad(id || !zip_size); + ut_ad(id != 0 || !page_size.is_compressed()); - block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr); + block = buf_page_get(page_id_t(id, 0), page_size, RW_SX_LATCH, mtr); header = FSP_HEADER_OFFSET + buf_block_get_frame(block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header)); - ut_ad(zip_size == fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + header))); +#ifdef UNIV_DEBUG + const ulint flags = mach_read_from_4(FSP_SPACE_FLAGS + header); + ut_ad(page_size_t(flags).equals_to(page_size)); +#endif /* UNIV_DEBUG */ return(header); } +/** Convert a 32 bit integer tablespace flags to the 32 bit table flags. +This can only be done for a tablespace that was built as a file-per-table +tablespace. 
Note that the fsp_flags cannot show the difference between a +Compact and Redundant table, so an extra Compact boolean must be supplied. + Low order bit + | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC +fil_space_t::flags | 0 | 0 | 1 | 1 +dict_table_t::flags | 0 | 1 | 1 | 1 +@param[in] fsp_flags fil_space_t::flags +@param[in] compact true if not Redundant row format +@return tablespace flags (fil_space_t::flags) */ +ulint +fsp_flags_to_dict_tf( + ulint fsp_flags, + bool compact) +{ + /* If the table in this file-per-table tablespace is Compact + row format, the low order bit will not indicate Compact. */ + bool post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); + bool atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); + bool data_dir = FSP_FLAGS_HAS_DATA_DIR(fsp_flags); + bool shared_space = FSP_FLAGS_GET_SHARED(fsp_flags); + bool page_compressed = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); + ulint comp_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); + bool atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + + /* FSP_FLAGS_GET_TEMPORARY(fsp_flags) does not have an equivalent + flag position in the table flags. But it would go into flags2 if + any code is created where that is needed. */ + + ulint flags = dict_tf_init(post_antelope | compact, zip_ssize, + atomic_blobs, data_dir, shared_space, + page_compressed, comp_level, atomic_writes); + + return(flags); +} +#endif /* !UNIV_HOTBACKUP */ + +/** Validate the tablespace flags. +These flags are stored in the tablespace header at offset FSP_SPACE_FLAGS. +They should be 0 for ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. +The newer row formats, COMPRESSED and DYNAMIC, use a file format > Antelope +so they should have a file format number plus the DICT_TF_COMPACT bit set. 
+@param[in] flags Tablespace flags +@return true if valid, false if not */ +bool +fsp_flags_is_valid( + ulint flags) +{ + bool post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); + bool atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); + bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + bool is_shared = FSP_FLAGS_GET_SHARED(flags); + bool is_temp = FSP_FLAGS_GET_TEMPORARY(flags); + bool is_encryption = FSP_FLAGS_GET_ENCRYPTION(flags); + ulint unused = FSP_FLAGS_GET_UNUSED(flags); + bool page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + + const char *file; + ulint line; + +#define GOTO_ERROR file = __FILE__; line = __LINE__; goto err_exit; + + DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); + + /* The Antelope row formats REDUNDANT and COMPACT did + not use tablespace flags, so the entire 4-byte field + is zero for Antelope row formats. */ + if (flags == 0) { + return(true); + } + + /* Barracuda row formats COMPRESSED and DYNAMIC use a feature called + ATOMIC_BLOBS which builds on the page structure introduced for the + COMPACT row format by allowing long fields to be broken into prefix + and externally stored parts. So if it is Post_antelope, it uses + Atomic BLOBs. */ + if (post_antelope != atomic_blobs) { + GOTO_ERROR; + return(false); + } + + /* Make sure there are no bits that we do not know about. */ + if (unused != 0) { + GOTO_ERROR; + } + + /* The zip ssize can be zero if it is other than compressed row format, + or it could be from 1 to the max. */ + if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + GOTO_ERROR; + } + + /* The actual page size must be within 4k and 16K (3 =< ssize =< 5). 
*/ + if (page_ssize != 0 + && (page_ssize < UNIV_PAGE_SSIZE_MIN + || page_ssize > UNIV_PAGE_SSIZE_MAX)) { + GOTO_ERROR; + } + + /* Only single-table tablespaces use the DATA DIRECTORY clause. + It is not compatible with the TABLESPACE clause. Nor is it + compatible with the TEMPORARY clause. */ + if (has_data_dir && (is_shared || is_temp)) { + GOTO_ERROR; + return(false); + } + + /* Only single-table and not temp tablespaces use the encryption + clause. */ + if (is_encryption && (is_shared || is_temp)) { + GOTO_ERROR; + } + + /* Page compression level requires page compression and atomic blobs + to be set */ + if (page_compression_level || page_compression) { + if (!page_compression || !atomic_blobs) { + GOTO_ERROR; + } + } + + if (atomic_writes > ATOMIC_WRITES_OFF) { + GOTO_ERROR; + return (false); + } + +#if UNIV_FORMAT_MAX != UNIV_FORMAT_B +# error UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations. +#endif +#if FSP_FLAGS_POS_UNUSED != 13 + //# error You have added a new FSP_FLAG without adding a validation check. +#endif + + return(true); + +err_exit: + ib::error() << "Tablespace flags: " << flags << " corrupted " + << " in file: " << file << " line: " << line + << " post_antelope: " << post_antelope + << " atomic_blobs: " << atomic_blobs + << " unused: " << unused + << " zip_ssize: " << zip_ssize << " max: " << PAGE_ZIP_SSIZE_MAX + << " page_ssize: " << page_ssize + << " " << UNIV_PAGE_SSIZE_MIN << ":" << UNIV_PAGE_SSIZE_MAX + << " has_data_dir: " << has_data_dir + << " is_shared: " << is_shared + << " is_temp: " << is_temp + << " is_encryption: " << is_encryption + << " page_compressed: " << page_compression + << " page_compression_level: " << page_compression_level + << " atomic_writes: " << atomic_writes; + return (false); +} + +/** Check if tablespace is system temporary. +@param[in] space_id tablespace ID +@return true if tablespace is system temporary. 
*/ +bool +fsp_is_system_temporary( + ulint space_id) +{ + return(space_id == srv_tmp_space.space_id()); +} + +/** Check if checksum is disabled for the given space. +@param[in] space_id tablespace ID +@return true if checksum is disabled for given space. */ +bool +fsp_is_checksum_disabled( + ulint space_id) +{ + return(fsp_is_system_temporary(space_id)); +} + +/** Check if tablespace is file-per-table. +@param[in] space_id tablespace ID +@param[in] fsp_flags tablespace flags +@return true if tablespace is file-per-table. */ +bool +fsp_is_file_per_table( + ulint space_id, + ulint fsp_flags) +{ + return(!is_system_tablespace(space_id) + && !fsp_is_shared_tablespace(fsp_flags)); +} + +#ifndef UNIV_HOTBACKUP +#ifdef UNIV_DEBUG + +/** Skip some of the sanity checks that are time consuming even in debug mode +and can affect frequent verification runs that are done to ensure stability of +the product. +@return true if check should be skipped for given space. */ +bool +fsp_skip_sanity_check( + ulint space_id) +{ + return(srv_skip_temp_table_checks_debug + && fsp_is_system_temporary(space_id)); +} + +#endif /* UNIV_DEBUG */ + /**********************************************************************//** Gets a descriptor bit of a page. -@return TRUE if free */ +@return TRUE if free */ UNIV_INLINE ibool xdes_mtr_get_bit( @@ -221,8 +401,8 @@ xdes_mtr_get_bit( 0 ... 
FSP_EXTENT_SIZE - 1 */ mtr_t* mtr) /*!< in: mini-transaction */ { - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr->is_active()); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); return(xdes_get_bit(descr, bit, offset)); } @@ -245,7 +425,7 @@ xdes_set_bit( ulint bit_index; ulint descr_byte; - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT)); ut_ad(offset < FSP_EXTENT_SIZE); @@ -254,8 +434,7 @@ xdes_set_bit( byte_index = index / 8; bit_index = index % 8; - descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index, - MLOG_1BYTE, mtr); + descr_byte = mach_read_from_1(descr + XDES_BITMAP + byte_index); descr_byte = ut_bit_set_nth(descr_byte, bit_index, val); mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte, @@ -266,7 +445,7 @@ xdes_set_bit( Looks for a descriptor bit having the desired value. Starts from hint and scans upward; at the end of the extent the search is wrapped to the start of the extent. -@return bit index of the bit, ULINT_UNDEFINED if not found */ +@return bit index of the bit, ULINT_UNDEFINED if not found */ UNIV_INLINE ulint xdes_find_bit( @@ -283,7 +462,7 @@ xdes_find_bit( ut_ad(descr && mtr); ut_ad(val <= TRUE); ut_ad(hint < FSP_EXTENT_SIZE); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); for (i = hint; i < FSP_EXTENT_SIZE; i++) { if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) { @@ -303,7 +482,7 @@ xdes_find_bit( /**********************************************************************//** Returns the number of used pages in a descriptor. 
-@return number of pages used */ +@return number of pages used */ UNIV_INLINE ulint xdes_get_n_used( @@ -314,7 +493,7 @@ xdes_get_n_used( ulint count = 0; ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) { if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) { count++; @@ -326,7 +505,7 @@ xdes_get_n_used( /**********************************************************************//** Returns true if extent contains no used pages. -@return TRUE if totally free */ +@return TRUE if totally free */ UNIV_INLINE ibool xdes_is_free( @@ -344,7 +523,7 @@ xdes_is_free( /**********************************************************************//** Returns true if extent contains no free pages. -@return TRUE if full */ +@return TRUE if full */ UNIV_INLINE ibool xdes_is_full( @@ -373,14 +552,14 @@ xdes_set_state( ut_ad(descr && mtr); ut_ad(state >= XDES_FREE); ut_ad(state <= XDES_FSEG); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr); } /**********************************************************************//** Gets the state of an xdes. 
-@return state */ +@return state */ UNIV_INLINE ulint xdes_get_state( @@ -391,9 +570,9 @@ xdes_get_state( ulint state; ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); - state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr); + state = mach_read_from_4(descr + XDES_STATE); ut_ad(state - 1 < XDES_FSEG); return(state); } @@ -410,7 +589,7 @@ xdes_init( ulint i; ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX)); ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0); for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) { @@ -420,93 +599,114 @@ xdes_init( xdes_set_state(descr, XDES_FREE, mtr); } -/********************************************************************//** -Gets pointer to a the extent descriptor of a page. The page where the extent -descriptor resides is x-locked. This function no longer extends the data -file. +/** Get pointer to a the extent descriptor of a page. 
+@param[in,out] sp_header tablespace header page, x-latched +@param[in] space tablespace identifier +@param[in] offset page offset +@param[in,out] mtr mini-transaction +@param[in] init_space whether the tablespace is being initialized +@param[out] desc_block descriptor block, or NULL if it is +the same as the tablespace header @return pointer to the extent descriptor, NULL if the page does not -exist in the space or if the offset is >= the free limit */ -UNIV_INLINE MY_ATTRIBUTE((nonnull, warn_unused_result)) +exist in the space or if the offset exceeds free limit */ +UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) xdes_t* xdes_get_descriptor_with_space_hdr( -/*===============================*/ - fsp_header_t* sp_header, /*!< in/out: space header, x-latched - in mtr */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset; if equal - to the free limit, we try to - add new extents to the space - free list */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fsp_header_t* sp_header, + ulint space, + ulint offset, + mtr_t* mtr, + bool init_space = false, + buf_block_t** desc_block = NULL) { ulint limit; ulint size; - ulint zip_size; ulint descr_page_no; + ulint flags; page_t* descr_page; - - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); +#ifdef UNIV_DEBUG + const fil_space_t* fspace = fil_space_get(space); + ut_ad(fspace != NULL); +#endif /* UNIV_DEBUG */ + ut_ad(mtr_memo_contains(mtr, &fspace->latch, MTR_MEMO_X_LOCK)); + ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_SX_FIX)); ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET); /* Read free limit and space size */ limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT); size = mach_read_from_4(sp_header + FSP_SIZE); - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(sp_header + FSP_SPACE_FLAGS)); + flags = mach_read_from_4(sp_header + FSP_SPACE_FLAGS); + ut_ad(limit == 
fspace->free_limit + || (fspace->free_limit == 0 + && (init_space + || fspace->purpose == FIL_TYPE_TEMPORARY + || (srv_startup_is_before_trx_rollback_phase + && fspace->id <= srv_undo_tablespaces)))); + ut_ad(size == fspace->size_in_header); + ut_ad(flags == fspace->flags); if ((offset >= size) || (offset >= limit)) { return(NULL); } - descr_page_no = xdes_calc_descriptor_page(zip_size, offset); + const page_size_t page_size(flags); + + descr_page_no = xdes_calc_descriptor_page(page_size, offset); + + buf_block_t* block; if (descr_page_no == 0) { /* It is on the space header page */ descr_page = page_align(sp_header); + block = NULL; } else { - buf_block_t* block; + block = buf_page_get( + page_id_t(space, descr_page_no), page_size, + RW_SX_LATCH, mtr); - block = buf_page_get(space, zip_size, descr_page_no, - RW_X_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); descr_page = buf_block_get_frame(block); } + if (desc_block != NULL) { + *desc_block = block; + } + return(descr_page + XDES_ARR_OFFSET - + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)); + + XDES_SIZE * xdes_calc_descriptor_index(page_size, offset)); } -/********************************************************************//** -Gets pointer to a the extent descriptor of a page. The page where the -extent descriptor resides is x-locked. If the page offset is equal to -the free limit of the space, adds new extents from above the free limit -to the space free list, if not free limit == space size. This adding +/** Gets pointer to a the extent descriptor of a page. +The page where the extent descriptor resides is x-locked. If the page offset +is equal to the free limit of the space, adds new extents from above the free +limit to the space free list, if not free limit == space size. This adding is necessary to make the descriptor defined, as they are uninitialized above the free limit. 
+@param[in] space_id space id +@param[in] offset page offset; if equal to the free limit, we +try to add new extents to the space free list +@param[in] page_size page size +@param[in,out] mtr mini-transaction @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) xdes_t* xdes_get_descriptor( -/*================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page offset; if equal to the free limit, - we try to add new extents to the space free list */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint space_id, + ulint offset, + const page_size_t& page_size, + mtr_t* mtr) { buf_block_t* block; fsp_header_t* sp_header; - block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr); + block = buf_page_get(page_id_t(space_id, 0), page_size, + RW_SX_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_FSP_PAGE); sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block); - return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset, + return(xdes_get_descriptor_with_space_hdr(sp_header, space_id, offset, mtr)); } @@ -514,14 +714,13 @@ xdes_get_descriptor( Gets pointer to a the extent descriptor if the file address of the descriptor list node is known. The page where the extent descriptor resides is x-locked. 
-@return pointer to the extent descriptor */ +@return pointer to the extent descriptor */ UNIV_INLINE xdes_t* xdes_lst_get_descriptor( /*====================*/ ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ + const page_size_t& page_size, fil_addr_t lst_node,/*!< in: file address of the list node contained in the descriptor */ mtr_t* mtr) /*!< in/out: mini-transaction */ @@ -531,7 +730,7 @@ xdes_lst_get_descriptor( ut_ad(mtr); ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), MTR_MEMO_X_LOCK)); - descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr) + descr = fut_get_ptr(space, page_size, lst_node, RW_SX_LATCH, mtr) - XDES_FLST_NODE; return(descr); @@ -539,7 +738,7 @@ xdes_lst_get_descriptor( /********************************************************************//** Returns page offset of the first page in extent described by a descriptor. -@return offset of the first page in extent */ +@return offset of the first page in extent */ UNIV_INLINE ulint xdes_get_offset( @@ -565,52 +764,89 @@ fsp_init_file_page_low( page_t* page = buf_block_get_frame(block); page_zip_des_t* page_zip= buf_block_get_page_zip(block); -#ifndef UNIV_HOTBACKUP - block->check_index_page_at_flush = FALSE; -#endif /* !UNIV_HOTBACKUP */ + if (!fsp_is_system_temporary(block->page.id.space())) { + memset(page, 0, UNIV_PAGE_SIZE); + } + + mach_write_to_4(page + FIL_PAGE_OFFSET, block->page.id.page_no()); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + block->page.id.space()); if (page_zip) { - memset(page, 0, UNIV_PAGE_SIZE); memset(page_zip->data, 0, page_zip_get_size(page_zip)); - mach_write_to_4(page + FIL_PAGE_OFFSET, - buf_block_get_page_no(block)); - mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - buf_block_get_space(block)); memcpy(page_zip->data + FIL_PAGE_OFFSET, page + FIL_PAGE_OFFSET, 4); memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, page + 
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4); - return; } - - memset(page, 0, UNIV_PAGE_SIZE); - mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block)); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - buf_block_get_space(block)); } #ifndef UNIV_HOTBACKUP -/***********************************************************//** -Inits a file page whose prior contents should be ignored. */ +# ifdef UNIV_DEBUG +/** Assert that the mini-transaction is compatible with +updating an allocation bitmap page. +@param[in] id tablespace identifier +@param[in] mtr mini-transaction */ +static +void +fsp_space_modify_check( + ulint id, + const mtr_t* mtr) +{ + switch (mtr->get_log_mode()) { + case MTR_LOG_SHORT_INSERTS: + case MTR_LOG_NONE: + /* These modes are only allowed within a non-bitmap page + when there is a higher-level redo log record written. */ + break; + case MTR_LOG_NO_REDO: +#ifdef UNIV_DEBUG + { + const fil_type_t type = fil_space_get_type(id); + ut_a(id == srv_tmp_space.space_id() + || srv_is_tablespace_truncated(id) + || fil_space_is_being_truncated(id) + || fil_space_get_flags(id) == ULINT_UNDEFINED + || type == FIL_TYPE_TEMPORARY + || type == FIL_TYPE_IMPORT + || fil_space_is_redo_skipped(id)); + } +#endif /* UNIV_DEBUG */ + return; + case MTR_LOG_ALL: + /* We must not write redo log for the shared temporary + tablespace. */ + ut_ad(id != srv_tmp_space.space_id()); + /* If we write redo log, the tablespace must exist. */ + ut_ad(fil_space_get_type(id) == FIL_TYPE_TABLESPACE); + ut_ad(mtr->is_named_space(id)); + return; + } + + ut_ad(0); +} +# endif /* UNIV_DEBUG */ + +/** Initialize a file page. 
+@param[in,out] block file page +@param[in,out] mtr mini-transaction */ static void fsp_init_file_page( -/*===============*/ - buf_block_t* block, /*!< in: pointer to a page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + buf_block_t* block, + mtr_t* mtr) { fsp_init_file_page_low(block); + ut_d(fsp_space_modify_check(block->page.id.space(), mtr)); mlog_write_initial_log_record(buf_block_get_frame(block), - MLOG_INIT_FILE_PAGE, mtr); + MLOG_INIT_FILE_PAGE2, mtr); } #endif /* !UNIV_HOTBACKUP */ /***********************************************************//** Parses a redo log record of a file page init. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* fsp_parse_init_file_page( /*=====================*/ @@ -618,7 +854,8 @@ fsp_parse_init_file_page( byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ buf_block_t* block) /*!< in: block or NULL */ { - ut_ad(ptr && end_ptr); + ut_ad(ptr != NULL); + ut_ad(end_ptr != NULL); if (block) { fsp_init_file_page_low(block); @@ -629,7 +866,6 @@ fsp_parse_init_file_page( /**********************************************************************//** Initializes the fsp system. */ -UNIV_INTERN void fsp_init(void) /*==========*/ @@ -652,7 +888,6 @@ fsp_init(void) Writes the space id and flags to a tablespace header. The flags contain row type, physical/compressed page size, and logical/uncompressed page size of the tablespace. */ -UNIV_INTERN void fsp_header_init_fields( /*===================*/ @@ -669,32 +904,233 @@ fsp_header_init_fields( } #ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Initializes the space header of a new created space and creates also the -insert buffer tree root if space == 0. */ -UNIV_INTERN -void +/** Get the offset of encrytion information in page 0. +@param[in] page_size page size. +@return offset on success, otherwise 0. 
*/ +static +ulint +fsp_header_get_encryption_offset( + const page_size_t& page_size) +{ + ulint offset; +#ifdef UNIV_DEBUG + ulint left_size; +#endif + + offset = XDES_ARR_OFFSET + XDES_SIZE * xdes_arr_size(page_size); +#ifdef UNIV_DEBUG + left_size = page_size.physical() - FSP_HEADER_OFFSET - offset + - FIL_PAGE_DATA_END; + + ut_ad(left_size >= ENCRYPTION_INFO_SIZE_V2); +#endif + + return offset; +} + +#if 0 /* MySQL 5.7 Encryption */ +/** Fill the encryption info. +@param[in] space tablespace +@param[in,out] encrypt_info buffer for encrypt key. +@return true if success. */ +bool +fsp_header_fill_encryption_info( + fil_space_t* space, + byte* encrypt_info) +{ + byte* ptr; + lint elen; + ulint master_key_id; + byte* master_key; + byte key_info[ENCRYPTION_KEY_LEN * 2]; + ulint crc; + Encryption::Version version; +#ifdef UNIV_ENCRYPT_DEBUG + const byte* data; + ulint i; +#endif + + /* Get master key from key ring */ + Encryption::get_master_key(&master_key_id, &master_key, &version); + if (master_key == NULL) { + return(false); + } + + memset(encrypt_info, 0, ENCRYPTION_INFO_SIZE_V2); + memset(key_info, 0, ENCRYPTION_KEY_LEN * 2); + + /* Use the new master key to encrypt the tablespace + key. */ + ut_ad(encrypt_info != NULL); + ptr = encrypt_info; + + /* Write magic header. */ + if (version == Encryption::ENCRYPTION_VERSION_1) { + memcpy(ptr, ENCRYPTION_KEY_MAGIC_V1, ENCRYPTION_MAGIC_SIZE); + } else { + memcpy(ptr, ENCRYPTION_KEY_MAGIC_V2, ENCRYPTION_MAGIC_SIZE); + } + ptr += ENCRYPTION_MAGIC_SIZE; + + /* Write master key id. */ + mach_write_to_4(ptr, master_key_id); + ptr += sizeof(ulint); + + /* Write server uuid. */ + if (version == Encryption::ENCRYPTION_VERSION_2) { + memcpy(ptr, Encryption::uuid, ENCRYPTION_SERVER_UUID_LEN); + ptr += ENCRYPTION_SERVER_UUID_LEN; + } + + /* Write tablespace key to temp space. */ + memcpy(key_info, + space->encryption_key, + ENCRYPTION_KEY_LEN); + + /* Write tablespace iv to temp space. 
*/ + memcpy(key_info + ENCRYPTION_KEY_LEN, + space->encryption_iv, + ENCRYPTION_KEY_LEN); + +#ifdef UNIV_ENCRYPT_DEBUG + fprintf(stderr, "Set %lu:%lu ",space->id, + Encryption::master_key_id); + for (data = (const byte*) master_key, i = 0; + i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); + fprintf(stderr, " "); + for (data = (const byte*) space->encryption_key, + i = 0; i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); + fprintf(stderr, " "); + for (data = (const byte*) space->encryption_iv, + i = 0; i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); + fprintf(stderr, "\n"); +#endif + /* Encrypt tablespace key and iv. */ + elen = my_aes_encrypt( + key_info, + ENCRYPTION_KEY_LEN * 2, + ptr, + master_key, + ENCRYPTION_KEY_LEN, + my_aes_256_ecb, + NULL, false); + + if (elen == MY_AES_BAD_DATA) { + my_free(master_key); + return(false); + } + + ptr += ENCRYPTION_KEY_LEN * 2; + + /* Write checksum bytes. */ + crc = ut_crc32(key_info, ENCRYPTION_KEY_LEN * 2); + mach_write_to_4(ptr, crc); + + my_free(master_key); + return(true); +} +#endif /* ! */ + +/** Rotate the encryption info in the space header. +@param[in] space tablespace +@param[in] encrypt_info buffer for re-encrypt key. +@param[in,out] mtr mini-transaction +@return true if success. */ +bool +fsp_header_rotate_encryption( + fil_space_t* space, + byte* encrypt_info, + mtr_t* mtr) +{ + buf_block_t* block; + ulint offset; + + ut_ad(mtr); + + const page_size_t page_size(space->flags); + +#if MYSQL_ENCRYPTION + page_t* page; + ulint master_key_id; + ut_ad(space->encryption_type != Encryption::NONE); + /* Fill encryption info. */ + if (!fsp_header_fill_encryption_info(space, + encrypt_info)) { + return(false); + } +#endif + + /* Save the encryption info to the page 0. 
*/ + block = buf_page_get(page_id_t(space->id, 0), + page_size, + RW_SX_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + ut_ad(space->id == page_get_space_id(buf_block_get_frame(block))); + + offset = fsp_header_get_encryption_offset(page_size); + ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE); + + +#if MYSQL_ENCRYPTION + page = buf_block_get_frame(block); + /* If is in recovering, skip all master key id is rotated + tablespaces. */ + master_key_id = mach_read_from_4( + page + offset + ENCRYPTION_MAGIC_SIZE); + if (recv_recovery_is_on() + && master_key_id == Encryption::master_key_id) { + ut_ad(memcmp(page + offset, + ENCRYPTION_KEY_MAGIC_V1, + ENCRYPTION_MAGIC_SIZE) == 0 + || memcmp(page + offset, + ENCRYPTION_KEY_MAGIC_V2, + ENCRYPTION_MAGIC_SIZE) == 0); + return(true); + } + + mlog_write_string(page + offset, + encrypt_info, + ENCRYPTION_INFO_SIZE_V2, + mtr); +#endif /* MYSQL_ENCRYPTION */ + + return(true); +} + +/** Initializes the space header of a new created space and creates also the +insert buffer tree root if space == 0. +@param[in] space_id space id +@param[in] size current size in blocks +@param[in,out] mtr min-transaction +@return true on success, otherwise false. 
*/ +bool fsp_header_init( -/*============*/ - ulint space, /*!< in: space id */ - ulint size, /*!< in: current size in blocks */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint space_id, + ulint size, + mtr_t* mtr) { fsp_header_t* header; buf_block_t* block; page_t* page; - ulint flags; - ulint zip_size; ut_ad(mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); + fil_space_t* space = mtr_x_lock_space(space_id, mtr); - zip_size = fsp_flags_get_zip_size(flags); - block = buf_page_create(space, 0, zip_size, mtr); - buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr); + const page_id_t page_id(space_id, 0); + const page_size_t page_size(space->flags); + + block = buf_page_create(page_id, page_size, mtr); + buf_page_get(page_id, page_size, RW_SX_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + space->size_in_header = size; + space->free_len = 0; + space->free_limit = 0; + /* The prior contents of the file page should be ignored */ fsp_init_file_page(block, mtr); @@ -705,12 +1141,12 @@ fsp_header_init( header = FSP_HEADER_OFFSET + page; - mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr); + mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr); mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr); mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_SPACE_FLAGS, flags, + mlog_write_ulint(header + FSP_SPACE_FLAGS, space->flags, MLOG_4BYTES, mtr); mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr); @@ -721,26 +1157,54 @@ fsp_header_init( flst_init(header + FSP_SEG_INODES_FREE, mtr); mlog_write_ull(header + FSP_SEG_ID, 1, mtr); - if (space == 0) { - fsp_fill_free_list(FALSE, space, header, mtr); - btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, - 0, 0, DICT_IBUF_ID_MIN + space, - dict_ind_redundant, mtr); - } else { - fsp_fill_free_list(TRUE, space, header, mtr); + + 
fsp_fill_free_list(!is_system_tablespace(space_id), + space, header, mtr); + +#if 0 /* MySQL 5.7 Encryption */ + /* For encryption tablespace, we need to save the encryption + info to the page 0. */ + if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) { + ulint offset = fsp_header_get_encryption_offset(page_size); + byte encryption_info[ENCRYPTION_INFO_SIZE_V2]; + + if (offset == 0) + return(false); + + if (!fsp_header_fill_encryption_info(space, + encryption_info)) { + space->encryption_type = Encryption::NONE; + memset(space->encryption_key, 0, ENCRYPTION_KEY_LEN); + memset(space->encryption_iv, 0, ENCRYPTION_KEY_LEN); + return(false); + } + + mlog_write_string(page + offset, + encryption_info, + ENCRYPTION_INFO_SIZE_V2, + mtr); + } +#endif /* ! */ + + if (space_id == srv_sys_space.space_id()) { + if (btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, + 0, univ_page_size, DICT_IBUF_ID_MIN + space_id, + dict_ind_redundant, NULL, mtr) == FIL_NULL) { + return(false); + } } ulint maxsize = 0; - ulint offset = fsp_header_get_crypt_offset(zip_size, &maxsize); - fil_space_write_crypt_data(space, page, offset, maxsize, mtr); -} + ulint offset = fsp_header_get_crypt_offset(page_size, &maxsize); + fil_space_write_crypt_data(space_id, page, offset, maxsize, mtr); + return(true); +} #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Reads the space id from the first page of a tablespace. 
-@return space id, ULINT UNDEFINED if error */ -UNIV_INTERN +@return space id, ULINT UNDEFINED if error */ ulint fsp_header_get_space_id( /*====================*/ @@ -757,71 +1221,210 @@ fsp_header_get_space_id( id = ULINT_UNDEFINED;); if (id != fsp_id) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Space id in fsp header %lu,but in the page header " - "%lu", fsp_id, id); - + ib::error() << "Space ID in fsp header is " << fsp_id + << ", but in the page header it is " << id << "."; return(ULINT_UNDEFINED); } return(id); } -/**********************************************************************//** -Reads the space flags from the first page of a tablespace. -@return flags */ -UNIV_INTERN -ulint -fsp_header_get_flags( -/*=================*/ - const page_t* page) /*!< in: first page of a tablespace */ +/** Reads the page size from the first page of a tablespace. +@param[in] page first page of a tablespace +@return page size */ +page_size_t +fsp_header_get_page_size( + const page_t* page) { - ut_ad(!page_offset(page)); - - return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page)); + return(page_size_t(fsp_header_get_flags(page))); } -/**********************************************************************//** -Reads the compressed page size from the first page of a tablespace. -@return compressed page size in bytes, or 0 if uncompressed */ -UNIV_INTERN -ulint -fsp_header_get_zip_size( -/*====================*/ - const page_t* page) /*!< in: first page of a tablespace */ +#if 0 /* MySQL 5.7 Encryption */ +/** Decoding the encryption info +from the first page of a tablespace. +@param[in/out] key key +@param[in/out] iv iv +@param[in] encryption_info encrytion info. 
+@return true if success */ +bool +fsp_header_decode_encryption_info( + byte* key, + byte* iv, + byte* encryption_info) { - ulint flags = fsp_header_get_flags(page); + byte* ptr; + ulint master_key_id; + byte* master_key = NULL; + lint elen; + byte key_info[ENCRYPTION_KEY_LEN * 2]; + ulint crc1; + ulint crc2; + char srv_uuid[ENCRYPTION_SERVER_UUID_LEN + 1]; + Encryption::Version version; +#ifdef UNIV_ENCRYPT_DEBUG + const byte* data; + ulint i; +#endif - return(fsp_flags_get_zip_size(flags)); + ptr = encryption_info; + + /* For compatibility with 5.7.11, we need to handle the + encryption information which created in this old version. */ + if (memcmp(ptr, ENCRYPTION_KEY_MAGIC_V1, + ENCRYPTION_MAGIC_SIZE) == 0) { + version = Encryption::ENCRYPTION_VERSION_1; + } else { + version = Encryption::ENCRYPTION_VERSION_2; + } + /* Check magic. */ + if (version == Encryption::ENCRYPTION_VERSION_2 + && memcmp(ptr, ENCRYPTION_KEY_MAGIC_V2, ENCRYPTION_MAGIC_SIZE) != 0) { + /* We ignore report error for recovery, + since the encryption info maybe hasn't writen + into datafile when the table is newly created. */ + if (!recv_recovery_is_on()) { + return(false); + } else { + return(true); + } + } + ptr += ENCRYPTION_MAGIC_SIZE; + + /* Get master key id. */ + master_key_id = mach_read_from_4(ptr); + ptr += sizeof(ulint); + + /* Get server uuid. */ + if (version == Encryption::ENCRYPTION_VERSION_2) { + memset(srv_uuid, 0, ENCRYPTION_SERVER_UUID_LEN + 1); + memcpy(srv_uuid, ptr, ENCRYPTION_SERVER_UUID_LEN); + ptr += ENCRYPTION_SERVER_UUID_LEN; + } + + /* Get master key by key id. 
*/ + memset(key_info, 0, ENCRYPTION_KEY_LEN * 2); + if (version == Encryption::ENCRYPTION_VERSION_1) { + Encryption::get_master_key(master_key_id, NULL, &master_key); + } else { + Encryption::get_master_key(master_key_id, srv_uuid, &master_key); + } + if (master_key == NULL) { + return(false); + } + +#ifdef UNIV_ENCRYPT_DEBUG + fprintf(stderr, "%lu ", master_key_id); + for (data = (const byte*) master_key, i = 0; + i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); +#endif + + /* Decrypt tablespace key and iv. */ + elen = my_aes_decrypt( + ptr, + ENCRYPTION_KEY_LEN * 2, + key_info, + master_key, + ENCRYPTION_KEY_LEN, + my_aes_256_ecb, NULL, false); + + if (elen == MY_AES_BAD_DATA) { + my_free(master_key); + return(NULL); + } + + /* Check checksum bytes. */ + ptr += ENCRYPTION_KEY_LEN * 2; + + crc1 = mach_read_from_4(ptr); + crc2 = ut_crc32(key_info, ENCRYPTION_KEY_LEN * 2); + if (crc1 != crc2) { + ib::error() << "Failed to decrpt encryption information," + << " please check key file is not changed!"; + return(false); + } + + /* Get tablespace key */ + memcpy(key, key_info, ENCRYPTION_KEY_LEN); + + /* Get tablespace iv */ + memcpy(iv, key_info + ENCRYPTION_KEY_LEN, + ENCRYPTION_KEY_LEN); + +#ifdef UNIV_ENCRYPT_DEBUG + fprintf(stderr, " "); + for (data = (const byte*) key, + i = 0; i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); + fprintf(stderr, " "); + for (data = (const byte*) iv, + i = 0; i < ENCRYPTION_KEY_LEN; i++) + fprintf(stderr, "%02lx", (ulong)*data++); + fprintf(stderr, "\n"); +#endif + + my_free(master_key); + + if (Encryption::master_key_id < master_key_id) { + Encryption::master_key_id = master_key_id; + memcpy(Encryption::uuid, srv_uuid, ENCRYPTION_SERVER_UUID_LEN); + } + + return(true); } +/** Reads the encryption key from the first page of a tablespace. 
+@param[in] fsp_flags tablespace flags +@param[in/out] key tablespace key +@param[in/out] iv tablespace iv +@param[in] page first page of a tablespace +@return true if success */ +bool +fsp_header_get_encryption_key( + ulint fsp_flags, + byte* key, + byte* iv, + page_t* page) +{ + ulint offset; + const page_size_t page_size(fsp_flags); + offset = fsp_header_get_encryption_offset(page_size); + if (offset == 0) { + return(false); + } + + return(fsp_header_decode_encryption_info(key, iv, page + offset)); +} +#endif /* ! */ + #ifndef UNIV_HOTBACKUP /**********************************************************************//** Increases the space size field of a space. */ -UNIV_INTERN void fsp_header_inc_size( /*================*/ - ulint space, /*!< in: space id */ + ulint space_id, /*!< in: space id */ ulint size_inc, /*!< in: size increment in pages */ mtr_t* mtr) /*!< in/out: mini-transaction */ { fsp_header_t* header; ulint size; - ulint flags; ut_ad(mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); + fil_space_t* space = mtr_x_lock_space(space_id, mtr); + ut_d(fsp_space_modify_check(space_id, mtr)); - header = fsp_get_space_header(space, - fsp_flags_get_zip_size(flags), - mtr); + header = fsp_get_space_header( + space_id, page_size_t(space->flags), mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); + size = mach_read_from_4(header + FSP_SIZE); + ut_ad(size == space->size_in_header); - mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, - mtr); + size += size_inc; + + mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); + space->size_in_header = size; } /**********************************************************************//** @@ -829,8 +1432,7 @@ Gets the size of the system tablespace from the tablespace header. If we do not have an auto-extending data file, this should be equal to the size of the data files. If there is an auto-extending data file, this can be smaller. 
-@return size in pages */ -UNIV_INTERN +@return size in pages */ ulint fsp_header_get_tablespace_size(void) /*================================*/ @@ -841,263 +1443,251 @@ fsp_header_get_tablespace_size(void) mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(0, NULL), &mtr); +#ifdef UNIV_DEBUG + fil_space_t* space = +#endif /* UNIV_DEBUG */ + mtr_x_lock_space(TRX_SYS_SPACE, &mtr); - header = fsp_get_space_header(0, 0, &mtr); + header = fsp_get_space_header(TRX_SYS_SPACE, univ_page_size, &mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); + size = mach_read_from_4(header + FSP_SIZE); + ut_ad(space->size_in_header == size); mtr_commit(&mtr); return(size); } -/***********************************************************************//** -Tries to extend a single-table tablespace so that a page would fit in the +/** Try to extend a single-table tablespace so that a page would fit in the data file. -@return TRUE if success */ -static UNIV_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) -ibool +@param[in,out] space tablespace +@param[in] page_no page number +@param[in,out] header tablespace header +@param[in,out] mtr mini-transaction +@return true if success */ +static UNIV_COLD MY_ATTRIBUTE((warn_unused_result)) +bool fsp_try_extend_data_file_with_pages( -/*================================*/ - ulint space, /*!< in: space */ - ulint page_no, /*!< in: page number */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fil_space_t* space, + ulint page_no, + fsp_header_t* header, + mtr_t* mtr) { - ibool success; - ulint actual_size; + bool success; ulint size; - ut_a(space != 0); + ut_a(!is_system_tablespace(space->id)); + ut_d(fsp_space_modify_check(space->id, mtr)); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); + size = mach_read_from_4(header + FSP_SIZE); + ut_ad(size == space->size_in_header); ut_a(page_no >= size); - success = fil_extend_space_to_desired_size(&actual_size, space, - page_no 
+ 1); - /* actual_size now has the space size in pages; it may be less than - we wanted if we ran out of disk space */ - - mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr); + success = fil_space_extend(space, page_no + 1); + /* The size may be less than we wanted if we ran out of disk space. */ + mlog_write_ulint(header + FSP_SIZE, space->size, MLOG_4BYTES, mtr); + space->size_in_header = space->size; return(success); } -/***********************************************************************//** -Tries to extend the last data file of a tablespace if it is auto-extending. -@return FALSE if not auto-extending */ +/** Try to extend the last data file of a tablespace if it is auto-extending. +@param[in,out] space tablespace +@param[in,out] header tablespace header +@param[in,out] mtr mini-transaction +@return whether the tablespace was extended */ static UNIV_COLD MY_ATTRIBUTE((nonnull)) -ibool +ulint fsp_try_extend_data_file( -/*=====================*/ - ulint* actual_increase,/*!< out: actual increase in pages, where - we measure the tablespace size from - what the header field says; it may be - the actual file size rounded down to - megabyte */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fil_space_t* space, + fsp_header_t* header, + mtr_t* mtr, + ulint* n_pages_added) { - ulint size; - ulint zip_size; - ulint new_size; - ulint old_size; - ulint size_increase; - ulint actual_size; - ibool success; + ulint size; /* current number of pages in the datafile */ + ulint size_increase; /* number of pages to extend this file */ + const char* OUT_OF_SPACE_MSG = + "ran out of space. 
Please add another file or use" + " 'autoextend' for the last file in setting"; - *actual_increase = 0; + ut_d(fsp_space_modify_check(space->id, mtr)); - if (space == 0 && !srv_auto_extend_last_data_file) { + if (space->id == srv_sys_space.space_id() + && !srv_sys_space.can_auto_extend_last_file()) { /* We print the error message only once to avoid spamming the error log. Note that we don't need - to reset the flag to FALSE as dealing with this + to reset the flag to false as dealing with this error requires server restart. */ - if (fsp_tbs_full_error_printed == FALSE) { - fprintf(stderr, - "InnoDB: Error: Data file(s) ran" - " out of space.\n" - "Please add another data file or" - " use \'autoextend\' for the last" - " data file.\n"); - fsp_tbs_full_error_printed = TRUE; + if (!srv_sys_space.get_tablespace_full_status()) { + ib::error() << "Tablespace " << srv_sys_space.name() + << " " << OUT_OF_SPACE_MSG + << " innodb_data_file_path."; + srv_sys_space.set_tablespace_full_status(true); } - return(FALSE); + return(false); + } else if (fsp_is_system_temporary(space->id) + && !srv_tmp_space.can_auto_extend_last_file()) { + + /* We print the error message only once to avoid + spamming the error log. Note that we don't need + to reset the flag to false as dealing with this + error requires server restart. 
*/ + if (!srv_tmp_space.get_tablespace_full_status()) { + ib::error() << "Tablespace " << srv_tmp_space.name() + << " " << OUT_OF_SPACE_MSG + << " innodb_temp_data_file_path."; + srv_tmp_space.set_tablespace_full_status(true); + } + return(false); } - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - zip_size = fsp_flags_get_zip_size( + size = mach_read_from_4(header + FSP_SIZE); + ut_ad(size == space->size_in_header); + + const page_size_t page_size( mach_read_from_4(header + FSP_SPACE_FLAGS)); - old_size = size; + if (space->id == srv_sys_space.space_id()) { - if (space == 0) { - if (!srv_last_file_size_max) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } else { - if (srv_last_file_size_max - < srv_data_file_sizes[srv_n_data_files - 1]) { + size_increase = srv_sys_space.get_increment(); - fprintf(stderr, - "InnoDB: Error: Last data file size" - " is %lu, max size allowed %lu\n", - (ulong) srv_data_file_sizes[ - srv_n_data_files - 1], - (ulong) srv_last_file_size_max); - } + } else if (space->id == srv_tmp_space.space_id()) { + + size_increase = srv_tmp_space.get_increment(); - size_increase = srv_last_file_size_max - - srv_data_file_sizes[srv_n_data_files - 1]; - if (size_increase > SRV_AUTO_EXTEND_INCREMENT) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } - } } else { - /* We extend single-table tablespaces first one extent - at a time, but 4 at a time for bigger tablespaces. It is - not enough to extend always by one extent, because we need - to add at least one extent to FSP_FREE. - A single extent descriptor page will track many extents. - And the extent that uses its extent descriptor page is - put onto the FSP_FREE_FRAG list. Extents that do not - use their extent descriptor page are added to FSP_FREE. - The physical page size is used to determine how many - extents are tracked on one extent descriptor page. 
*/ - ulint extent_size; /*!< one megabyte, in pages */ - ulint threshold; /*!< The size of the tablespace - (in number of pages) where we - start allocating more than one - extent at a time. */ - - if (!zip_size) { - extent_size = FSP_EXTENT_SIZE; - } else { - extent_size = FSP_EXTENT_SIZE - * UNIV_PAGE_SIZE / zip_size; - } - - /* Threshold is set at 32mb except when the page - size is small enough that it must be done sooner. - For page size less than 4k, we may reach the - extent contains extent descriptor page before - 32 mb. */ - threshold = ut_min((32 * extent_size), - (zip_size ? zip_size : UNIV_PAGE_SIZE)); - - if (size < extent_size) { + ulint extent_pages + = fsp_get_extent_size_in_pages(page_size); + if (size < extent_pages) { /* Let us first extend the file to extent_size */ - success = fsp_try_extend_data_file_with_pages( - space, extent_size - 1, header, mtr); - if (!success) { - new_size = mtr_read_ulint(header + FSP_SIZE, - MLOG_4BYTES, mtr); - - *actual_increase = new_size - old_size; - - return(FALSE); + if (!fsp_try_extend_data_file_with_pages( + space, extent_pages - 1, header, mtr)) { + return(false); } - size = extent_size; + size = extent_pages; } - if (size < threshold) { - size_increase = extent_size; - } else { - /* Below in fsp_fill_free_list() we assume - that we add at most FSP_FREE_ADD extents at - a time */ - size_increase = FSP_FREE_ADD * extent_size; - } + size_increase = fsp_get_pages_to_extend_ibd(page_size, size); } if (size_increase == 0) { - return(TRUE); - } - - success = fil_extend_space_to_desired_size(&actual_size, space, - size + size_increase); - if (!success) { - return(false); } + if (!fil_space_extend(space, size + size_increase)) { + return(false); + } + + *n_pages_added = size_increase; + /* We ignore any fragments of a full megabyte when storing the size to the space header */ - if (!zip_size) { - new_size = ut_calc_align_down(actual_size, - (1024 * 1024) / UNIV_PAGE_SIZE); - } else { - new_size = 
ut_calc_align_down(actual_size, - (1024 * 1024) / zip_size); - } - mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr); + space->size_in_header = ut_calc_align_down( + space->size, (1024 * 1024) / page_size.physical()); - *actual_increase = new_size - old_size; + mlog_write_ulint( + header + FSP_SIZE, space->size_in_header, MLOG_4BYTES, mtr); - return(TRUE); + return(true); } -/**********************************************************************//** -Puts new extents to the free list if there are free extents above the free +/** Calculate the number of pages to extend a datafile. +We extend single-table and general tablespaces first one extent at a time, +but 4 at a time for bigger tablespaces. It is not enough to extend always +by one extent, because we need to add at least one extent to FSP_FREE. +A single extent descriptor page will track many extents. And the extent +that uses its extent descriptor page is put onto the FSP_FREE_FRAG list. +Extents that do not use their extent descriptor page are added to FSP_FREE. +The physical page size is used to determine how many extents are tracked +on one extent descriptor page. See xdes_calc_descriptor_page(). +@param[in] page_size page_size of the datafile +@param[in] size current number of pages in the datafile +@return number of pages to extend the file. */ +ulint +fsp_get_pages_to_extend_ibd( + const page_size_t& page_size, + ulint size) +{ + ulint size_increase; /* number of pages to extend this file */ + ulint extent_size; /* one megabyte, in pages */ + ulint threshold; /* The size of the tablespace (in number + of pages) where we start allocating more + than one extent at a time. */ + + extent_size = fsp_get_extent_size_in_pages(page_size); + + /* The threshold is set at 32MiB except when the physical page + size is small enough that it must be done sooner. 
*/ + threshold = ut_min(32 * extent_size, page_size.physical()); + + if (size < threshold) { + size_increase = extent_size; + } else { + /* Below in fsp_fill_free_list() we assume + that we add at most FSP_FREE_ADD extents at + a time */ + size_increase = FSP_FREE_ADD * extent_size; + } + + return(size_increase); +} + +/** Put new extents to the free list if there are free extents above the free limit. If an extent happens to contain an extent descriptor page, the extent -is put to the FSP_FREE_FRAG list with the page marked as used. */ +is put to the FSP_FREE_FRAG list with the page marked as used. +@param[in] init_space true if this is a single-table tablespace +and we are only initializing the first extent and the first bitmap pages; +then we will not allocate more extents +@param[in,out] space tablespace +@param[in,out] header tablespace header +@param[in,out] mtr mini-transaction */ static void fsp_fill_free_list( -/*===============*/ - ibool init_space, /*!< in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + bool init_space, + fil_space_t* space, + fsp_header_t* header, + mtr_t* mtr) { ulint limit; ulint size; - ulint zip_size; + ulint flags; xdes_t* descr; ulint count = 0; ulint frag_n_used; - ulint actual_increase; ulint i; - mtr_t ibuf_mtr; ut_ad(header != NULL); ut_ad(mtr != NULL); ut_ad(page_offset(header) == FSP_HEADER_OFFSET); + ut_d(fsp_space_modify_check(space->id, mtr)); /* Check if we can fill free list from above the free list limit */ - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr); + size = mach_read_from_4(header + FSP_SIZE); + limit = mach_read_from_4(header + FSP_FREE_LIMIT); + flags = 
mach_read_from_4(header + FSP_SPACE_FLAGS); - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + header)); - ut_a(ut_is_2pow(zip_size)); - ut_a(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); + ut_ad(size == space->size_in_header); + ut_ad(limit == space->free_limit); + ut_ad(flags == space->flags); - if (space == 0 && srv_auto_extend_last_data_file - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { + const page_size_t page_size(flags); - /* Try to increase the last data file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - } - - if (space != 0 && !init_space - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - - /* Try to increase the .ibd file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); + if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { + if ((!init_space && !is_system_tablespace(space->id)) + || (space->id == srv_sys_space.space_id() + && srv_sys_space.can_auto_extend_last_file()) + || (space->id == srv_tmp_space.space_id() + && srv_tmp_space.can_auto_extend_last_file())) { + ulint n_pages = 0; + fsp_try_extend_data_file(space, header, mtr, &n_pages); + size = space->size_in_header; + } } i = limit; @@ -1105,17 +1695,14 @@ fsp_fill_free_list( while ((init_space && i < 1) || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) { - ibool init_xdes; - if (zip_size) { - init_xdes = ut_2pow_remainder(i, zip_size) == 0; - } else { - init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0; - } + bool init_xdes + = (ut_2pow_remainder(i, page_size.physical()) == 0); + space->free_limit = i + FSP_EXTENT_SIZE; mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE, MLOG_4BYTES, mtr); - if (UNIV_UNLIKELY(init_xdes)) { + if (init_xdes) { buf_block_t* block; @@ -1124,12 +1711,15 @@ fsp_fill_free_list( pages should be 
ignored. */ if (i > 0) { + const page_id_t page_id(space->id, i); + block = buf_page_create( - space, i, zip_size, mtr); - buf_page_get(space, zip_size, i, - RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, - SYNC_FSP_PAGE); + page_id, page_size, mtr); + + buf_page_get( + page_id, page_size, RW_SX_LATCH, mtr); + + buf_block_dbg_add_level(block, SYNC_FSP_PAGE); fsp_init_file_page(block, mtr); mlog_write_ulint(buf_block_get_frame(block) @@ -1140,28 +1730,52 @@ fsp_fill_free_list( /* Initialize the ibuf bitmap page in a separate mini-transaction because it is low in the latching - order, and we must be able to release its latch - before returning from the fsp routine */ + order, and we must be able to release its latch. + Note: Insert-Buffering is disabled for tables that + reside in the temp-tablespace. */ + if (space->id != srv_tmp_space.space_id()) { + mtr_t ibuf_mtr; - mtr_start(&ibuf_mtr); + mtr_start(&ibuf_mtr); + ibuf_mtr.set_named_space(space); - block = buf_page_create(space, - i + FSP_IBUF_BITMAP_OFFSET, - zip_size, &ibuf_mtr); - buf_page_get(space, zip_size, - i + FSP_IBUF_BITMAP_OFFSET, - RW_X_LATCH, &ibuf_mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + /* Avoid logging while truncate table + fix-up is active. 
*/ + if (space->purpose == FIL_TYPE_TEMPORARY + || srv_is_tablespace_truncated( + space->id)) { + mtr_set_log_mode( + &ibuf_mtr, MTR_LOG_NO_REDO); + } - fsp_init_file_page(block, &ibuf_mtr); + const page_id_t page_id( + space->id, + i + FSP_IBUF_BITMAP_OFFSET); - ibuf_bitmap_page_init(block, &ibuf_mtr); + block = buf_page_create( + page_id, page_size, &ibuf_mtr); - mtr_commit(&ibuf_mtr); + buf_page_get( + page_id, page_size, RW_SX_LATCH, + &ibuf_mtr); + + buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + + fsp_init_file_page(block, &ibuf_mtr); + + ibuf_bitmap_page_init(block, &ibuf_mtr); + + mtr_commit(&ibuf_mtr); + } } - descr = xdes_get_descriptor_with_space_hdr(header, space, i, - mtr); + buf_block_t* desc_block = NULL; + descr = xdes_get_descriptor_with_space_hdr( + header, space->id, i, mtr, init_space, &desc_block); + if (desc_block != NULL) { + fil_block_check_type( + desc_block, FIL_PAGE_TYPE_XDES, mtr); + } xdes_init(descr, mtr); if (UNIV_UNLIKELY(init_xdes)) { @@ -1177,8 +1791,8 @@ fsp_fill_free_list( flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, mtr); + frag_n_used = mach_read_from_4( + header + FSP_FRAG_N_USED); mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used + 2, MLOG_4BYTES, mtr); } else { @@ -1189,32 +1803,41 @@ fsp_fill_free_list( i += FSP_EXTENT_SIZE; } + + space->free_len += count; } -/**********************************************************************//** -Allocates a new free extent. -@return extent descriptor, NULL if cannot be allocated */ +/** Allocates a new free extent. 
+@param[in] space_id tablespace identifier +@param[in] page_size page size +@param[in] hint hint of which extent would be desirable: any +page offset in the extent goes; the hint must not be > FSP_FREE_LIMIT +@param[in,out] mtr mini-transaction +@return extent descriptor, NULL if cannot be allocated */ static xdes_t* fsp_alloc_free_extent( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint of which extent would be desirable: any - page offset in the extent goes; the hint must not - be > FSP_FREE_LIMIT */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint space_id, + const page_size_t& page_size, + ulint hint, + mtr_t* mtr) { fsp_header_t* header; fil_addr_t first; xdes_t* descr; + buf_block_t* desc_block = NULL; - ut_ad(mtr); + header = fsp_get_space_header(space_id, page_size, mtr); - header = fsp_get_space_header(space, zip_size, mtr); + descr = xdes_get_descriptor_with_space_hdr( + header, space_id, hint, mtr, false, &desc_block); - descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr); + fil_space_t* space = fil_space_get(space_id); + ut_a(space != NULL); + + if (desc_block != NULL) { + fil_block_check_type(desc_block, FIL_PAGE_TYPE_XDES, mtr); + } if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) { /* Ok, we can take this extent */ @@ -1223,7 +1846,7 @@ fsp_alloc_free_extent( first = flst_get_first(header + FSP_FREE, mtr); if (fil_addr_is_null(first)) { - fsp_fill_free_list(FALSE, space, header, mtr); + fsp_fill_free_list(false, space, header, mtr); first = flst_get_first(header + FSP_FREE, mtr); } @@ -1233,10 +1856,12 @@ fsp_alloc_free_extent( return(NULL); /* No free extents left */ } - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); + descr = xdes_lst_get_descriptor( + space_id, page_size, first, mtr); } flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); + space->free_len--; 
return(descr); } @@ -1259,8 +1884,7 @@ fsp_alloc_from_free_frag( xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); + frag_n_used = mach_read_from_4(header + FSP_FRAG_N_USED); frag_n_used++; mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, mtr); @@ -1278,49 +1902,62 @@ fsp_alloc_from_free_frag( } } -/**********************************************************************//** -Gets a buffer block for an allocated page. - +/** Gets a buffer block for an allocated page. NOTE: If init_mtr != mtr, the block will only be initialized if it was not previously x-latched. It is assumed that the block has been x-latched only by mtr, and freed in mtr in that case. - +@param[in] page_id page id of the allocated page +@param[in] page_size page size of the allocated page +@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH +@param[in,out] mtr mini-transaction of the allocation +@param[in,out] init_mtr mini-transaction for initializing the page @return block, initialized if init_mtr==mtr or rw_lock_x_lock_count(&block->lock) == 1 */ static buf_block_t* fsp_page_create( -/*============*/ - ulint space, /*!< in: space id of the allocated page */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the allocated page */ - mtr_t* mtr, /*!< in: mini-transaction of the allocation */ - mtr_t* init_mtr) /*!< in: mini-transaction for initializing - the page */ + const page_id_t& page_id, + const page_size_t& page_size, + rw_lock_type_t rw_latch, + mtr_t* mtr, + mtr_t* init_mtr) { - buf_block_t* block - = buf_page_create(space, page_no, zip_size, init_mtr); -#ifdef UNIV_SYNC_DEBUG + buf_block_t* block = buf_page_create(page_id, page_size, init_mtr); + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX) - == rw_lock_own(&block->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + == 
rw_lock_own(&block->lock, RW_LOCK_X)); + + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_SX_FIX) + == rw_lock_own(&block->lock, RW_LOCK_SX)); + + ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH); /* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */ - rw_lock_x_lock(&block->lock); + if (rw_latch == RW_X_LATCH) { + rw_lock_x_lock(&block->lock); + } else { + rw_lock_sx_lock(&block->lock); + } mutex_enter(&block->mutex); + buf_block_buf_fix_inc(block, __FILE__, __LINE__); + mutex_exit(&block->mutex); - mtr_memo_push(init_mtr, block, MTR_MEMO_PAGE_X_FIX); + mtr_memo_push(init_mtr, block, rw_latch == RW_X_LATCH + ? MTR_MEMO_PAGE_X_FIX : MTR_MEMO_PAGE_SX_FIX); if (init_mtr == mtr - || rw_lock_get_x_lock_count(&block->lock) == 1) { + || (rw_latch == RW_X_LATCH + ? rw_lock_get_x_lock_count(&block->lock) == 1 + : rw_lock_get_sx_lock_count(&block->lock) == 1)) { /* Initialize the page, unless it was already - X-latched in mtr. (In this case, we would want to + SX-latched in mtr. (In this case, we would want to allocate another page that has not been freed in mtr.) */ ut_ad(init_mtr == mtr - || !mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + || !mtr_memo_contains_flagged(mtr, block, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); fsp_init_file_page(block, init_mtr); } @@ -1328,24 +1965,28 @@ fsp_page_create( return(block); } -/**********************************************************************//** -Allocates a single free page from a space. The page is marked as used. -@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +/** Allocates a single free page from a space. +The page is marked as used. 
+@param[in] space space id +@param[in] page_size page size +@param[in] hint hint of which page would be desirable +@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH +@param[in,out] mtr mini-transaction +@param[in,out] init_mtr mini-transaction in which the page should be +initialized (may be the same as mtr) +@retval NULL if no page could be allocated +@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) +@retval block (not allocated or initialized) otherwise */ +static MY_ATTRIBUTE((warn_unused_result)) buf_block_t* fsp_alloc_free_page( -/*================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr) */ + ulint space, + const page_size_t& page_size, + ulint hint, + rw_lock_type_t rw_latch, + mtr_t* mtr, + mtr_t* init_mtr) { fsp_header_t* header; fil_addr_t first; @@ -1357,7 +1998,8 @@ fsp_alloc_free_page( ut_ad(mtr); ut_ad(init_mtr); - header = fsp_get_space_header(space, zip_size, mtr); + ut_d(fsp_space_modify_check(space, mtr)); + header = fsp_get_space_header(space, page_size, mtr); /* Get the hinted descriptor */ descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr); @@ -1376,7 +2018,7 @@ fsp_alloc_free_page( FREE_FRAG list. But we will allocate our page from the the free extent anyway. 
*/ - descr = fsp_alloc_free_extent(space, zip_size, + descr = fsp_alloc_free_extent(space, page_size, hint, mtr); if (descr == NULL) { @@ -1389,7 +2031,7 @@ fsp_alloc_free_page( flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr); } else { - descr = xdes_lst_get_descriptor(space, zip_size, + descr = xdes_lst_get_descriptor(space, page_size, first, mtr); } @@ -1412,24 +2054,27 @@ fsp_alloc_free_page( page_no = xdes_get_offset(descr) + free; - space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); + space_size = mach_read_from_4(header + FSP_SIZE); + ut_ad(space_size == fil_space_get(space)->size_in_header + || (space == TRX_SYS_SPACE + && srv_startup_is_before_trx_rollback_phase)); if (space_size <= page_no) { /* It must be that we are extending a single-table tablespace whose size is still < 64 pages */ - ut_a(space != 0); + ut_a(!is_system_tablespace(space)); if (page_no >= FSP_EXTENT_SIZE) { - fprintf(stderr, - "InnoDB: Error: trying to extend a" - " single-table tablespace %lu\n" - "InnoDB: by single page(s) though the" - " space size %lu. Page no %lu.\n", - (ulong) space, (ulong) space_size, - (ulong) page_no); + ib::error() << "Trying to extend a single-table" + " tablespace " << space << " , by single" + " page(s) though the space size " << space_size + << ". Page no " << page_no << "."; return(NULL); } - if (!fsp_try_extend_data_file_with_pages(space, page_no, + + fil_space_t* fspace = fil_space_get(space); + + if (!fsp_try_extend_data_file_with_pages(fspace, page_no, header, mtr)) { /* No disk space left */ return(NULL); @@ -1437,20 +2082,21 @@ fsp_alloc_free_page( } fsp_alloc_from_free_frag(header, descr, free, mtr); - return(fsp_page_create(space, zip_size, page_no, mtr, init_mtr)); + return(fsp_page_create(page_id_t(space, page_no), page_size, + rw_latch, mtr, init_mtr)); } -/**********************************************************************//** -Frees a single page of a space. The page is marked as free and clean. 
*/ +/** Frees a single page of a space. +The page is marked as free and clean. +@param[in] page_id page id +@param[in] page_size page size +@param[in,out] mtr mini-transaction */ static void fsp_free_page( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + const page_id_t& page_id, + const page_size_t& page_size, + mtr_t* mtr) { fsp_header_t* header; xdes_t* descr; @@ -1458,21 +2104,21 @@ fsp_free_page( ulint frag_n_used; ut_ad(mtr); + ut_d(fsp_space_modify_check(page_id.space(), mtr)); /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */ - header = fsp_get_space_header(space, zip_size, mtr); + header = fsp_get_space_header( + page_id.space(), page_size, mtr); - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); + descr = xdes_get_descriptor_with_space_hdr( + header, page_id.space(), page_id.page_no(), mtr); state = xdes_get_state(descr, mtr); if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) { - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu has state %lu\n", - (ulong) page, - (ulong) state); + ib::error() << "File space extent descriptor of page " + << page_id << " has state " << state; fputs("InnoDB: Dump of descriptor: ", stderr); ut_print_buf(stderr, ((byte*) descr) - 50, 200); putc('\n', stderr); @@ -1491,12 +2137,10 @@ fsp_free_page( } if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, - page % FSP_EXTENT_SIZE, mtr)) { + page_id.page_no() % FSP_EXTENT_SIZE, mtr)) { - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu says it is free\n" - "InnoDB: Dump of descriptor: ", (ulong) page); + ib::error() << "File space extent descriptor of page " + << page_id << " says it is free. 
Dump of descriptor: "; ut_print_buf(stderr, ((byte*) descr) - 50, 200); putc('\n', stderr); /* Crash in debug version, so that we get a core dump @@ -1509,8 +2153,10 @@ fsp_free_page( return; } - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); + const ulint bit = page_id.page_no() % FSP_EXTENT_SIZE; + + xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr); + xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr); frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr); @@ -1534,84 +2180,78 @@ fsp_free_page( /* The extent has become free: move it to another list */ flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr); - fsp_free_extent(space, zip_size, page, mtr); + fsp_free_extent(page_id, page_size, mtr); } - - mtr->n_freed_pages++; } -/**********************************************************************//** -Returns an extent to the free list of a space. */ +/** Returns an extent to the free list of a space. 
+@param[in] page_id page id in the extent +@param[in] page_size page size +@param[in,out] mtr mini-transaction */ static void fsp_free_extent( -/*============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + const page_id_t& page_id, + const page_size_t& page_size, + mtr_t* mtr) { fsp_header_t* header; xdes_t* descr; ut_ad(mtr); - header = fsp_get_space_header(space, zip_size, mtr); + header = fsp_get_space_header(page_id.space(), page_size, mtr); - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); + descr = xdes_get_descriptor_with_space_hdr( + header, page_id.space(), page_id.page_no(), mtr); - if (xdes_get_state(descr, mtr) == XDES_FREE) { - - ut_print_buf(stderr, (byte*) descr - 500, 1000); - putc('\n', stderr); - - ut_error; - } + ut_a(xdes_get_state(descr, mtr) != XDES_FREE); xdes_init(descr, mtr); flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); + fil_space_get(page_id.space())->free_len++; } -/**********************************************************************//** -Returns the nth inode slot on an inode page. -@return segment inode */ +/** Returns the nth inode slot on an inode page. 
+@param[in] page segment inode page +@param[in] i inode index on page +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return segment inode */ UNIV_INLINE fseg_inode_t* fsp_seg_inode_page_get_nth_inode( -/*=============================*/ - page_t* page, /*!< in: segment inode page */ - ulint i, /*!< in: inode index on page */ - ulint zip_size MY_ATTRIBUTE((unused)), - /*!< in: compressed page size, or 0 */ - mtr_t* mtr MY_ATTRIBUTE((unused))) - /*!< in/out: mini-transaction */ + page_t* page, + ulint i, + const page_size_t& page_size, + mtr_t* mtr) { - ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size)); - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); + ut_ad(i < FSP_SEG_INODES_PER_PAGE(page_size)); + ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_SX_FIX)); return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i); } -/**********************************************************************//** -Looks for a used segment inode on a segment inode page. -@return segment inode index, or ULINT_UNDEFINED if not found */ +/** Looks for a used segment inode on a segment inode page. 
+@param[in] page segment inode page +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return segment inode index, or ULINT_UNDEFINED if not found */ static ulint fsp_seg_inode_page_find_used( -/*=========================*/ - page_t* page, /*!< in: segment inode page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + page_t* page, + const page_size_t& page_size, + mtr_t* mtr) { ulint i; fseg_inode_t* inode; - for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { + for (i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) { inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); + page, i, page_size, mtr); if (mach_read_from_8(inode + FSEG_ID)) { /* This is used */ @@ -1625,24 +2265,26 @@ fsp_seg_inode_page_find_used( return(ULINT_UNDEFINED); } -/**********************************************************************//** -Looks for an unused segment inode on a segment inode page. -@return segment inode index, or ULINT_UNDEFINED if not found */ +/** Looks for an unused segment inode on a segment inode page. 
+@param[in] page segment inode page +@param[in] i search forward starting from this index +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return segment inode index, or ULINT_UNDEFINED if not found */ static ulint fsp_seg_inode_page_find_free( -/*=========================*/ - page_t* page, /*!< in: segment inode page */ - ulint i, /*!< in: search forward starting from this index */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + page_t* page, + ulint i, + const page_size_t& page_size, + mtr_t* mtr) { - for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { + for (; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) { fseg_inode_t* inode; inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); + page, i, page_size, mtr); if (!mach_read_from_8(inode + FSEG_ID)) { /* This is unused */ @@ -1658,7 +2300,7 @@ fsp_seg_inode_page_find_free( /**********************************************************************//** Allocates a new file segment inode page. 
-@return TRUE if could be allocated */ +@return TRUE if could be allocated */ static ibool fsp_alloc_seg_inode_page( @@ -1670,16 +2312,15 @@ fsp_alloc_seg_inode_page( buf_block_t* block; page_t* page; ulint space; - ulint zip_size; ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET); space = page_get_space_id(page_align(space_header)); - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + space_header)); + const page_size_t page_size(mach_read_from_4(FSP_SPACE_FLAGS + + space_header)); - block = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr); + block = fsp_alloc_free_page(space, page_size, 0, RW_SX_LATCH, mtr, mtr); if (block == NULL) { @@ -1687,19 +2328,17 @@ fsp_alloc_seg_inode_page( } buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); - - block->check_index_page_at_flush = FALSE; + ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1); page = buf_block_get_frame(block); mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE, MLOG_2BYTES, mtr); - for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { + for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) { inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); + page, i, page_size, mtr); mlog_write_ull(inode + FSEG_ID, 0, mtr); } @@ -1713,7 +2352,7 @@ fsp_alloc_seg_inode_page( /**********************************************************************//** Allocates a new file segment inode. 
-@return segment inode, or NULL if not enough space */ +@return segment inode, or NULL if not enough space */ static fseg_inode_t* fsp_alloc_seg_inode( @@ -1721,45 +2360,39 @@ fsp_alloc_seg_inode( fsp_header_t* space_header, /*!< in: space header */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint page_no; buf_block_t* block; page_t* page; fseg_inode_t* inode; - ibool success; - ulint zip_size; ulint n; ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET); - if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) { - /* Allocate a new segment inode page */ - - success = fsp_alloc_seg_inode_page(space_header, mtr); - - if (!success) { - - return(NULL); - } + /* Allocate a new segment inode page if needed. */ + if (flst_get_len(space_header + FSP_SEG_INODES_FREE) == 0 + && !fsp_alloc_seg_inode_page(space_header, mtr)) { + return(NULL); } - - page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page; - - zip_size = fsp_flags_get_zip_size( + const page_size_t page_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - block = buf_page_get(page_get_space_id(page_align(space_header)), - zip_size, page_no, RW_X_LATCH, mtr); + + const page_id_t page_id( + page_get_space_id(page_align(space_header)), + flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page); + + block = buf_page_get(page_id, page_size, RW_SX_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); + fil_block_check_type(block, FIL_PAGE_INODE, mtr); page = buf_block_get_frame(block); - n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr); + n = fsp_seg_inode_page_find_free(page, 0, page_size, mtr); ut_a(n != ULINT_UNDEFINED); - inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr); + inode = fsp_seg_inode_page_get_nth_inode(page, n, page_size, mtr); if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1, - zip_size, mtr)) { + page_size, mtr)) { /* There are no other unused headers left on the page: move it to another list */ @@ -1775,29 
+2408,32 @@ fsp_alloc_seg_inode( return(inode); } -/**********************************************************************//** -Frees a file segment inode. */ +/** Frees a file segment inode. +@param[in] space space id +@param[in] page_size page size +@param[in,out] inode segment inode +@param[in,out] mtr mini-transaction */ static void fsp_free_seg_inode( -/*===============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint space, + const page_size_t& page_size, + fseg_inode_t* inode, + mtr_t* mtr) { page_t* page; fsp_header_t* space_header; + ut_d(fsp_space_modify_check(space, mtr)); + page = page_align(inode); - space_header = fsp_get_space_header(space, zip_size, mtr); + space_header = fsp_get_space_header(space, page_size, mtr); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) { + == fsp_seg_inode_page_find_free(page, 0, page_size, mtr)) { /* Move the page to another list */ @@ -1812,29 +2448,33 @@ fsp_free_seg_inode( mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr); if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_used(page, zip_size, mtr)) { + == fsp_seg_inode_page_find_used(page, page_size, mtr)) { /* There are no other used headers left on the page: free it */ flst_remove(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE, mtr); - fsp_free_page(space, zip_size, page_get_page_no(page), mtr); + fsp_free_page(page_id_t(space, page_get_page_no(page)), + page_size, mtr); } } -/**********************************************************************//** -Returns the file segment inode, page x-latched. -@return segment inode, page x-latched; NULL if the inode is free */ +/** Returns the file segment inode, page x-latched. 
+@param[in] header segment header +@param[in] space space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@param[out] block inode block, or NULL to ignore +@return segment inode, page x-latched; NULL if the inode is free */ static fseg_inode_t* fseg_inode_try_get( -/*===============*/ - fseg_header_t* header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_header_t* header, + ulint space, + const page_size_t& page_size, + mtr_t* mtr, + buf_block_t** block) { fil_addr_t inode_addr; fseg_inode_t* inode; @@ -1843,7 +2483,8 @@ fseg_inode_try_get( inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET); ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE)); - inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr); + inode = fut_get_ptr(space, page_size, inode_addr, RW_SX_LATCH, mtr, + block); if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) { @@ -1856,28 +2497,31 @@ fseg_inode_try_get( return(inode); } -/**********************************************************************//** -Returns the file segment inode, page x-latched. -@return segment inode, page x-latched */ +/** Returns the file segment inode, page x-latched. 
+@param[in] header segment header +@param[in] space space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@param[out] block inode block +@return segment inode, page x-latched */ static fseg_inode_t* fseg_inode_get( -/*===========*/ - fseg_header_t* header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_header_t* header, + ulint space, + const page_size_t& page_size, + mtr_t* mtr, + buf_block_t** block = NULL) { fseg_inode_t* inode - = fseg_inode_try_get(header, space, zip_size, mtr); + = fseg_inode_try_get(header, space, page_size, mtr, block); ut_a(inode); return(inode); } /**********************************************************************//** Gets the page number from the nth fragment page slot. -@return page number, FIL_NULL if not in use */ +@return page number, FIL_NULL if not in use */ UNIV_INLINE ulint fseg_get_nth_frag_page_no( @@ -1889,7 +2533,7 @@ fseg_get_nth_frag_page_no( { ut_ad(inode && mtr); ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX)); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); return(mach_read_from_4(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE)); @@ -1908,7 +2552,7 @@ fseg_set_nth_frag_page_no( { ut_ad(inode && mtr); ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX)); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, @@ -1917,7 +2561,7 @@ fseg_set_nth_frag_page_no( /**********************************************************************//** Finds a fragment page slot which is free. 
-@return slot index; ULINT_UNDEFINED if none found */ +@return slot index; ULINT_UNDEFINED if none found */ static ulint fseg_find_free_frag_page_slot( @@ -1944,7 +2588,7 @@ fseg_find_free_frag_page_slot( /**********************************************************************//** Finds a fragment page slot which is used and last in the array. -@return slot index; ULINT_UNDEFINED if none found */ +@return slot index; ULINT_UNDEFINED if none found */ static ulint fseg_find_last_used_frag_page_slot( @@ -1972,7 +2616,7 @@ fseg_find_last_used_frag_page_slot( /**********************************************************************//** Calculates reserved fragment page slots. -@return number of fragment pages */ +@return number of fragment pages */ static ulint fseg_get_n_frag_pages( @@ -1998,11 +2642,10 @@ fseg_get_n_frag_pages( Creates a new segment. @return the block where the segment header is placed, x-latched, NULL if could not create segment because of lack of space */ -UNIV_INTERN buf_block_t* fseg_create_general( /*================*/ - ulint space, /*!< in: space id */ + ulint space_id,/*!< in: space id */ ulint page, /*!< in: page where the segment header is placed: if this is != 0, the page must belong to another segment, if this is 0, a new page will be allocated and it @@ -2017,50 +2660,54 @@ fseg_create_general( operation */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint flags; - ulint zip_size; fsp_header_t* space_header; fseg_inode_t* inode; ib_id_t seg_id; buf_block_t* block = 0; /* remove warning */ fseg_header_t* header = 0; /* remove warning */ - rw_lock_t* latch; - ibool success; ulint n_reserved; ulint i; + DBUG_ENTER("fseg_create_general"); + ut_ad(mtr); ut_ad(byte_offset + FSEG_HEADER_SIZE <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); + ut_d(fsp_space_modify_check(space_id, mtr)); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t 
page_size(space->flags); if (page != 0) { - block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr); + block = buf_page_get(page_id_t(space_id, page), page_size, + RW_SX_LATCH, mtr); + header = byte_offset + buf_block_get_frame(block); + + const ulint type = space_id == TRX_SYS_SPACE + && page == TRX_SYS_PAGE_NO + ? FIL_PAGE_TYPE_TRX_SYS + : FIL_PAGE_TYPE_SYS; + + fil_block_check_type(block, type, mtr); } - mtr_x_lock(latch, mtr); - - if (rw_lock_get_x_lock_count(latch) == 1) { + if (rw_lock_get_x_lock_count(&space->latch) == 1) { /* This thread did not own the latch before this call: free excess pages from the insert buffer free list */ - if (space == IBUF_SPACE_ID) { + if (space_id == IBUF_SPACE_ID) { ibuf_free_excess_pages(); } } - if (!has_done_reservation) { - success = fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr); - if (!success) { - return(NULL); - } + if (!has_done_reservation + && !fsp_reserve_free_extents(&n_reserved, space_id, 2, + FSP_NORMAL, mtr)) { + DBUG_RETURN(NULL); } - space_header = fsp_get_space_header(space, zip_size, mtr); + space_header = fsp_get_space_header(space_id, page_size, mtr); inode = fsp_alloc_seg_inode(space_header, mtr); @@ -2090,17 +2737,26 @@ fseg_create_general( } if (page == 0) { - block = fseg_alloc_free_page_low(space, zip_size, - inode, 0, FSP_UP, mtr, mtr); + block = fseg_alloc_free_page_low(space, page_size, + inode, 0, FSP_UP, RW_SX_LATCH, + mtr, mtr +#ifdef UNIV_DEBUG + , has_done_reservation +#endif /* UNIV_DEBUG */ + ); + + /* The allocation cannot fail if we have already reserved a + space for the page. 
*/ + ut_ad(!has_done_reservation || block != NULL); if (block == NULL) { - fsp_free_seg_inode(space, zip_size, inode, mtr); + fsp_free_seg_inode(space_id, page_size, inode, mtr); goto funct_exit; } - ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); + ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1); header = byte_offset + buf_block_get_frame(block); mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, @@ -2114,22 +2770,21 @@ fseg_create_general( page_get_page_no(page_align(inode)), MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr); + mlog_write_ulint(header + FSEG_HDR_SPACE, space_id, MLOG_4BYTES, mtr); funct_exit: if (!has_done_reservation) { - fil_space_release_free_extents(space, n_reserved); + fil_space_release_free_extents(space_id, n_reserved); } - return(block); + DBUG_RETURN(block); } /**********************************************************************//** Creates a new segment. @return the block where the segment header is placed, x-latched, NULL if could not create segment because of lack of space */ -UNIV_INTERN buf_block_t* fseg_create( /*========*/ @@ -2148,7 +2803,7 @@ fseg_create( /**********************************************************************//** Calculates the number of pages reserved by a segment, and how many pages are currently used. 
-@return number of reserved pages */ +@return number of reserved pages */ static ulint fseg_n_reserved_pages_low( @@ -2161,16 +2816,16 @@ fseg_n_reserved_pages_low( ulint ret; ut_ad(inode && used && mtr); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX)); - *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr) + *used = mach_read_from_4(inode + FSEG_NOT_FULL_N_USED) + + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL) + fseg_get_n_frag_pages(inode, mtr); ret = fseg_get_n_frag_pages(inode, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr); + + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE) + + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL) + + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL); return(ret); } @@ -2178,8 +2833,7 @@ fseg_n_reserved_pages_low( /**********************************************************************//** Calculates the number of pages reserved by a segment, and how many pages are currently used. 
-@return number of reserved pages */ -UNIV_INTERN +@return number of reserved pages */ ulint fseg_n_reserved_pages( /*==================*/ @@ -2189,40 +2843,39 @@ fseg_n_reserved_pages( { ulint ret; fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; - rw_lock_t* latch; + ulint space_id; + fil_space_t* space; - space = page_get_space_id(page_align(header)); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + space_id = page_get_space_id(page_align(header)); + space = mtr_x_lock_space(space_id, mtr); - mtr_x_lock(latch, mtr); + const page_size_t page_size(space->flags); - inode = fseg_inode_get(header, space, zip_size, mtr); + inode = fseg_inode_get(header, space_id, page_size, mtr); ret = fseg_n_reserved_pages_low(inode, used, mtr); return(ret); } -/*********************************************************************//** -Tries to fill the free list of a segment with consecutive free extents. +/** Tries to fill the free list of a segment with consecutive free extents. This happens if the segment is big enough to allow extents in the free list, the free list is empty, and the extents can be allocated consecutively from -the hint onward. */ +the hint onward. 
+@param[in] inode segment inode +@param[in] space space id +@param[in] page_size page size +@param[in] hint hint which extent would be good as the first +extent +@param[in,out] mtr mini-transaction */ static void fseg_fill_free_list( -/*================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint which extent would be good as - the first extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_inode_t* inode, + ulint space, + const page_size_t& page_size, + ulint hint, + mtr_t* mtr) { xdes_t* descr; ulint i; @@ -2232,6 +2885,7 @@ fseg_fill_free_list( ut_ad(inode && mtr); ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_d(fsp_space_modify_check(space, mtr)); reserved = fseg_n_reserved_pages_low(inode, &used, mtr); @@ -2242,14 +2896,14 @@ fseg_fill_free_list( return; } - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { + if (flst_get_len(inode + FSEG_FREE) > 0) { /* Free list is not empty */ return; } for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) { - descr = xdes_get_descriptor(space, zip_size, hint, mtr); + descr = xdes_get_descriptor(space, hint, page_size, mtr); if ((descr == NULL) || (XDES_FREE != xdes_get_state(descr, mtr))) { @@ -2259,7 +2913,7 @@ fseg_fill_free_list( return; } - descr = fsp_alloc_free_extent(space, zip_size, hint, mtr); + descr = fsp_alloc_free_extent(space, page_size, hint, mtr); xdes_set_state(descr, XDES_FSEG, mtr); @@ -2273,23 +2927,25 @@ fseg_fill_free_list( } } -/*********************************************************************//** -Allocates a free extent for the segment: looks first in the free list of the -segment, then tries to allocate from the space free list. NOTE that the extent -returned still resides in the segment free list, it is not yet taken off it! 
-@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +/** Allocates a free extent for the segment: looks first in the free list of +the segment, then tries to allocate from the space free list. +NOTE that the extent returned still resides in the segment free list, it is +not yet taken off it! +@param[in] inode segment inode +@param[in] space space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@retval NULL if no page could be allocated +@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ +@retval block (not allocated or initialized) otherwise */ static xdes_t* fseg_alloc_free_extent( -/*===================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_inode_t* inode, + ulint space, + const page_size_t& page_size, + mtr_t* mtr) { xdes_t* descr; ib_id_t seg_id; @@ -2297,16 +2953,17 @@ fseg_alloc_free_extent( ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_d(fsp_space_modify_check(space, mtr)); - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { + if (flst_get_len(inode + FSEG_FREE) > 0) { /* Segment free list is not empty, allocate from it */ first = flst_get_first(inode + FSEG_FREE, mtr); - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); + descr = xdes_lst_get_descriptor(space, page_size, first, mtr); } else { /* Segment free list was empty, allocate from space */ - descr = fsp_alloc_free_extent(space, zip_size, 0, mtr); + descr = fsp_alloc_free_extent(space, page_size, 0, mtr); if (descr == NULL) { @@ -2320,7 +2977,7 @@ 
fseg_alloc_free_extent( flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); /* Try to fill the segment free list */ - fseg_fill_free_list(inode, space, zip_size, + fseg_fill_free_list(inode, space, page_size, xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr); } @@ -2328,37 +2985,44 @@ fseg_alloc_free_extent( return(descr); } -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded +/** Allocates a single free page from a segment. +This function implements the intelligent allocation strategy which tries to +minimize file space fragmentation. +@param[in,out] space tablespace +@param[in] page_size page size +@param[in,out] seg_inode segment inode +@param[in] hint hint of which page would be desirable +@param[in] direction if the new page is needed because of +an index page split, and records are inserted there in order, into which +direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR +@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH +@param[in,out] mtr mini-transaction +@param[in,out] init_mtr mtr or another mini-transaction in +which the page should be initialized. 
If init_mtr != mtr, but the page is +already latched in mtr, do not initialize the page +@param[in] has_done_reservation TRUE if the space has already been +reserved, in this case we will never return NULL +@retval NULL if no page could be allocated +@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ +@retval block (not allocated or initialized) otherwise */ static buf_block_t* fseg_alloc_free_page_low( -/*=====================*/ - ulint space, /*!< in: space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in/out: segment inode */ - ulint hint, /*!< in: hint of which page would be - desirable */ - byte direction, /*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized. - If init_mtr!=mtr, but the page is already - latched in mtr, do not initialize the page. 
*/ + fil_space_t* space, + const page_size_t& page_size, + fseg_inode_t* seg_inode, + ulint hint, + byte direction, + rw_lock_type_t rw_latch, + mtr_t* mtr, + mtr_t* init_mtr +#ifdef UNIV_DEBUG + , ibool has_done_reservation +#endif /* UNIV_DEBUG */ +) { fsp_header_t* space_header; - ulint space_size; ib_id_t seg_id; ulint used; ulint reserved; @@ -2366,30 +3030,34 @@ fseg_alloc_free_page_low( ulint ret_page; /*!< the allocated page offset, FIL_NULL if could not be allocated */ xdes_t* ret_descr; /*!< the extent of the allocated page */ - ibool success; ulint n; + const ulint space_id = space->id; ut_ad(mtr); ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_ad(space->purpose == FIL_TYPE_TEMPORARY + || space->purpose == FIL_TYPE_TABLESPACE); seg_id = mach_read_from_8(seg_inode + FSEG_ID); ut_ad(seg_id); + ut_d(fsp_space_modify_check(space_id, mtr)); + ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE); reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr); - space_header = fsp_get_space_header(space, zip_size, mtr); + space_header = fsp_get_space_header(space_id, page_size, mtr); - descr = xdes_get_descriptor_with_space_hdr(space_header, space, + descr = xdes_get_descriptor_with_space_hdr(space_header, space_id, hint, mtr); if (descr == NULL) { /* Hint outside space or too high above free limit: reset hint */ /* The file space header page is always allocated. 
*/ hint = 0; - descr = xdes_get_descriptor(space, zip_size, hint, mtr); + descr = xdes_get_descriptor(space_id, hint, page_size, mtr); } /* In the big if-else below we look for ret_page and ret_descr */ @@ -2416,7 +3084,8 @@ take_hinted_page: ========================================================= the hinted page ===============*/ - ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr); + ret_descr = fsp_alloc_free_extent( + space_id, page_size, hint, mtr); ut_a(ret_descr == descr); @@ -2426,7 +3095,7 @@ take_hinted_page: ret_descr + XDES_FLST_NODE, mtr); /* Try to fill the segment free list */ - fseg_fill_free_list(seg_inode, space, zip_size, + fseg_fill_free_list(seg_inode, space_id, page_size, hint + FSP_EXTENT_SIZE, mtr); goto take_hinted_page; /*-----------------------------------------------------------*/ @@ -2434,8 +3103,8 @@ take_hinted_page: && ((reserved - used) < reserved / FSEG_FILLFACTOR) && (used >= FSEG_FRAG_LIMIT) && (!!(ret_descr - = fseg_alloc_free_extent(seg_inode, - space, zip_size, mtr)))) { + = fseg_alloc_free_extent( + seg_inode, space_id, page_size, mtr)))) { /* 3. We take any free extent (which was already assigned above =============================================================== @@ -2448,6 +3117,7 @@ take_hinted_page: if (direction == FSP_DOWN) { ret_page += FSP_EXTENT_SIZE - 1; } + ut_ad(!has_done_reservation || ret_page != FIL_NULL); /*-----------------------------------------------------------*/ } else if ((xdes_get_state(descr, mtr) == XDES_FSEG) && mach_read_from_8(descr + XDES_ID) == seg_id @@ -2463,33 +3133,37 @@ take_hinted_page: ret_page = xdes_get_offset(ret_descr) + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, hint % FSP_EXTENT_SIZE, mtr); + ut_ad(!has_done_reservation || ret_page != FIL_NULL); /*-----------------------------------------------------------*/ } else if (reserved - used > 0) { /* 5. 
We take any unused page from the segment ==============================================*/ fil_addr_t first; - if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) { + if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) { first = flst_get_first(seg_inode + FSEG_NOT_FULL, mtr); - } else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) { + } else if (flst_get_len(seg_inode + FSEG_FREE) > 0) { first = flst_get_first(seg_inode + FSEG_FREE, mtr); } else { - ut_error; + ut_ad(!has_done_reservation); return(NULL); } - ret_descr = xdes_lst_get_descriptor(space, zip_size, + ret_descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr); ret_page = xdes_get_offset(ret_descr) + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr); + ut_ad(!has_done_reservation || ret_page != FIL_NULL); /*-----------------------------------------------------------*/ } else if (used < FSEG_FRAG_LIMIT) { /* 6. We allocate an individual page from the space ===================================================*/ buf_block_t* block = fsp_alloc_free_page( - space, zip_size, hint, mtr, init_mtr); + space_id, page_size, hint, rw_latch, mtr, init_mtr); + + ut_ad(!has_done_reservation || block != NULL); if (block != NULL) { /* Put the page in the fragment page array of the @@ -2498,7 +3172,7 @@ take_hinted_page: ut_a(n != ULINT_UNDEFINED); fseg_set_nth_frag_page_no( - seg_inode, n, buf_block_get_page_no(block), + seg_inode, n, block->page.id.page_no(), mtr); } @@ -2510,45 +3184,43 @@ take_hinted_page: /* 7. 
We allocate a new extent and take its first page ======================================================*/ ret_descr = fseg_alloc_free_extent(seg_inode, - space, zip_size, mtr); + space_id, page_size, mtr); if (ret_descr == NULL) { ret_page = FIL_NULL; + ut_ad(!has_done_reservation); } else { ret_page = xdes_get_offset(ret_descr); + ut_ad(!has_done_reservation || ret_page != FIL_NULL); } } if (ret_page == FIL_NULL) { /* Page could not be allocated */ + ut_ad(!has_done_reservation); return(NULL); } - if (space != 0) { - space_size = fil_space_get_size(space); + if (space->size <= ret_page && !is_system_tablespace(space_id)) { + /* It must be that we are extending a single-table + tablespace whose size is still < 64 pages */ - if (space_size <= ret_page) { - /* It must be that we are extending a single-table - tablespace whose size is still < 64 pages */ + if (ret_page >= FSP_EXTENT_SIZE) { + ib::error() << "Error (2): trying to extend" + " a single-table tablespace " << space_id + << " by single page(s) though the" + << " space size " << space->size + << ". Page no " << ret_page << "."; + ut_ad(!has_done_reservation); + return(NULL); + } - if (ret_page >= FSP_EXTENT_SIZE) { - fprintf(stderr, - "InnoDB: Error (2): trying to extend" - " a single-table tablespace %lu\n" - "InnoDB: by single page(s) though" - " the space size %lu. Page no %lu.\n", - (ulong) space, (ulong) space_size, - (ulong) ret_page); - return(NULL); - } - - success = fsp_try_extend_data_file_with_pages( - space, ret_page, space_header, mtr); - if (!success) { - /* No disk space left */ - return(NULL); - } + if (!fsp_try_extend_data_file_with_pages( + space, ret_page, space_header, mtr)) { + /* No disk space left */ + ut_ad(!has_done_reservation); + return(NULL); } } @@ -2560,7 +3232,7 @@ got_hinted_page: The extent is still in the appropriate list (FSEG_NOT_FULL or FSEG_FREE), and the page is not yet marked as used. 
*/ - ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr) + ut_ad(xdes_get_descriptor(space_id, ret_page, page_size, mtr) == ret_descr); ut_ad(xdes_mtr_get_bit( @@ -2570,11 +3242,10 @@ got_hinted_page: fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr); } - return(fsp_page_create( - space, fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS - + space_header)), - ret_page, mtr, init_mtr)); + ut_ad(space->flags + == mach_read_from_4(FSP_SPACE_FLAGS + space_header)); + return(fsp_page_create(page_id_t(space_id, ret_page), page_size, + rw_latch, mtr, init_mtr)); } /**********************************************************************//** @@ -2585,7 +3256,6 @@ fragmentation. @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) @retval block (not allocated or initialized) otherwise */ -UNIV_INTERN buf_block_t* fseg_alloc_free_page_general( /*=========================*/ @@ -2609,89 +3279,93 @@ fseg_alloc_free_page_general( latched in mtr, do not initialize the page. 
*/ { fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; - rw_lock_t* latch; + ulint space_id; + fil_space_t* space; + buf_block_t* iblock; buf_block_t* block; ulint n_reserved; - space = page_get_space_id(page_align(seg_header)); + space_id = page_get_space_id(page_align(seg_header)); + space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - latch = fil_space_get_latch(space, &flags); - - zip_size = fsp_flags_get_zip_size(flags); - - mtr_x_lock(latch, mtr); - - if (rw_lock_get_x_lock_count(latch) == 1) { + if (rw_lock_get_x_lock_count(&space->latch) == 1) { /* This thread did not own the latch before this call: free excess pages from the insert buffer free list */ - if (space == IBUF_SPACE_ID) { + if (space_id == IBUF_SPACE_ID) { ibuf_free_excess_pages(); } } - inode = fseg_inode_get(seg_header, space, zip_size, mtr); + inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock); + fil_block_check_type(iblock, FIL_PAGE_INODE, mtr); if (!has_done_reservation - && !fsp_reserve_free_extents(&n_reserved, space, 2, + && !fsp_reserve_free_extents(&n_reserved, space_id, 2, FSP_NORMAL, mtr)) { return(NULL); } - block = fseg_alloc_free_page_low(space, zip_size, + block = fseg_alloc_free_page_low(space, page_size, inode, hint, direction, - mtr, init_mtr); + RW_X_LATCH, mtr, init_mtr +#ifdef UNIV_DEBUG + , has_done_reservation +#endif /* UNIV_DEBUG */ + ); + + /* The allocation cannot fail if we have already reserved a + space for the page. */ + ut_ad(!has_done_reservation || block != NULL); + if (!has_done_reservation) { - fil_space_release_free_extents(space, n_reserved); + fil_space_release_free_extents(space_id, n_reserved); } return(block); } -/**********************************************************************//** -Checks that we have at least 2 frag pages free in the first extent of a -single-table tablespace, and they are also physically initialized to the data -file. 
That is we have already extended the data file so that those pages are -inside the data file. If not, this function extends the tablespace with -pages. -@return TRUE if there were >= 3 free pages, or we were able to extend */ +/** Check that we have at least n_pages frag pages free in the first extent +of a single-table tablespace, and they are also physically initialized to +the data file. That is we have already extended the data file so that those +pages are inside the data file. If not, this function extends the tablespace +with pages. +@param[in,out] space tablespace +@param[in,out] space_header tablespace header, x-latched +@param[in] size size of the tablespace in pages, +must be less than FSP_EXTENT_SIZE +@param[in,out] mtr mini-transaction +@param[in] n_pages number of pages to reserve +@return true if there were at least n_pages free pages, or we were able +to extend */ static -ibool +bool fsp_reserve_free_pages( -/*===================*/ - ulint space, /*!< in: space id, must be != 0 */ - fsp_header_t* space_header, /*!< in: header of that space, - x-latched */ - ulint size, /*!< in: size of the tablespace in - pages, must be < FSP_EXTENT_SIZE/2 */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fil_space_t* space, + fsp_header_t* space_header, + ulint size, + mtr_t* mtr, + ulint n_pages) { xdes_t* descr; ulint n_used; - ut_a(space != 0); - ut_a(size < FSP_EXTENT_SIZE / 2); + ut_a(!is_system_tablespace(space->id)); + ut_a(size < FSP_EXTENT_SIZE); - descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0, - mtr); + descr = xdes_get_descriptor_with_space_hdr( + space_header, space->id, 0, mtr); n_used = xdes_get_n_used(descr, mtr); ut_a(n_used <= size); - if (size >= n_used + 2) { - - return(TRUE); - } - - return(fsp_try_extend_data_file_with_pages(space, n_used + 1, - space_header, mtr)); + return(size >= n_used + n_pages + || fsp_try_extend_data_file_with_pages( + space, n_used + n_pages - 1, space_header, mtr)); } 
-/**********************************************************************//** -Reserves free pages from a tablespace. All mini-transactions which may +/** Reserves free pages from a tablespace. All mini-transactions which may use several pages from the tablespace should call this function beforehand and reserve enough free extents so that they certainly will be able to do their operation, like a B-tree page split, fully. Reservations @@ -2710,82 +3384,91 @@ The purpose is to avoid dead end where the database is full but the user cannot free any space because these freeing operations temporarily reserve some space. -Single-table tablespaces whose size is < 32 pages are a special case. In this -function we would liberally reserve several 64 page extents for every page -split or merge in a B-tree. But we do not want to waste disk space if the table -only occupies < 32 pages. That is why we apply different rules in that special -case, just ensuring that there are 3 free pages available. -@return TRUE if we were able to make the reservation */ -UNIV_INTERN -ibool +Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special +case. In this function we would liberally reserve several extents for +every page split or merge in a B-tree. But we do not want to waste disk space +if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply +different rules in that special case, just ensuring that there are n_pages +free pages available. + +@param[out] n_reserved number of extents actually reserved; if we + return true and the tablespace size is < + FSP_EXTENT_SIZE pages, then this can be 0, + otherwise it is n_ext +@param[in] space_id tablespace identifier +@param[in] n_ext number of extents to reserve +@param[in] alloc_type page reservation type (FSP_BLOB, etc) +@param[in,out] mtr the mini transaction +@param[in] n_pages for small tablespaces (tablespace size is + less than FSP_EXTENT_SIZE), number of free + pages to reserve. 
+@return true if we were able to make the reservation */ +bool fsp_reserve_free_extents( -/*=====================*/ - ulint* n_reserved,/*!< out: number of extents actually reserved; if we - return TRUE and the tablespace size is < 64 pages, - then this can be 0, otherwise it is n_ext */ - ulint space, /*!< in: space id */ - ulint n_ext, /*!< in: number of extents to reserve */ - ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + ulint* n_reserved, + ulint space_id, + ulint n_ext, + fsp_reserve_t alloc_type, + mtr_t* mtr, + ulint n_pages) { fsp_header_t* space_header; - rw_lock_t* latch; ulint n_free_list_ext; ulint free_limit; ulint size; - ulint flags; - ulint zip_size; ulint n_free; ulint n_free_up; ulint reserve= 0; - ibool success; - ulint n_pages_added; size_t total_reserved = 0; ulint rounds = 0; + ulint n_pages_added = 0; ut_ad(mtr); *n_reserved = n_ext; - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - mtr_x_lock(latch, mtr); - - space_header = fsp_get_space_header(space, zip_size, mtr); + space_header = fsp_get_space_header(space_id, page_size, mtr); try_again: - size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr); + size = mach_read_from_4(space_header + FSP_SIZE); + ut_ad(size == space->size_in_header); - if (size < FSP_EXTENT_SIZE / 2) { + if (size < FSP_EXTENT_SIZE && n_pages < FSP_EXTENT_SIZE / 2) { /* Use different rules for small single-table tablespaces */ *n_reserved = 0; - return(fsp_reserve_free_pages(space, space_header, size, mtr)); + return(fsp_reserve_free_pages(space, space_header, size, + mtr, n_pages)); } - n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr); + n_free_list_ext = flst_get_len(space_header + FSP_FREE); + ut_ad(space->free_len == n_free_list_ext); free_limit = mtr_read_ulint(space_header + 
FSP_FREE_LIMIT, MLOG_4BYTES, mtr); + ut_ad(space->free_limit == free_limit); /* Below we play safe when counting free extents above the free limit: some of them will contain extent descriptor pages, and therefore will not be free extents */ - n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; + if (size >= free_limit) { + n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; + } else { + ut_ad(alloc_type == FSP_BLOB); + n_free_up = 0; + } if (n_free_up > 0) { n_free_up--; - if (!zip_size) { - n_free_up -= n_free_up - / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE); - } else { - n_free_up -= n_free_up - / (zip_size / FSP_EXTENT_SIZE); - } + n_free_up -= n_free_up / (page_size.physical() + / FSP_EXTENT_SIZE); } n_free = n_free_list_ext + n_free_up; - if (alloc_type == FSP_NORMAL) { + switch (alloc_type) { + case FSP_NORMAL: /* We reserve 1 extent + 0.5 % of the space size to undo logs and 1 extent + 0.5 % to cleaning operations; NOTE: this source code is duplicated in the function below! */ @@ -2796,7 +3479,8 @@ try_again: goto try_to_extend; } - } else if (alloc_type == FSP_UNDO) { + break; + case FSP_UNDO: /* We reserve 0.5 % of the space size to cleaning operations */ reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200; @@ -2805,133 +3489,71 @@ try_again: goto try_to_extend; } - } else { - ut_a(alloc_type == FSP_CLEANING); + break; + case FSP_CLEANING: + case FSP_BLOB: + break; + default: + ut_error; } - success = fil_space_reserve_free_extents(space, n_free, n_ext); - *n_reserved = n_ext; - - if (success) { - return(TRUE); + if (fil_space_reserve_free_extents(space_id, n_free, n_ext)) { + return(true); } try_to_extend: - success = fsp_try_extend_data_file(&n_pages_added, space, - space_header, mtr); + n_pages_added = 0; - if (success && n_pages_added > 0) { + if (fsp_try_extend_data_file(space, space_header, mtr, &n_pages_added)) { rounds++; total_reserved += n_pages_added; - if (rounds > 50) { - ib_logf(IB_LOG_LEVEL_INFO, - "Space id %lu trying to reserve %lu extents actually 
reserved %lu " - " reserve %lu free %lu size %lu rounds %lu total_reserved %llu", - space, n_ext, n_pages_added, reserve, n_free, size, rounds, (ullint) total_reserved); + if (rounds > 10) { + ib::info() << "Space id: " + << space << " trying to reserve: " + << n_ext << " extents actually reserved: " + << n_pages_added << " reserve: " + << reserve << " free: " << n_free + << " size: " << size + << " rounds: " << rounds + << " total_reserved: " << total_reserved << "."; } - goto try_again; } - return(FALSE); + return(false); } -/**********************************************************************//** -This function should be used to get information on how much we still -will be able to insert new data to the database without running out the -tablespace. Only free extents are taken into account and we also subtract -the safety margin required by the above function fsp_reserve_free_extents. -@return available space in kB */ -UNIV_INTERN -ullint +/** Calculate how many KiB of new data we will be able to insert to the +tablespace without running out of space. +@param[in] space_id tablespace ID +@return available space in KiB +@retval UINTMAX_MAX if unknown */ +uintmax_t fsp_get_available_space_in_free_extents( -/*====================================*/ - ulint space) /*!< in: space id */ + ulint space_id) { - fsp_header_t* space_header; - ulint n_free_list_ext; - ulint free_limit; - ulint size; - ulint flags; - ulint zip_size; - ulint n_free; - ulint n_free_up; - ulint reserve; - rw_lock_t* latch; - mtr_t mtr; - - /* The convoluted mutex acquire is to overcome latching order - issues: The problem is that the fil_mutex is at a lower level - than the tablespace latch and the buffer pool mutex. We have to - first prevent any operations on the file system by acquiring the - dictionary mutex. Then acquire the tablespace latch to obey the - latching order and then release the dictionary mutex. 
That way we - ensure that the tablespace instance can't be freed while we are - examining its contents (see fil_space_free()). - - However, there is one further complication, we release the fil_mutex - when we need to invalidate the the pages in the buffer pool and we - reacquire the fil_mutex when deleting and freeing the tablespace - instance in fil0fil.cc. Here we need to account for that situation - too. */ - - mutex_enter(&dict_sys->mutex); - - /* At this stage there is no guarantee that the tablespace even - exists in the cache. */ - - if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) { - - mutex_exit(&dict_sys->mutex); - - return(ULLINT_UNDEFINED); + FilSpace space(space_id); + if (space() == NULL) { + return(UINTMAX_MAX); } - mtr_start(&mtr); + return(fsp_get_available_space_in_free_extents(space)); +} - latch = fil_space_get_latch(space, &flags); - - /* This should ensure that the tablespace instance can't be freed - by another thread. However, the tablespace pages can still be freed - from the buffer pool. We need to check for that again. */ - - zip_size = fsp_flags_get_zip_size(flags); - - mtr_x_lock(latch, &mtr); - - mutex_exit(&dict_sys->mutex); - - /* At this point it is possible for the tablespace to be deleted and - its pages removed from the buffer pool. We need to check for that - situation. However, the tablespace instance can't be deleted because - our latching above should ensure that. */ - - if (fil_tablespace_is_being_deleted(space)) { - - mtr_commit(&mtr); - - return(ULLINT_UNDEFINED); - } - - /* From here on even if the user has dropped the tablespace, the - pages _must_ still exist in the buffer pool and the tablespace - instance _must_ be in the file system hash table. 
*/ - - space_header = fsp_get_space_header(space, zip_size, &mtr); - - size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr); - - n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr); - - free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT, - MLOG_4BYTES, &mtr); - mtr_commit(&mtr); - - if (size < FSP_EXTENT_SIZE) { - ut_a(space != 0); /* This must be a single-table - tablespace */ +/** Calculate how many KiB of new data we will be able to insert to the +tablespace without running out of space. Start with a space object that has +been acquired by the caller who holds it for the calculation, +@param[in] space tablespace object from fil_space_acquire() +@return available space in KiB */ +uintmax_t +fsp_get_available_space_in_free_extents( + const fil_space_t* space) +{ + ut_ad(space->n_pending_ops > 0); + ulint size_in_header = space->size_in_header; + if (size_in_header < FSP_EXTENT_SIZE) { return(0); /* TODO: count free frag pages and return a value based on that */ } @@ -2939,41 +3561,30 @@ fsp_get_available_space_in_free_extents( /* Below we play safe when counting free extents above the free limit: some of them will contain extent descriptor pages, and therefore will not be free extents */ + ut_ad(size_in_header >= space->free_limit); + ulint n_free_up = + (size_in_header - space->free_limit) / FSP_EXTENT_SIZE; - n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; - + page_size_t page_size(space->flags); if (n_free_up > 0) { n_free_up--; - if (!zip_size) { - n_free_up -= n_free_up - / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE); - } else { - n_free_up -= n_free_up - / (zip_size / FSP_EXTENT_SIZE); - } + n_free_up -= n_free_up / (page_size.physical() + / FSP_EXTENT_SIZE); } - n_free = n_free_list_ext + n_free_up; - /* We reserve 1 extent + 0.5 % of the space size to undo logs and 1 extent + 0.5 % to cleaning operations; NOTE: this source code is duplicated in the function above! 
*/ - reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200; + ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200; + ulint n_free = space->free_len + n_free_up; if (reserve > n_free) { return(0); } - if (!zip_size) { - return((ullint) (n_free - reserve) - * FSP_EXTENT_SIZE - * (UNIV_PAGE_SIZE / 1024)); - } else { - return((ullint) (n_free - reserve) - * FSP_EXTENT_SIZE - * (zip_size / 1024)); - } + return(static_cast(n_free - reserve) + * FSP_EXTENT_SIZE * (page_size.physical() / 1024)); } /********************************************************************//** @@ -2990,6 +3601,7 @@ fseg_mark_page_used( { ulint not_full_n_used; + ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); @@ -3031,18 +3643,21 @@ fseg_mark_page_used( } } -/**********************************************************************//** -Frees a single page of a segment. */ +/** Frees a single page of a segment. 
+@param[in] seg_inode segment inode +@param[in] page_id page id +@param[in] page_size page size +@param[in] ahi whether we may need to drop the adaptive +hash index +@param[in,out] mtr mini-transaction */ static void fseg_free_page_low( -/*===============*/ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_inode_t* seg_inode, + const page_id_t& page_id, + const page_size_t& page_size, + bool ahi, + mtr_t* mtr) { xdes_t* descr; ulint not_full_n_used; @@ -3056,34 +3671,30 @@ fseg_free_page_low( ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); + ut_d(fsp_space_modify_check(page_id.space(), mtr)); /* Drop search system page hash index if the page is found in the pool and is hashed */ - btr_search_drop_page_hash_when_freed(space, zip_size, page); + if (ahi) { + btr_search_drop_page_hash_when_freed(page_id, page_size); + } - descr = xdes_get_descriptor(space, zip_size, page, mtr); + descr = xdes_get_descriptor(page_id.space(), page_id.page_no(), + page_size, mtr); if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, - page % FSP_EXTENT_SIZE, mtr)) { + page_id.page_no() % FSP_EXTENT_SIZE, mtr)) { fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr); ut_print_buf(stderr, descr, 40); - - fprintf(stderr, "\n" - "InnoDB: Serious error! InnoDB is trying to" - " free page %lu\n" - "InnoDB: though it is already marked as free" - " in the tablespace!\n" - "InnoDB: The tablespace free space info is corrupt.\n" - "InnoDB: You may need to dump your" - " InnoDB tables and recreate the whole\n" - "InnoDB: database!\n", (ulong) page); + ib::error() << "InnoDB is trying to free page " << page_id + << " though it is already marked as free in the" + " tablespace! 
The tablespace free space info is" + " corrupt. You may need to dump your tables and" + " recreate the whole database!"; crash: - fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - ut_error; + ib::fatal() << FORCE_RECOVERY_MSG; } state = xdes_get_state(descr, mtr); @@ -3093,7 +3704,7 @@ crash: for (i = 0;; i++) { if (fseg_get_nth_frag_page_no(seg_inode, i, mtr) - == page) { + == page_id.page_no()) { fseg_set_nth_frag_page_no(seg_inode, i, FIL_NULL, mtr); @@ -3101,7 +3712,7 @@ crash: } } - fsp_free_page(space, zip_size, page, mtr); + fsp_free_page(page_id, page_size, mtr); return; } @@ -3110,15 +3721,7 @@ crash: descr_id = mach_read_from_8(descr + XDES_ID); seg_id = mach_read_from_8(seg_inode + FSEG_ID); -#if 0 - fprintf(stderr, - "InnoDB: InnoDB is freeing space %lu page %lu,\n" - "InnoDB: which belongs to descr seg %llu\n" - "InnoDB: segment %llu.\n", - (ulong) space, (ulong) page, - (ullint) descr_id, - (ullint) seg_id); -#endif /* 0 */ + if (UNIV_UNLIKELY(descr_id != seg_id)) { fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr); @@ -3127,15 +3730,9 @@ crash: ut_print_buf(stderr, seg_inode, 40); putc('\n', stderr); - fprintf(stderr, - "InnoDB: Serious error: InnoDB is trying to" - " free space %lu page %lu,\n" - "InnoDB: which does not belong to" - " segment %llu but belongs\n" - "InnoDB: to segment %llu.\n", - (ulong) space, (ulong) page, - (ullint) descr_id, - (ullint) seg_id); + ib::error() << "InnoDB is trying to free page " << page_id + << ", which does not belong to segment " << descr_id + << " but belongs to segment " << seg_id << "."; goto crash; } @@ -3156,82 +3753,74 @@ crash: not_full_n_used - 1, MLOG_4BYTES, mtr); } - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); + const ulint bit = page_id.page_no() % FSP_EXTENT_SIZE; + + xdes_set_bit(descr, 
XDES_FREE_BIT, bit, TRUE, mtr); + xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr); if (xdes_is_free(descr, mtr)) { /* The extent has become free: free it to space */ flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr); - fsp_free_extent(space, zip_size, page, mtr); + fsp_free_extent(page_id, page_size, mtr); } - - mtr->n_freed_pages++; } /**********************************************************************//** Frees a single page of a segment. */ -UNIV_INTERN void fseg_free_page( /*===========*/ fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ + ulint space_id,/*!< in: space id */ ulint page, /*!< in: page offset */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ulint flags; - ulint zip_size; - fseg_inode_t* seg_inode; - rw_lock_t* latch; + fseg_inode_t* seg_inode; + buf_block_t* iblock; + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + seg_inode = fseg_inode_get(seg_header, space_id, page_size, mtr, + &iblock); + fil_block_check_type(iblock, FIL_PAGE_INODE, mtr); - mtr_x_lock(latch, mtr); + const page_id_t page_id(space_id, page); - seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr); + fseg_free_page_low(seg_inode, page_id, page_size, ahi, mtr); - fseg_free_page_low(seg_inode, space, zip_size, page, mtr); - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_set_file_page_was_freed(space, page); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + ut_d(buf_page_set_file_page_was_freed(page_id)); } /**********************************************************************//** Checks if a single page of a segment is free. 
-@return true if free */ -UNIV_INTERN +@return true if free */ bool fseg_page_is_free( /*==============*/ fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ + ulint space_id, /*!< in: space id */ ulint page) /*!< in: page offset */ { mtr_t mtr; ibool is_free; - ulint flags; - rw_lock_t* latch; xdes_t* descr; - ulint zip_size; fseg_inode_t* seg_inode; - latch = fil_space_get_latch(space, &flags); - zip_size = dict_tf_get_zip_size(flags); - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); + const fil_space_t* space = mtr_x_lock_space(space_id, &mtr); + const page_size_t page_size(space->flags); - seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr); + seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr); ut_a(seg_inode); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - descr = xdes_get_descriptor(space, zip_size, page, &mtr); + descr = xdes_get_descriptor(space_id, page, page_size, &mtr); ut_a(descr); is_free = xdes_mtr_get_bit( @@ -3244,15 +3833,16 @@ fseg_page_is_free( /**********************************************************************//** Frees an extent of a segment to the space free list. 
*/ -static +static MY_ATTRIBUTE((nonnull)) void fseg_free_extent( /*=============*/ fseg_inode_t* seg_inode, /*!< in: segment inode */ ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ + const page_size_t& page_size, ulint page, /*!< in: a page in the extent */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint first_page_in_extent; @@ -3264,23 +3854,29 @@ fseg_free_extent( ut_ad(seg_inode != NULL); ut_ad(mtr != NULL); - descr = xdes_get_descriptor(space, zip_size, page, mtr); + descr = xdes_get_descriptor(space, page, page_size, mtr); ut_a(xdes_get_state(descr, mtr) == XDES_FSEG); ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); + ut_d(fsp_space_modify_check(space, mtr)); first_page_in_extent = page - (page % FSP_EXTENT_SIZE); - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) { + if (ahi) { + for (i = 0; i < FSP_EXTENT_SIZE; i++) { + if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) { - /* Drop search system page hash index if the page is - found in the pool and is hashed */ + /* Drop search system page hash index + if the page is found in the pool and + is hashed */ - btr_search_drop_page_hash_when_freed( - space, zip_size, first_page_in_extent + i); + btr_search_drop_page_hash_when_freed( + page_id_t(space, + first_page_in_extent + i), + page_size); + } } } @@ -3304,15 +3900,15 @@ fseg_free_extent( MLOG_4BYTES, mtr); } - fsp_free_extent(space, zip_size, page, mtr); + fsp_free_extent(page_id_t(space, page), page_size, mtr); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG +#ifdef UNIV_DEBUG for (i = 0; i < FSP_EXTENT_SIZE; i++) { - buf_page_set_file_page_was_freed(space, - first_page_in_extent + i); + buf_page_set_file_page_was_freed( + page_id_t(space, first_page_in_extent 
+ i)); } -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ +#endif /* UNIV_DEBUG */ } /**********************************************************************//** @@ -3320,8 +3916,7 @@ Frees part of a segment. This function can be used to free a segment by repeatedly calling this function in different mini-transactions. Doing the freeing in a single mini-transaction might result in too big a mini-transaction. -@return TRUE if freeing completed */ -UNIV_INTERN +@return TRUE if freeing completed */ ibool fseg_free_step( /*===========*/ @@ -3329,51 +3924,52 @@ fseg_free_step( resides on the first page of the frag list of the segment, this pointer becomes obsolete after the last freeing step */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint n; ulint page; xdes_t* descr; fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; + ulint space_id; ulint header_page; - rw_lock_t* latch; - space = page_get_space_id(page_align(header)); + DBUG_ENTER("fseg_free_step"); + + space_id = page_get_space_id(page_align(header)); header_page = page_get_page_no(page_align(header)); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - mtr_x_lock(latch, mtr); - - descr = xdes_get_descriptor(space, zip_size, header_page, mtr); + descr = xdes_get_descriptor(space_id, header_page, page_size, mtr); /* Check that the header resides on a page which has not been freed yet */ ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, header_page % FSP_EXTENT_SIZE, mtr) == FALSE); + buf_block_t* iblock; - inode = fseg_inode_try_get(header, space, zip_size, mtr); + inode = fseg_inode_try_get(header, space_id, page_size, mtr, &iblock); - if (UNIV_UNLIKELY(inode == NULL)) { - fprintf(stderr, "double free of inode from %u:%u\n", - (unsigned) space, (unsigned) header_page); - 
return(TRUE); + if (inode == NULL) { + ib::info() << "Double free of inode from " + << page_id_t(space_id, header_page); + DBUG_RETURN(TRUE); } - descr = fseg_get_first_extent(inode, space, zip_size, mtr); + fil_block_check_type(iblock, FIL_PAGE_INODE, mtr); + descr = fseg_get_first_extent(inode, space_id, page_size, mtr); if (descr != NULL) { /* Free the extent held by the segment */ page = xdes_get_offset(descr); - fseg_free_extent(inode, space, zip_size, page, mtr); + fseg_free_extent(inode, space_id, page_size, page, ahi, mtr); - return(FALSE); + DBUG_RETURN(FALSE); } /* Free a frag page */ @@ -3381,64 +3977,65 @@ fseg_free_step( if (n == ULINT_UNDEFINED) { /* Freeing completed: free the segment inode */ - fsp_free_seg_inode(space, zip_size, inode, mtr); + fsp_free_seg_inode(space_id, page_size, inode, mtr); - return(TRUE); + DBUG_RETURN(TRUE); } - fseg_free_page_low(inode, space, zip_size, - fseg_get_nth_frag_page_no(inode, n, mtr), mtr); + fseg_free_page_low( + inode, + page_id_t(space_id, fseg_get_nth_frag_page_no(inode, n, mtr)), + page_size, ahi, mtr); n = fseg_find_last_used_frag_page_slot(inode, mtr); if (n == ULINT_UNDEFINED) { /* Freeing completed: free the segment inode */ - fsp_free_seg_inode(space, zip_size, inode, mtr); + fsp_free_seg_inode(space_id, page_size, inode, mtr); - return(TRUE); + DBUG_RETURN(TRUE); } - return(FALSE); + DBUG_RETURN(FALSE); } /**********************************************************************//** Frees part of a segment. Differs from fseg_free_step because this function leaves the header page unfreed. 
-@return TRUE if freeing completed, except the header page */ -UNIV_INTERN +@return TRUE if freeing completed, except the header page */ ibool fseg_free_step_not_header( /*======================*/ fseg_header_t* header, /*!< in: segment header which must reside on the first fragment page of the segment */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint n; ulint page; xdes_t* descr; fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; + ulint space_id; ulint page_no; - rw_lock_t* latch; - space = page_get_space_id(page_align(header)); + space_id = page_get_space_id(page_align(header)); + ut_ad(mtr->is_named_space(space_id)); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); + buf_block_t* iblock; - mtr_x_lock(latch, mtr); + inode = fseg_inode_get(header, space_id, page_size, mtr, &iblock); + fil_block_check_type(iblock, FIL_PAGE_INODE, mtr); - inode = fseg_inode_get(header, space, zip_size, mtr); - - descr = fseg_get_first_extent(inode, space, zip_size, mtr); + descr = fseg_get_first_extent(inode, space_id, page_size, mtr); if (descr != NULL) { /* Free the extent held by the segment */ page = xdes_get_offset(descr); - fseg_free_extent(inode, space, zip_size, page, mtr); + fseg_free_extent(inode, space_id, page_size, page, ahi, mtr); return(FALSE); } @@ -3458,45 +4055,47 @@ fseg_free_step_not_header( return(TRUE); } - fseg_free_page_low(inode, space, zip_size, page_no, mtr); + fseg_free_page_low(inode, page_id_t(space_id, page_no), page_size, ahi, + mtr); return(FALSE); } -/**********************************************************************//** -Returns the first extent descriptor for a segment. We think of the extent -lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL --> FSEG_FREE. 
-@return the first extent descriptor, or NULL if none */ +/** Returns the first extent descriptor for a segment. +We think of the extent lists of the segment catenated in the order +FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE. +@param[in] inode segment inode +@param[in] space_id space id +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return the first extent descriptor, or NULL if none */ static xdes_t* fseg_get_first_extent( -/*==================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fseg_inode_t* inode, + ulint space_id, + const page_size_t& page_size, + mtr_t* mtr) { fil_addr_t first; xdes_t* descr; ut_ad(inode && mtr); - ut_ad(space == page_get_space_id(page_align(inode))); + ut_ad(space_id == page_get_space_id(page_align(inode))); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); first = fil_addr_null; - if (flst_get_len(inode + FSEG_FULL, mtr) > 0) { + if (flst_get_len(inode + FSEG_FULL) > 0) { first = flst_get_first(inode + FSEG_FULL, mtr); - } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) { + } else if (flst_get_len(inode + FSEG_NOT_FULL) > 0) { first = flst_get_first(inode + FSEG_NOT_FULL, mtr); - } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { + } else if (flst_get_len(inode + FSEG_FREE) > 0) { first = flst_get_first(inode + FSEG_FREE, mtr); } @@ -3505,14 +4104,15 @@ fseg_get_first_extent( return(NULL); } - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); + descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr); return(descr); } +#ifdef UNIV_DEBUG /*******************************************************************//** Validates a segment. 
-@return TRUE if ok */ +@return TRUE if ok */ static ibool fseg_validate_low( @@ -3520,7 +4120,7 @@ fseg_validate_low( fseg_inode_t* inode, /*!< in: segment inode */ mtr_t* mtr2) /*!< in/out: mini-transaction */ { - ulint space; + ulint space_id; ib_id_t seg_id; mtr_t mtr; xdes_t* descr; @@ -3528,10 +4128,10 @@ fseg_validate_low( ulint n_used = 0; ulint n_used2 = 0; - ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_SX_FIX)); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - space = page_get_space_id(page_align(inode)); + space_id = page_get_space_id(page_align(inode)); seg_id = mach_read_from_8(inode + FSEG_ID); n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, @@ -3544,14 +4144,13 @@ fseg_validate_low( node_addr = flst_get_first(inode + FSEG_FREE, mtr2); while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space( + space_id, &mtr); - descr = xdes_lst_get_descriptor(space, zip_size, + const page_size_t page_size(space->flags); + + descr = xdes_lst_get_descriptor(space_id, page_size, node_addr, &mtr); ut_a(xdes_get_n_used(descr, &mtr) == 0); @@ -3567,14 +4166,12 @@ fseg_validate_low( node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2); while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space( + space_id, &mtr); + const page_size_t page_size(space->flags); - descr = xdes_lst_get_descriptor(space, zip_size, + descr = xdes_lst_get_descriptor(space_id, page_size, node_addr, &mtr); ut_a(xdes_get_n_used(descr, &mtr) > 0); @@ -3593,14 +4190,12 @@ fseg_validate_low( node_addr = flst_get_first(inode + FSEG_FULL, mtr2); 
while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space( + space_id, &mtr); + const page_size_t page_size(space->flags); - descr = xdes_lst_get_descriptor(space, zip_size, + descr = xdes_lst_get_descriptor(space_id, page_size, node_addr, &mtr); ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); @@ -3616,11 +4211,9 @@ fseg_validate_low( return(TRUE); } -#ifdef UNIV_DEBUG /*******************************************************************//** Validates a segment. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool fseg_validate( /*==========*/ @@ -3629,16 +4222,14 @@ fseg_validate( { fseg_inode_t* inode; ibool ret; - ulint space; - ulint flags; - ulint zip_size; + ulint space_id; - space = page_get_space_id(page_align(header)); + space_id = page_get_space_id(page_align(header)); - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - inode = fseg_inode_get(header, space, zip_size, mtr); + inode = fseg_inode_get(header, space_id, page_size, mtr); ret = fseg_validate_low(inode, mtr); @@ -3646,6 +4237,7 @@ fseg_validate( } #endif /* UNIV_DEBUG */ +#ifdef UNIV_BTR_PRINT /*******************************************************************//** Writes info of a segment. 
*/ static @@ -3666,7 +4258,7 @@ fseg_print_low( ulint page_no; ib_id_t seg_id; - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX)); space = page_get_space_id(page_align(inode)); page_no = page_get_page_no(page_align(inode)); @@ -3677,27 +4269,24 @@ fseg_print_low( n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); n_frag = fseg_get_n_frag_pages(inode, mtr); - n_free = flst_get_len(inode + FSEG_FREE, mtr); - n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr); - n_full = flst_get_len(inode + FSEG_FULL, mtr); + n_free = flst_get_len(inode + FSEG_FREE); + n_not_full = flst_get_len(inode + FSEG_NOT_FULL); + n_full = flst_get_len(inode + FSEG_FULL); + + ib::info() << "SEGMENT id " << seg_id + << " space " << space << ";" + << " page " << page_no << ";" + << " res " << reserved << " used " << used << ";" + << " full ext " << n_full << ";" + << " fragm pages " << n_frag << ";" + << " free extents " << n_free << ";" + << " not full extents " << n_not_full << ": pages " << n_used; - fprintf(stderr, - "SEGMENT id %llu space %lu; page %lu;" - " res %lu used %lu; full ext %lu\n" - "fragm pages %lu; free extents %lu;" - " not full extents %lu: pages %lu\n", - (ullint) seg_id, - (ulong) space, (ulong) page_no, - (ulong) reserved, (ulong) used, (ulong) n_full, - (ulong) n_frag, (ulong) n_free, (ulong) n_not_full, - (ulong) n_used); ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); } -#ifdef UNIV_BTR_PRINT /*******************************************************************//** Writes info of a segment. 
*/ -UNIV_INTERN void fseg_print( /*=======*/ @@ -3705,456 +4294,40 @@ fseg_print( mtr_t* mtr) /*!< in/out: mini-transaction */ { fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; + ulint space_id; - space = page_get_space_id(page_align(header)); + space_id = page_get_space_id(page_align(header)); + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - zip_size = fsp_flags_get_zip_size(flags); - - inode = fseg_inode_get(header, space, zip_size, mtr); + inode = fseg_inode_get(header, space_id, page_size, mtr); fseg_print_low(inode, mtr); } #endif /* UNIV_BTR_PRINT */ - -/*******************************************************************//** -Validates the file space system and its segments. -@return TRUE if ok */ -UNIV_INTERN -ibool -fsp_validate( -/*=========*/ - ulint space) /*!< in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - rw_lock_t* latch; - ulint size; - ulint flags; - ulint zip_size; - ulint free_limit; - ulint frag_n_used; - mtr_t mtr; - mtr_t mtr2; - xdes_t* descr; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint descr_count = 0; - ulint n_used = 0; - ulint n_used2 = 0; - ulint n_full_frag_pages; - ulint n; - ulint seg_inode_len_free; - ulint seg_inode_len_full; - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - ut_a(ut_is_2pow(zip_size)); - ut_a(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - mtr_start(&mtr2); - mtr_x_lock(latch, &mtr2); - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, - MLOG_4BYTES, &mtr); - 
frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, &mtr); - - n_full_frag_pages = FSP_EXTENT_SIZE - * flst_get_len(header + FSP_FULL_FRAG, &mtr); - - if (UNIV_UNLIKELY(free_limit > size)) { - - ut_a(space != 0); - ut_a(size < FSP_EXTENT_SIZE); - } - - flst_validate(header + FSP_FREE, &mtr); - flst_validate(header + FSP_FREE_FRAG, &mtr); - flst_validate(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - /* Validate FSP_FREE list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == 0); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSP_FREE_FRAG list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) > 0); - ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG); - - n_used += xdes_get_n_used(descr, &mtr); - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - - mtr_commit(&mtr); - } - - /* Validate FSP_FULL_FRAG list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - 
mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate segments */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - do { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0); - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len(seg_inode + FSEG_FREE, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_FULL, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL, - &mtr); - - n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr); - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - 
seg_inode_page, n, zip_size, &mtr); - if (mach_read_from_8(seg_inode + FSEG_ID)) { - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len( - seg_inode + FSEG_FREE, &mtr); - descr_count += flst_get_len( - seg_inode + FSEG_FULL, &mtr); - descr_count += flst_get_len( - seg_inode + FSEG_NOT_FULL, &mtr); - n_used2 += fseg_get_n_frag_pages( - seg_inode, &mtr); - } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - ut_a(descr_count * FSP_EXTENT_SIZE == free_limit); - if (!zip_size) { - ut_a(n_used + n_full_frag_pages - == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1)) - / UNIV_PAGE_SIZE) - + seg_inode_len_full + seg_inode_len_free); - } else { - ut_a(n_used + n_full_frag_pages - == n_used2 + 2 * ((free_limit + (zip_size - 1)) - / zip_size) - + seg_inode_len_full + seg_inode_len_free); - } - ut_a(frag_n_used == n_used); - - mtr_commit(&mtr2); - - return(TRUE); -} - -/*******************************************************************//** -Prints info of a file space. 
*/ -UNIV_INTERN -void -fsp_print( -/*======*/ - ulint space) /*!< in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - rw_lock_t* latch; - ulint flags; - ulint zip_size; - ulint size; - ulint free_limit; - ulint frag_n_used; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint n_free; - ulint n_free_frag; - ulint n_full_frag; - ib_id_t seg_id; - ulint n; - ulint n_segs = 0; - mtr_t mtr; - mtr_t mtr2; - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - - mtr_start(&mtr2); - - mtr_x_lock(latch, &mtr2); - - mtr_start(&mtr); - - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, - &mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - &mtr); - n_free = flst_get_len(header + FSP_FREE, &mtr); - n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr); - n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr); - - seg_id = mach_read_from_8(header + FSP_SEG_ID); - - fprintf(stderr, - "FILE SPACE INFO: id %lu\n" - "size %lu, free limit %lu, free extents %lu\n" - "not full frag extents %lu: used pages %lu," - " full frag extents %lu\n" - "first seg id not used %llu\n", - (ulong) space, - (ulong) size, (ulong) free_limit, (ulong) n_free, - (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag, - (ullint) seg_id); - - mtr_commit(&mtr); - - /* Print segments */ - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = 
fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0); - fseg_print_low(seg_inode, &mtr); - - n_segs++; - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - if (mach_read_from_8(seg_inode + FSEG_ID)) { - - fseg_print_low(seg_inode, &mtr); - n_segs++; - } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_commit(&mtr2); - - fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs); -} #endif /* !UNIV_HOTBACKUP */ -/**********************************************************************//** -Compute offset after xdes where crypt data can be stored -@return offset */ -ulint -fsp_header_get_crypt_offset( -/*========================*/ - ulint zip_size, /*!< in: zip_size */ - ulint* max_size) /*!< out: free space available for crypt data */ +#ifdef UNIV_DEBUG +/** Print the file segment header to the given output stream. +@param[in] out the output stream into which the object is printed. +@retval the output stream into which the object was printed. 
*/ +std::ostream& +fseg_header::to_stream(std::ostream& out) const { - ulint pageno = 0; - /* compute first page_no that will have xdes stored on page != 0*/ - for (ulint i = 0; - (pageno = xdes_calc_descriptor_page(zip_size, i)) == 0; ) - i++; + const ulint space = mtr_read_ulint(m_header + FSEG_HDR_SPACE, + MLOG_4BYTES, m_mtr); + const ulint page_no = mtr_read_ulint(m_header + FSEG_HDR_PAGE_NO, + MLOG_4BYTES, m_mtr); - /* use pageno prior to this...i.e last page on page 0 */ - ut_ad(pageno > 0); - pageno--; + const ulint offset = mtr_read_ulint(m_header + FSEG_HDR_OFFSET, + MLOG_2BYTES, m_mtr); - ulint iv_offset = XDES_ARR_OFFSET + - XDES_SIZE * (1 + xdes_calc_descriptor_index(zip_size, pageno)); + out << "[fseg_header_t: space=" << space << ", page=" + << page_no << ", offset=" << offset << "]"; - if (max_size != NULL) { - /* return how much free space there is available on page */ - *max_size = (zip_size ? zip_size : UNIV_PAGE_SIZE) - - (FSP_HEADER_OFFSET + iv_offset + FIL_PAGE_DATA_END); - } - - return FSP_HEADER_OFFSET + iv_offset; + return(out); } +#endif /* UNIV_DEBUG */ /**********************************************************************//** Checks if a single page is free. 
@@ -4163,22 +4336,52 @@ UNIV_INTERN bool fsp_page_is_free_func( /*==============*/ - ulint space, /*!< in: space id */ + ulint space_id, /*!< in: space id */ ulint page_no, /*!< in: page offset */ mtr_t* mtr, /*!< in/out: mini-transaction */ const char *file, ulint line) { - ulint flags; - ut_ad(mtr); - mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr); - ulint zip_size = fsp_flags_get_zip_size(flags); + const fil_space_t* space = mtr_x_lock_space(space_id, mtr); + const page_size_t page_size(space->flags); - xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr); + xdes_t* descr = xdes_get_descriptor(space_id, page_no, page_size, mtr); ut_a(descr); return xdes_mtr_get_bit( descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr); } + +/**********************************************************************//** +Compute offset after xdes where crypt data can be stored +@return offset */ +ulint +fsp_header_get_crypt_offset( +/*========================*/ + const page_size_t& page_size,/*!< in: page size */ + ulint* max_size) /*!< out: free space available for crypt data */ +{ + ulint pageno = 0; + /* compute first page_no that will have xdes stored on page != 0*/ + + for (ulint i = 0; + (pageno = xdes_calc_descriptor_page(page_size, i)) == 0; ) + i++; + + /* use pageno prior to this...i.e last page on page 0 */ + ut_ad(pageno > 0); + pageno--; + + ulint iv_offset = XDES_ARR_OFFSET + + XDES_SIZE * (1 + xdes_calc_descriptor_index(page_size, pageno)); + + if (max_size != NULL) { + /* return how much free space there is available on page */ + *max_size = (page_size.logical() ? 
page_size.logical() : UNIV_PAGE_SIZE) - + (FSP_HEADER_OFFSET + iv_offset + FIL_PAGE_DATA_END); + } + + return FSP_HEADER_OFFSET + iv_offset; +} diff --git a/storage/innobase/fsp/fsp0space.cc b/storage/innobase/fsp/fsp0space.cc new file mode 100644 index 00000000000..f66f7b8fc78 --- /dev/null +++ b/storage/innobase/fsp/fsp0space.cc @@ -0,0 +1,265 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fsp/fsp0space.cc +General shared tablespace implementation. + +Created 2012-11-16 by Sunny Bains as srv/srv0space.cc +*******************************************************/ + +#include "ha_prototypes.h" + +#include "fsp0space.h" +#include "fsp0sysspace.h" +#ifndef UNIV_HOTBACKUP +#include "fsp0fsp.h" +#include "os0file.h" +#endif /* !UNIV_HOTBACKUP */ + +#include "my_sys.h" + + +/** Check if two tablespaces have common data file names. +@param other_space Tablespace to check against this. 
+@return true if they have the same data filenames and paths */ +bool +Tablespace::intersection( + const Tablespace* other_space) +{ + files_t::const_iterator end = other_space->m_files.end(); + + for (files_t::const_iterator it = other_space->m_files.begin(); + it != end; + ++it) { + + if (find(it->m_filename)) { + + return(true); + } + } + + return(false); +} + +/** Frees the memory allocated by the SysTablespace object. */ +void +Tablespace::shutdown() +{ + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = m_files.begin(); it != end; ++it) { + it->shutdown(); + } + + m_files.clear(); + + m_space_id = ULINT_UNDEFINED; +} + +/** Note that the data file was found. +@param[in,out] file Data file object to set */ +void +Tablespace::file_found(Datafile& file) +{ + /* Note that the file exists and can be opened + in the appropriate mode. */ + file.m_exists = true; + + file.set_open_flags( + &file == &m_files.front() + ? OS_FILE_OPEN_RETRY : OS_FILE_OPEN); +} + +/** Open or Create the data files if they do not exist. +@param[in] is_temp whether this is a temporary tablespace +@return DB_SUCCESS or error code */ +dberr_t +Tablespace::open_or_create(bool is_temp) +{ + fil_space_t* space = NULL; + dberr_t err = DB_SUCCESS; + + ut_ad(!m_files.empty()); + + files_t::iterator begin = m_files.begin(); + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = begin; it != end; ++it) { + + if (it->m_exists) { + err = it->open_or_create( + m_ignore_read_only + ? false : srv_read_only_mode); + } else { + err = it->open_or_create( + m_ignore_read_only + ? false : srv_read_only_mode); + + /* Set the correct open flags now that we have + successfully created the file. 
*/ + if (err == DB_SUCCESS) { + file_found(*it); + } + } + + if (err != DB_SUCCESS) { + break; + } + + bool atomic_write; + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + if (!srv_use_doublewrite_buf) { + atomic_write = fil_fusionio_enable_atomic_write( + it->m_handle); + } else { + atomic_write = false; + } +#else + atomic_write = false; +#endif /* !NO_FALLOCATE && UNIV_LINUX */ + + /* We can close the handle now and open the tablespace + the proper way. */ + it->close(); + + if (it == begin) { + /* First data file. */ + + ulint flags; + + flags = fsp_flags_set_page_size(0, univ_page_size); + + /* Create the tablespace entry for the multi-file + tablespace in the tablespace manager. */ + space = fil_space_create( + m_name, m_space_id, flags, is_temp + ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE, it->m_crypt_info); + } + + ut_a(fil_validate()); + + /* Create the tablespace node entry for this data file. */ + if (!fil_node_create( + it->m_filepath, it->m_size, space, false, + atomic_write)) { + + err = DB_ERROR; + break; + } + } + + return(err); +} + +/** Find a filename in the list of Datafiles for a tablespace +@return true if the filename exists in the data files */ +bool +Tablespace::find(const char* filename) +{ + files_t::const_iterator end = m_files.end(); + + for (files_t::const_iterator it = m_files.begin(); it != end; ++it) { + + if (innobase_strcasecmp(filename, it->m_filename) == 0) { + return(true); + } + } + + return(false); +} + + +/** Delete all the data files. */ +void +Tablespace::delete_files() +{ + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = m_files.begin(); it != end; ++it) { + + it->close(); + + bool file_pre_exists; + bool success = os_file_delete_if_exists( + innodb_data_file_key, it->m_filepath, &file_pre_exists); + + if (success && file_pre_exists) { + ib::info() << "Removed temporary tablespace data" + " file: \"" << it->m_name << "\""; + } + } +} + +/** Check if undo tablespace. 
+@return true if undo tablespace */ +bool +Tablespace::is_undo_tablespace( + ulint id) +{ + return(id <= srv_undo_tablespaces_open + && id != srv_sys_space.space_id() + && id != srv_tmp_space.space_id()); +} + +/** Use the ADD DATAFILE path to create a Datafile object and add it to the +front of m_files. +Parse the datafile path into a path and a filename with extension 'ibd'. +This datafile_path provided may or may not be an absolute path, but it +must end with the extension .ibd and have a basename of at least 1 byte. + +Set tablespace m_path member and add a Datafile with the filename. +@param[in] datafile_path full path of the tablespace file. */ +dberr_t +Tablespace::add_datafile( + const char* datafile_added) +{ + /* The path provided ends in ".ibd". This was assured by + validate_create_tablespace_info() */ + ut_d(const char* dot = strrchr(datafile_added, '.')); + ut_ad(dot != NULL && 0 == strcmp(dot, DOT_IBD)); + + char* filepath = mem_strdup(datafile_added); + os_normalize_path(filepath); + + /* If the path is an absolute path, separate it onto m_path and a + basename. For relative paths, make the whole thing a basename so that + it can be appended to the datadir. */ + bool is_abs_path = is_absolute_path(filepath); + size_t dirlen = (is_abs_path ? dirname_length(filepath) : 0); + const char* basename = filepath + dirlen; + + /* If the pathname contains a directory separator, fill the + m_path member which is the default directory for files in this + tablespace. Leave it null otherwise. */ + if (dirlen > 0) { + set_path(filepath, dirlen); + } + + /* Now add a new Datafile and set the filepath + using the m_path created above. 
*/ + m_files.push_back(Datafile(m_name, m_flags, + FIL_IBD_FILE_INITIAL_SIZE, 0)); + Datafile* datafile = &m_files.back(); + datafile->make_filepath(m_path, basename, IBD); + + ut_free(filepath); + + return(DB_SUCCESS); +} diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc new file mode 100644 index 00000000000..66b5da15e8b --- /dev/null +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -0,0 +1,1053 @@ +/***************************************************************************** + +Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fsp/fsp0space.cc +Multi file, shared, system tablespace implementation. + +Created 2012-11-16 by Sunny Bains as srv/srv0space.cc +Refactored 2013-7-26 by Kevin Lewis +*******************************************************/ + +#include "ha_prototypes.h" + +#include "fsp0sysspace.h" +#include "srv0start.h" +#include "trx0sys.h" +#ifndef UNIV_HOTBACKUP +#include "dict0load.h" +#include "mem0mem.h" +#include "os0file.h" +#include "row0mysql.h" +#include "ut0new.h" + +/** The server header file is included to access opt_initialize global variable. 
+If server passes the option for create/open DB to SE, we should remove such +direct reference to server header and global variable */ +#include "mysqld.h" +#else +my_bool opt_initialize = 0; +#endif /* !UNIV_HOTBACKUP */ + +/** The control info of the system tablespace. */ +SysTablespace srv_sys_space; + +/** The control info of a temporary table shared tablespace. */ +SysTablespace srv_tmp_space; + +/** If the last data file is auto-extended, we add this many pages to it +at a time. We have to make this public because it is a config variable. */ +ulong sys_tablespace_auto_extend_increment; + +#ifdef UNIV_DEBUG +/** Control if extra debug checks need to be done for temporary tablespace. +Default = true that is disable such checks. +This variable is not exposed to end-user but still kept as variable for +developer to enable it during debug. */ +bool srv_skip_temp_table_checks_debug = true; +#endif /* UNIV_DEBUG */ + +/** Convert a numeric string that optionally ends in G or M or K, + to a number containing megabytes. +@param[in] str String with a quantity in bytes +@param[out] megs The number in megabytes +@return next character in string */ +char* +SysTablespace::parse_units( + char* ptr, + ulint* megs) +{ + char* endp; + + *megs = strtoul(ptr, &endp, 10); + + ptr = endp; + + switch (*ptr) { + case 'G': case 'g': + *megs *= 1024; + /* fall through */ + case 'M': case 'm': + ++ptr; + break; + case 'K': case 'k': + *megs /= 1024; + ++ptr; + break; + default: + *megs /= 1024 * 1024; + break; + } + + return(ptr); +} + +/** Parse the input params and populate member variables. 
+@param[in] filepath path to data files +@param[in] supports_raw true if the tablespace supports raw devices +@return true on success parse */ +bool +SysTablespace::parse_params( + const char* filepath_spec, + bool supports_raw) +{ + char* filepath; + ulint size; + char* input_str; + ulint n_files = 0; + + ut_ad(m_last_file_size_max == 0); + ut_ad(!m_auto_extend_last_file); + + char* new_str = mem_strdup(filepath_spec); + char* str = new_str; + + input_str = str; + + /*---------------------- PASS 1 ---------------------------*/ + /* First calculate the number of data files and check syntax: + filepath:size[K |M | G];filepath:size[K |M | G]... . + Note that a Windows path may contain a drive name and a ':'. */ + while (*str != '\0') { + filepath = str; + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/' + || *(str + 1) == ':'))) { + str++; + } + + if (*str == '\0') { + ut_free(new_str); + + ib::error() + << "syntax error in file path or size" + " specified is less than 1 megabyte"; + return(false); + } + + str++; + + str = parse_units(str, &size); + + if (0 == strncmp(str, ":autoextend", + (sizeof ":autoextend") - 1)) { + + str += (sizeof ":autoextend") - 1; + + if (0 == strncmp(str, ":max:", + (sizeof ":max:") - 1)) { + + str += (sizeof ":max:") - 1; + + str = parse_units(str, &size); + } + + if (*str != '\0') { + ut_free(new_str); + ib::error() + << "syntax error in file path or" + << " size specified is less than" + << " 1 megabyte"; + return(false); + } + } + + if (::strlen(str) >= 6 + && *str == 'n' + && *(str + 1) == 'e' + && *(str + 2) == 'w') { + + if (!supports_raw) { + ib::error() + << "Tablespace doesn't support raw" + " devices"; + ut_free(new_str); + return(false); + } + + str += 3; + } + + if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { + str += 3; + + if (!supports_raw) { + ib::error() + << "Tablespace doesn't support raw" + " devices"; + ut_free(new_str); + return(false); + } + } + + 
if (size == 0) { + + ut_free(new_str); + + ib::error() + << "syntax error in file path or size" + " specified is less than 1 megabyte"; + + return(false); + } + + ++n_files; + + if (*str == ';') { + str++; + } else if (*str != '\0') { + ut_free(new_str); + + ib::error() + << "syntax error in file path or size" + " specified is less than 1 megabyte"; + return(false); + } + } + + if (n_files == 0) { + + /* filepath_spec must contain at least one data file + definition */ + + ut_free(new_str); + + ib::error() + << "syntax error in file path or size specified" + " is less than 1 megabyte"; + + return(false); + } + + /*---------------------- PASS 2 ---------------------------*/ + /* Then store the actual values to our arrays */ + str = input_str; + ulint order = 0; + + while (*str != '\0') { + filepath = str; + + /* Note that we must step over the ':' in a Windows filepath; + a Windows path normally looks like C:\ibdata\ibdata1:1G, but + a Windows raw partition may have a specification like + \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */ + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/' + || *(str + 1) == ':'))) { + str++; + } + + if (*str == ':') { + /* Make filepath a null-terminated string */ + *str = '\0'; + str++; + } + + str = parse_units(str, &size); + + if (0 == strncmp(str, ":autoextend", + (sizeof ":autoextend") - 1)) { + + m_auto_extend_last_file = true; + + str += (sizeof ":autoextend") - 1; + + if (0 == strncmp(str, ":max:", + (sizeof ":max:") - 1)) { + + str += (sizeof ":max:") - 1; + + str = parse_units(str, &m_last_file_size_max); + } + + if (*str != '\0') { + ut_free(new_str); + ib::error() << "syntax error in file path or" + " size specified is less than 1" + " megabyte"; + return(false); + } + } + + m_files.push_back(Datafile(filepath, flags(), size, order)); + Datafile* datafile = &m_files.back(); + datafile->make_filepath(path(), filepath, NO_EXT); + + if (::strlen(str) >= 6 + && *str == 'n' 
+ && *(str + 1) == 'e' + && *(str + 2) == 'w') { + + ut_a(supports_raw); + + str += 3; + + /* Initialize new raw device only during initialize */ + /* JAN: TODO: MySQL 5.7 used opt_initialize */ + m_files.back().m_type = + opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW; + } + + if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { + + ut_a(supports_raw); + + str += 3; + + /* Initialize new raw device only during initialize */ + if (m_files.back().m_type == SRV_NOT_RAW) { + /* JAN: TODO: MySQL 5.7 used opt_initialize */ + m_files.back().m_type = + opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW; + } + } + + if (*str == ';') { + ++str; + } + order++; + } + + ut_ad(n_files == ulint(m_files.size())); + + ut_free(new_str); + + return(true); +} + +/** Frees the memory allocated by the parse method. */ +void +SysTablespace::shutdown() +{ + Tablespace::shutdown(); + + m_auto_extend_last_file = 0; + m_last_file_size_max = 0; + m_created_new_raw = 0; + m_is_tablespace_full = false; + m_sanity_checks_done = false; +} + +/** Verify the size of the physical file. +@param[in] file data file object +@return DB_SUCCESS if OK else error code. */ +dberr_t +SysTablespace::check_size( + Datafile& file) +{ + os_offset_t size = os_file_get_size(file.m_handle); + ut_a(size != (os_offset_t) -1); + + /* Under some error conditions like disk full scenarios + or file size reaching filesystem limit the data file + could contain an incomplete extent at the end. When we + extend a data file and if some failure happens, then + also the data file could contain an incomplete extent. 
+ So we need to round the size downward to a megabyte.*/ + + ulint rounded_size_pages = get_pages_from_size(size); + + /* If last file */ + if (&file == &m_files.back() && m_auto_extend_last_file) { + + if (file.m_size > rounded_size_pages + || (m_last_file_size_max > 0 + && m_last_file_size_max < rounded_size_pages)) { + ib::error() << "The Auto-extending " << name() + << " data file '" << file.filepath() << "' is" + " of a different size " << rounded_size_pages + << " pages (rounded down to MB) than specified" + " in the .cnf file: initial " << file.m_size + << " pages, max " << m_last_file_size_max + << " (relevant if non-zero) pages!"; + return(DB_ERROR); + } + + file.m_size = rounded_size_pages; + } + + if (rounded_size_pages != file.m_size) { + ib::error() << "The " << name() << " data file '" + << file.filepath() << "' is of a different size " + << rounded_size_pages << " pages (rounded down to MB)" + " than the " << file.m_size << " pages specified in" + " the .cnf file!"; + return(DB_ERROR); + } + + return(DB_SUCCESS); +} + +/** Set the size of the file. +@param[in] file data file object +@return DB_SUCCESS or error code */ +dberr_t +SysTablespace::set_size( + Datafile& file) +{ + ut_a(!srv_read_only_mode || m_ignore_read_only); + + /* We created the data file and now write it full of zeros */ + ib::info() << "Setting file '" << file.filepath() << "' size to " + << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB." + " Physically writing the file full; Please wait ..."; + + bool success = os_file_set_size( + file.m_filepath, file.m_handle, + static_cast(file.m_size << UNIV_PAGE_SIZE_SHIFT), + m_ignore_read_only ? false : srv_read_only_mode); + + if (success) { + ib::info() << "File '" << file.filepath() << "' size is now " + << (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) + << " MB."; + } else { + ib::error() << "Could not set the file size of '" + << file.filepath() << "'. 
Probably out of disk space"; + + return(DB_ERROR); + } + + return(DB_SUCCESS); +} + +/** Create a data file. +@param[in] file data file object +@return DB_SUCCESS or error code */ +dberr_t +SysTablespace::create_file( + Datafile& file) +{ + dberr_t err = DB_SUCCESS; + + ut_a(!file.m_exists); + ut_a(!srv_read_only_mode || m_ignore_read_only); + + switch (file.m_type) { + case SRV_NEW_RAW: + + /* The partition is opened, not created; then it is + written over */ + m_created_new_raw = true; + + /* Fall through. */ + + case SRV_OLD_RAW: + + srv_start_raw_disk_in_use = TRUE; + + /* Fall through. */ + + case SRV_NOT_RAW: + err = file.open_or_create( + m_ignore_read_only ? false : srv_read_only_mode); + break; + } + + + if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) { + err = set_size(file); + } + + return(err); +} + +/** Open a data file. +@param[in] file data file object +@return DB_SUCCESS or error code */ +dberr_t +SysTablespace::open_file( + Datafile& file) +{ + dberr_t err = DB_SUCCESS; + + ut_a(file.m_exists); + + switch (file.m_type) { + case SRV_NEW_RAW: + /* The partition is opened, not created; then it is + written over */ + m_created_new_raw = true; + + /* Fall through */ + + case SRV_OLD_RAW: + srv_start_raw_disk_in_use = TRUE; + + if (srv_read_only_mode && !m_ignore_read_only) { + ib::error() << "Can't open a raw device '" + << file.m_filepath << "' when" + " --innodb-read-only is set"; + + return(DB_ERROR); + } + + /* Fall through */ + + case SRV_NOT_RAW: + err = file.open_or_create( + m_ignore_read_only ? false : srv_read_only_mode); + + if (err != DB_SUCCESS) { + return(err); + } + break; + } + + switch (file.m_type) { + case SRV_NEW_RAW: + /* Set file size for new raw device. */ + err = set_size(file); + break; + + case SRV_NOT_RAW: + /* Check file size for existing file. 
*/ + err = check_size(file); + break; + + case SRV_OLD_RAW: + err = DB_SUCCESS; + break; + + } + + if (err != DB_SUCCESS) { + file.close(); + } + + return(err); +} + +#ifndef UNIV_HOTBACKUP +/** Check the tablespace header for this tablespace. +@param[out] flushed_lsn the value of FIL_PAGE_FILE_FLUSH_LSN +@return DB_SUCCESS or error code */ +dberr_t +SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn) +{ + dberr_t err; + + /* Only relevant for the system tablespace. */ + ut_ad(space_id() == TRX_SYS_SPACE); + + files_t::iterator it = m_files.begin(); + + ut_a(it->m_exists); + + if (it->m_handle == OS_FILE_CLOSED) { + + err = it->open_or_create( + m_ignore_read_only ? false : srv_read_only_mode); + + if (err != DB_SUCCESS) { + return(err); + } + } + + err = it->read_first_page( + m_ignore_read_only ? false : srv_read_only_mode); + + m_crypt_info = it->m_crypt_info; + + if (err != DB_SUCCESS) { + return(err); + } + + ut_a(it->order() == 0); + + + buf_dblwr_init_or_load_pages(it->handle(), it->filepath()); + + /* Check the contents of the first page of the + first datafile. */ + for (int retry = 0; retry < 2; ++retry) { + + err = it->validate_first_page(flushed_lsn, false); + + if (err != DB_SUCCESS + && (retry == 1 + || it->restore_from_doublewrite(0) != DB_SUCCESS)) { + + it->close(); + + return(err); + } + } + + /* Make sure the tablespace space ID matches the + space ID on the first page of the first datafile. */ + if (space_id() != it->m_space_id) { + + ib::error() + << "The " << name() << " data file '" << it->name() + << "' has the wrong space ID. It should be " + << space_id() << ", but " << it->m_space_id + << " was found"; + + it->close(); + + return(err); + } + + it->close(); + + return(DB_SUCCESS); +} +#endif /* !UNIV_HOTBACKUP */ +/** Check if a file can be opened in the correct mode. +@param[in] file data file object +@param[out] reason exact reason if file_status check failed. +@return DB_SUCCESS or error code. 
*/ +dberr_t +SysTablespace::check_file_status( + const Datafile& file, + file_status_t& reason) +{ + os_file_stat_t stat; + + memset(&stat, 0x0, sizeof(stat)); + + dberr_t err = os_file_get_status( + file.m_filepath, &stat, true, + m_ignore_read_only ? false : srv_read_only_mode); + + reason = FILE_STATUS_VOID; + /* File exists but we can't read the rw-permission settings. */ + switch (err) { + case DB_FAIL: + ib::error() << "os_file_get_status() failed on '" + << file.filepath() + << "'. Can't determine file permissions"; + err = DB_ERROR; + reason = FILE_STATUS_RW_PERMISSION_ERROR; + break; + + case DB_SUCCESS: + + /* Note: stat.rw_perm is only valid for "regular" files */ + + if (stat.type == OS_FILE_TYPE_FILE) { + + if (!stat.rw_perm) { + const char *p = (!srv_read_only_mode + || m_ignore_read_only) + ? "writable" + : "readable"; + + ib::error() << "The " << name() << " data file" + << " '" << file.name() << "' must be " + << p; + + err = DB_ERROR; + reason = FILE_STATUS_READ_WRITE_ERROR; + } + + } else { + /* Not a regular file, bail out. */ + ib::error() << "The " << name() << " data file '" + << file.name() << "' is not a regular" + " InnoDB data file."; + + err = DB_ERROR; + reason = FILE_STATUS_NOT_REGULAR_FILE_ERROR; + } + break; + + case DB_NOT_FOUND: + break; + + default: + ut_ad(0); + } + + return(err); +} + +/** Note that the data file was not found. +@param[in] file data file object +@param[out] create_new_db true if a new instance to be created +@return DB_SUCESS or error code */ +dberr_t +SysTablespace::file_not_found( + Datafile& file, + bool* create_new_db) +{ + file.m_exists = false; + + if (srv_read_only_mode && !m_ignore_read_only) { + ib::error() << "Can't create file '" << file.filepath() + << "' when --innodb-read-only is set"; + + return(DB_ERROR); + + } else if (&file == &m_files.front()) { + + /* First data file. 
*/ + ut_a(!*create_new_db); + *create_new_db = TRUE; + + if (space_id() == TRX_SYS_SPACE) { + ib::info() << "The first " << name() << " data file '" + << file.name() << "' did not exist." + " A new tablespace will be created!"; + } + + } else { + ib::info() << "Need to create a new " << name() + << " data file '" << file.name() << "'."; + } + + /* Set the file create mode. */ + switch (file.m_type) { + case SRV_NOT_RAW: + file.set_open_flags(OS_FILE_CREATE); + break; + + case SRV_NEW_RAW: + case SRV_OLD_RAW: + file.set_open_flags(OS_FILE_OPEN_RAW); + break; + } + + return(DB_SUCCESS); +} + +/** Note that the data file was found. +@param[in,out] file data file object +@return true if a new instance to be created */ +bool +SysTablespace::file_found( + Datafile& file) +{ + /* Note that the file exists and can be opened + in the appropriate mode. */ + file.m_exists = true; + + /* Set the file open mode */ + switch (file.m_type) { + case SRV_NOT_RAW: + file.set_open_flags( + &file == &m_files.front() + ? OS_FILE_OPEN_RETRY : OS_FILE_OPEN); + break; + + case SRV_NEW_RAW: + case SRV_OLD_RAW: + file.set_open_flags(OS_FILE_OPEN_RAW); + break; + } + + /* Need to create the system tablespace for new raw device. */ + return(file.m_type == SRV_NEW_RAW); +} +#ifndef UNIV_HOTBACKUP +/** Check the data file specification. 
+@param[out] create_new_db true if a new database is to be created +@param[in] min_expected_size Minimum expected tablespace size in bytes +@return DB_SUCCESS if all OK else error code */ +dberr_t +SysTablespace::check_file_spec( + bool* create_new_db, + ulint min_expected_size) +{ + *create_new_db = FALSE; + + if (m_files.size() >= 1000) { + ib::error() << "There must be < 1000 data files in " + << name() << " but " << m_files.size() << " have been" + " defined."; + + return(DB_ERROR); + } + + if (get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) { + + ib::error() << "Tablespace size must be at least " + << min_expected_size / (1024 * 1024) << " MB"; + + return(DB_ERROR); + } + + dberr_t err = DB_SUCCESS; + + ut_a(!m_files.empty()); + + /* If there is more than one data file and the last data file + doesn't exist, that is OK. We allow adding of new data files. */ + + files_t::iterator begin = m_files.begin(); + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = begin; it != end; ++it) { + + file_status_t reason_if_failed; + err = check_file_status(*it, reason_if_failed); + + if (err == DB_NOT_FOUND) { + + err = file_not_found(*it, create_new_db); + + if (err != DB_SUCCESS) { + break; + } + + } else if (err != DB_SUCCESS) { + if (reason_if_failed == FILE_STATUS_READ_WRITE_ERROR) { + const char* p = (!srv_read_only_mode + || m_ignore_read_only) + ? "writable" : "readable"; + ib::error() << "The " << name() << " data file" + << " '" << it->name() << "' must be " + << p; + } + + ut_a(err != DB_FAIL); + break; + + } else if (*create_new_db) { + ib::error() << "The " << name() << " data file '" + << begin->m_name << "' was not found but" + " one of the other data files '" << it->m_name + << "' exists."; + + err = DB_ERROR; + break; + + } else { + *create_new_db = file_found(*it); + } + } + + /* We assume doublewirte blocks in the first data file. 
*/ + if (err == DB_SUCCESS && *create_new_db + && begin->m_size < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3) { + ib::error() << "The " << name() << " data file " + << "'" << begin->name() << "' must be at least " + << TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3 * UNIV_PAGE_SIZE + / (1024 * 1024) << " MB"; + + err = DB_ERROR; + } + + return(err); +} + +/** Open or create the data files +@param[in] is_temp whether this is a temporary tablespace +@param[in] create_new_db whether we are creating a new database +@param[out] sum_new_sizes sum of sizes of the new files added +@param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file +@return DB_SUCCESS or error code */ +dberr_t +SysTablespace::open_or_create( + bool is_temp, + bool create_new_db, + ulint* sum_new_sizes, + lsn_t* flush_lsn) +{ + dberr_t err = DB_SUCCESS; + fil_space_t* space = NULL; + + ut_ad(!m_files.empty()); + + if (sum_new_sizes) { + *sum_new_sizes = 0; + } + + files_t::iterator begin = m_files.begin(); + files_t::iterator end = m_files.end(); + + ut_ad(begin->order() == 0); + + for (files_t::iterator it = begin; it != end; ++it) { + + if (it->m_exists) { + err = open_file(*it); + + /* For new raw device increment new size. */ + if (sum_new_sizes && it->m_type == SRV_NEW_RAW) { + + *sum_new_sizes += it->m_size; + } + + } else { + err = create_file(*it); + + if (sum_new_sizes) { + *sum_new_sizes += it->m_size; + } + + /* Set the correct open flags now that we have + successfully created the file. */ + if (err == DB_SUCCESS) { + /* We ignore new_db OUT parameter here + as the information is known at this stage */ + file_found(*it); + } + } + + if (err != DB_SUCCESS) { + return(err); + } + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) + /* Note: This should really be per node and not per + tablespace because a tablespace can contain multiple + files (nodes). The implication is that all files of + the tablespace should be on the same medium. 
*/ + + if (fil_fusionio_enable_atomic_write(it->m_handle)) { + + if (srv_use_doublewrite_buf) { + ib::info() << "FusionIO atomic IO enabled," + " disabling the double write buffer"; + + srv_use_doublewrite_buf = false; + } + + it->m_atomic_write = true; + } else { + it->m_atomic_write = false; + } +#else + it->m_atomic_write = false; +#endif /* !NO_FALLOCATE && UNIV_LINUX*/ + } + + if (!create_new_db && flush_lsn) { + /* Validate the header page in the first datafile + and read LSNs fom the others. */ + err = read_lsn_and_check_flags(flush_lsn); + if (err != DB_SUCCESS) { + return(err); + } + } + + /* Close the curent handles, add space and file info to the + fil_system cache and the Data Dictionary, and re-open them + in file_system cache so that they stay open until shutdown. */ + ulint node_counter = 0; + for (files_t::iterator it = begin; it != end; ++it) { + it->close(); + it->m_exists = true; + + if (it == begin) { + /* First data file. */ + + /* Create the tablespace entry for the multi-file + tablespace in the tablespace manager. */ + + if (!m_crypt_info) { + /* Create default crypt info for system + tablespace if it does not yet exists. */ + m_crypt_info = fil_space_create_crypt_data( + FIL_SPACE_ENCRYPTION_DEFAULT, + FIL_DEFAULT_ENCRYPTION_KEY); + } + + space = fil_space_create( + name(), space_id(), flags(), is_temp + ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE, m_crypt_info); + } + + ut_a(fil_validate()); + + ulint max_size = (++node_counter == m_files.size() + ? (m_last_file_size_max == 0 + ? ULINT_MAX + : m_last_file_size_max) + : it->m_size); + + /* Add the datafile to the fil_system cache. */ + if (!fil_node_create( + it->m_filepath, it->m_size, + space, it->m_type != SRV_NOT_RAW, + it->m_atomic_write, max_size)) { + + err = DB_ERROR; + break; + } + } + + return(err); +} +#endif /* UNIV_HOTBACKUP */ +/** Normalize the file size, convert from megabytes to number of pages. 
*/ +void +SysTablespace::normalize() +{ + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = m_files.begin(); it != end; ++it) { + + it->m_size *= (1024 * 1024) / UNIV_PAGE_SIZE; + } + + m_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE; +} + + +/** +@return next increment size */ +ulint +SysTablespace::get_increment() const +{ + ulint increment; + + if (m_last_file_size_max == 0) { + increment = get_autoextend_increment(); + } else { + + if (!is_valid_size()) { + ib::error() << "The last data file in " << name() + << " has a size of " << last_file_size() + << " but the max size allowed is " + << m_last_file_size_max; + } + + increment = m_last_file_size_max - last_file_size(); + } + + if (increment > get_autoextend_increment()) { + increment = get_autoextend_increment(); + } + + return(increment); +} + + +/** +@return true if configured to use raw devices */ +bool +SysTablespace::has_raw_device() +{ + files_t::iterator end = m_files.end(); + + for (files_t::iterator it = m_files.begin(); it != end; ++it) { + + if (it->is_raw_device()) { + return(true); + } + } + + return(false); +} diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc index 030b972440f..8c542aab289 100644 --- a/storage/innobase/fts/fts0ast.cc +++ b/storage/innobase/fts/fts0ast.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,7 +23,8 @@ Full Text Search parser helper file. Created 2007/3/16 Sunny Bains. 
***********************************************************************/ -#include "mem0mem.h" +#include "ha_prototypes.h" + #include "fts0ast.h" #include "fts0pars.h" #include "fts0fts.h" @@ -49,8 +50,7 @@ fts_ast_node_create(void) { fts_ast_node_t* node; - node = (fts_ast_node_t*) ut_malloc(sizeof(*node)); - memset(node, 0x0, sizeof(*node)); + node = (fts_ast_node_t*) ut_zalloc_nokey(sizeof(*node)); return(node); } @@ -58,7 +58,6 @@ fts_ast_node_create(void) /******************************************************************//** Create a operator fts_ast_node_t. @return new node */ -UNIV_INTERN fts_ast_node_t* fts_ast_create_node_oper( /*=====================*/ @@ -79,7 +78,6 @@ fts_ast_create_node_oper( This function takes ownership of the ptr and is responsible for free'ing it @return new node or a node list with tokenized words */ -UNIV_INTERN fts_ast_node_t* fts_ast_create_node_term( /*=====================*/ @@ -96,14 +94,12 @@ fts_ast_create_node_term( /* Scan the incoming string and filter out any "non-word" characters */ while (cur_pos < len) { fts_string_t str; - ulint offset; ulint cur_len; cur_len = innobase_mysql_fts_get_token( state->charset, reinterpret_cast(ptr->str) + cur_pos, - reinterpret_cast(ptr->str) + len, - &str, &offset); + reinterpret_cast(ptr->str) + len, &str); if (cur_len == 0) { break; @@ -151,11 +147,41 @@ fts_ast_create_node_term( return((node_list != NULL) ? 
node_list : first_node); } +/******************************************************************//** +Create an AST term node, makes a copy of ptr for plugin parser +@return node */ +fts_ast_node_t* +fts_ast_create_node_term_for_parser( +/*================================*/ + void* arg, /*!< in: ast state */ + const char* ptr, /*!< in: term string */ + const ulint len) /*!< in: term string length */ +{ + fts_ast_node_t* node = NULL; + + /* '%' as first char is forbidden for LIKE in internal SQL parser; + '%' as last char is reserved for wildcard search;*/ + if (len == 0 || len > FTS_MAX_WORD_LEN + || ptr[0] == '%' || ptr[len - 1] == '%') { + return(NULL); + } + + node = fts_ast_node_create(); + + node->type = FTS_AST_TERM; + + node->term.ptr = fts_ast_string_create( + reinterpret_cast(ptr), len); + + fts_ast_state_add_node(static_cast(arg), node); + + return(node); +} + /******************************************************************//** This function takes ownership of the ptr and is responsible for free'ing it. @return new node */ -UNIV_INTERN fts_ast_node_t* fts_ast_create_node_text( /*=====================*/ @@ -193,11 +219,30 @@ fts_ast_create_node_text( return(node); } +/******************************************************************//** +Create an AST phrase list node for plugin parser +@return node */ +fts_ast_node_t* +fts_ast_create_node_phrase_list( +/*============================*/ + void* arg) /*!< in: ast state */ +{ + fts_ast_node_t* node = fts_ast_node_create(); + + node->type = FTS_AST_PARSER_PHRASE_LIST; + + node->text.distance = ULINT_UNDEFINED; + node->list.head = node->list.tail = NULL; + + fts_ast_state_add_node(static_cast(arg), node); + + return(node); +} + /******************************************************************//** This function takes ownership of the expr and is responsible for free'ing it. 
@return new node */ -UNIV_INTERN fts_ast_node_t* fts_ast_create_node_list( /*=====================*/ @@ -218,7 +263,6 @@ fts_ast_create_node_list( Create a sub-expression list node. This function takes ownership of expr and is responsible for deleting it. @return new node */ -UNIV_INTERN fts_ast_node_t* fts_ast_create_node_subexp_list( /*============================*/ @@ -244,7 +288,8 @@ fts_ast_free_list( fts_ast_node_t* node) /*!< in: ast node to free */ { ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); + || node->type == FTS_AST_SUBEXP_LIST + || node->type == FTS_AST_PARSER_PHRASE_LIST); for (node = node->list.head; node != NULL; @@ -257,7 +302,6 @@ fts_ast_free_list( /********************************************************************//** Free a fts_ast_node_t instance. @return next node to free */ -UNIV_INTERN fts_ast_node_t* fts_ast_free_node( /*==============*/ @@ -282,6 +326,7 @@ fts_ast_free_node( case FTS_AST_LIST: case FTS_AST_SUBEXP_LIST: + case FTS_AST_PARSER_PHRASE_LIST: fts_ast_free_list(node); node->list.head = node->list.tail = NULL; break; @@ -305,7 +350,6 @@ fts_ast_free_node( This AST takes ownership of the expr and is responsible for free'ing it. @return in param "list" */ -UNIV_INTERN fts_ast_node_t* fts_ast_add_node( /*=============*/ @@ -318,7 +362,8 @@ fts_ast_add_node( ut_a(!elem->next); ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); + || node->type == FTS_AST_SUBEXP_LIST + || node->type == FTS_AST_PARSER_PHRASE_LIST); if (!node->list.head) { ut_a(!node->list.tail); @@ -337,7 +382,6 @@ fts_ast_add_node( /******************************************************************//** For tracking node allocations, in case there is an error during parsing. */ -UNIV_INTERN void fts_ast_state_add_node( /*===================*/ @@ -356,7 +400,6 @@ fts_ast_state_add_node( /******************************************************************//** Set the wildcard attribute of a term. 
*/ -UNIV_INTERN void fts_ast_term_set_wildcard( /*======================*/ @@ -381,9 +424,8 @@ fts_ast_term_set_wildcard( /******************************************************************//** Set the proximity attribute of a text node. */ -UNIV_INTERN void -fts_ast_term_set_distance( +fts_ast_text_set_distance( /*======================*/ fts_ast_node_t* node, /*!< in/out: text node */ ulint distance) /*!< in: the text proximity @@ -401,7 +443,6 @@ fts_ast_term_set_distance( /******************************************************************//** Free node and expr allocations. */ -UNIV_INTERN void fts_ast_state_free( /*===============*/ @@ -429,13 +470,19 @@ fts_ast_state_free( } /******************************************************************//** -Print an ast node. */ -UNIV_INTERN +Print an ast node recursively. */ +static void -fts_ast_node_print( -/*===============*/ - fts_ast_node_t* node) /*!< in: ast node to print */ +fts_ast_node_print_recursive( +/*=========================*/ + fts_ast_node_t* node, /*!< in: ast node to print */ + ulint level) /*!< in: recursive level */ { + /* Print alignment blank */ + for (ulint i = 0; i < level; i++) { + printf(" "); + } + switch (node->type) { case FTS_AST_TEXT: printf("TEXT: "); @@ -448,38 +495,83 @@ fts_ast_node_print( break; case FTS_AST_LIST: - printf("LIST: "); - node = node->list.head; + printf("LIST: \n"); - while (node) { - fts_ast_node_print(node); - node = node->next; + for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); } break; case FTS_AST_SUBEXP_LIST: - printf("SUBEXP_LIST: "); - node = node->list.head; + printf("SUBEXP_LIST: \n"); - while (node) { - fts_ast_node_print(node); - node = node->next; + for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); } + break; + case FTS_AST_OPER: printf("OPER: %d\n", node->oper); break; + case FTS_AST_PARSER_PHRASE_LIST: + printf("PARSER_PHRASE_LIST: \n"); + + 
for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); + } + break; + default: ut_error; } } +/******************************************************************//** +Print an ast node */ +void +fts_ast_node_print( +/*===============*/ + fts_ast_node_t* node) /*!< in: ast node to print */ +{ + fts_ast_node_print_recursive(node, 0); +} + +/** Check only union operation involved in the node +@param[in] node ast node to check +@return true if the node contains only union else false. */ +bool +fts_ast_node_check_union( + fts_ast_node_t* node) +{ + if (node->type == FTS_AST_LIST + || node->type == FTS_AST_SUBEXP_LIST + || node->type == FTS_AST_PARSER_PHRASE_LIST) { + + for (node = node->list.head; node; node = node->next) { + if (!fts_ast_node_check_union(node)) { + return(false); + } + } + + } else if (node->type == FTS_AST_OPER + && (node->oper == FTS_IGNORE + || node->oper == FTS_EXIST)) { + + return(false); + } else if (node->type == FTS_AST_TEXT) { + /* Distance or phrase search query. */ + return(false); + } + + return(true); +} + /******************************************************************//** Traverse the AST - in-order traversal, except for the FTX_EXIST and FTS_IGNORE nodes, which will be ignored in the first pass of each level, and visited in a second and third pass after all other nodes in the same level are visited. 
@return DB_SUCCESS if all went well */ -UNIV_INTERN dberr_t fts_ast_visit( /*==========*/ @@ -531,7 +623,7 @@ fts_ast_visit( node && (error == DB_SUCCESS); node = node->next) { - switch(node->type) { + switch (node->type) { case FTS_AST_LIST: if (visit_pass != FTS_PASS_FIRST) { break; @@ -630,7 +722,6 @@ has one more byte than len @param[in] str pointer to string @param[in] len length of the string @return ast string with NUL-terminator */ -UNIV_INTERN fts_ast_string_t* fts_ast_string_create( const byte* str, @@ -640,9 +731,10 @@ fts_ast_string_create( ut_ad(len > 0); - ast_str = static_cast - (ut_malloc(sizeof(fts_ast_string_t))); - ast_str->str = static_cast(ut_malloc(len + 1)); + ast_str = static_cast( + ut_malloc_nokey(sizeof(fts_ast_string_t))); + + ast_str->str = static_cast(ut_malloc_nokey(len + 1)); ast_str->len = len; memcpy(ast_str->str, str, len); @@ -654,7 +746,6 @@ fts_ast_string_create( /** Free an ast string instance @param[in,out] ast_str string to free */ -UNIV_INTERN void fts_ast_string_free( fts_ast_string_t* ast_str) @@ -670,7 +761,6 @@ Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul @param[in] str string to translate @param[in] base the base @return translated number */ -UNIV_INTERN ulint fts_ast_string_to_ul( const fts_ast_string_t* ast_str, @@ -683,7 +773,6 @@ fts_ast_string_to_ul( /** Print the ast string @param[in] str string to print */ -UNIV_INTERN void fts_ast_string_print( const fts_ast_string_t* ast_str) @@ -720,6 +809,7 @@ fts_ast_oper_name_get(fts_ast_oper_t oper) return("FTS_EXIST_SKIP"); } ut_ad(0); + return("FTS_UNKNOWN"); } const char* @@ -738,7 +828,10 @@ fts_ast_node_type_get(fts_ast_type_t type) return("FTS_AST_LIST"); case FTS_AST_SUBEXP_LIST: return("FTS_AST_SUBEXP_LIST"); + case FTS_AST_PARSER_PHRASE_LIST: + return("FTS_AST_PARSER_PHRASE_LIST"); } ut_ad(0); + return("FTS_UNKNOWN"); } #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc index 
2d71934fa0e..183e05edd04 100644 --- a/storage/innobase/fts/fts0blex.cc +++ b/storage/innobase/fts/fts0blex.cc @@ -479,9 +479,10 @@ this program; if not, write to the Free Software Foundation, Inc., /* Required for reentrant parser */ #define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error #define YY_NO_INPUT 1 -#line 484 "fts0blex.cc" +#line 485 "fts0blex.cc" #define INITIAL 0 @@ -706,7 +707,7 @@ YY_DECL register int yy_act; struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; -#line 43 "fts0blex.l" +#line 44 "fts0blex.l" #line 712 "fts0blex.cc" @@ -790,12 +791,12 @@ do_action: /* This label is used only to access EOF actions. */ case 1: YY_RULE_SETUP -#line 45 "fts0blex.l" +#line 46 "fts0blex.l" /* Ignore whitespace */ ; YY_BREAK case 2: YY_RULE_SETUP -#line 47 "fts0blex.l" +#line 48 "fts0blex.l" { val->oper = fts0bget_text(yyscanner)[0]; @@ -804,7 +805,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 53 "fts0blex.l" +#line 54 "fts0blex.l" { val->token = fts_ast_string_create(reinterpret_cast(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); @@ -813,7 +814,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 59 "fts0blex.l" +#line 60 "fts0blex.l" { val->token = fts_ast_string_create(reinterpret_cast(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); @@ -822,7 +823,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 65 "fts0blex.l" +#line 66 "fts0blex.l" { val->token = fts_ast_string_create(reinterpret_cast(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); @@ -832,12 +833,12 @@ YY_RULE_SETUP case 6: /* rule 6 can match eol */ YY_RULE_SETUP -#line 71 "fts0blex.l" +#line 72 "fts0blex.l" YY_BREAK case 7: YY_RULE_SETUP -#line 73 "fts0blex.l" +#line 74 "fts0blex.l" ECHO; YY_BREAK #line 843 "fts0blex.cc" @@ -1953,5 +1954,5 @@ void fts0bfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused) #define YYTABLES_NAME "yytables" -#line 73 "fts0blex.l" +#line 74 "fts0blex.l" diff --git 
a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l index ae6e8ffaa48..ce61fc6b2d9 100644 --- a/storage/innobase/fts/fts0blex.l +++ b/storage/innobase/fts/fts0blex.l @@ -30,6 +30,7 @@ this program; if not, write to the Free Software Foundation, Inc., /* Required for reentrant parser */ #define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error %} diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc index 5b4ae5c39f7..740ee87fe01 100644 --- a/storage/innobase/fts/fts0config.cc +++ b/storage/innobase/fts/fts0config.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,7 +28,7 @@ Created 2007/5/9 Sunny Bains #include "fts0priv.h" -#ifndef UNIV_NONINL +#ifdef UNIV_NONINL #include "fts0types.ic" #include "fts0vlc.ic" #endif @@ -69,7 +69,6 @@ fts_config_fetch_value( Get value from the config table. The caller must ensure that enough space is allocated for value to hold the column contents. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_value( /*=================*/ @@ -85,6 +84,7 @@ fts_config_get_value( que_t* graph; dberr_t error; ulint name_len = strlen(name); + char table_name[MAX_FULL_NAME_LEN]; info = pars_info_create(); @@ -100,12 +100,14 @@ fts_config_get_value( pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); fts_table->suffix = "CONFIG"; + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); graph = fts_parse_sql( fts_table, info, "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" + "DECLARE CURSOR c IS SELECT value FROM $table_name" " WHERE key = :name;\n" "BEGIN\n" "" @@ -132,7 +134,6 @@ fts_config_get_value( /*********************************************************************//** Create the config table name for retrieving index specific value. @return index config parameter name */ -UNIV_INTERN char* fts_config_create_index_param_name( /*===============================*/ @@ -146,9 +147,9 @@ fts_config_create_index_param_name( len = strlen(param); /* Caller is responsible for deleting name. */ - name = static_cast(ut_malloc( + name = static_cast(ut_malloc_nokey( len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2)); - strcpy(name, param); + ::strcpy(name, param); name[len] = '_'; fts_write_object_id(index->id, name + len + 1, @@ -163,7 +164,6 @@ Get value specific to an FTS index from the config table. The caller must ensure that enough space is allocated for value to hold the column contents. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_index_value( /*=======================*/ @@ -194,7 +194,6 @@ fts_config_get_index_value( /******************************************************************//** Set the value in the config table for name. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_set_value( /*=================*/ @@ -212,6 +211,7 @@ fts_config_set_value( undo_no_t undo_no; undo_no_t n_rows_updated; ulint name_len = strlen(name); + char table_name[MAX_FULL_NAME_LEN]; info = pars_info_create(); @@ -220,10 +220,13 @@ fts_config_set_value( value->f_str, value->f_len); fts_table->suffix = "CONFIG"; + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); graph = fts_parse_sql( fts_table, info, - "BEGIN UPDATE \"%s\" SET value = :value WHERE key = :name;"); + "BEGIN UPDATE $table_name SET value = :value" + " WHERE key = :name;"); trx->op_info = "setting FTS config value"; @@ -245,10 +248,13 @@ fts_config_set_value( pars_info_bind_varchar_literal( info, "value", value->f_str, value->f_len); + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); + graph = fts_parse_sql( fts_table, info, "BEGIN\n" - "INSERT INTO \"%s\" VALUES(:name, :value);"); + "INSERT INTO $table_name VALUES(:name, :value);"); trx->op_info = "inserting FTS config value"; @@ -263,7 +269,6 @@ fts_config_set_value( /******************************************************************//** Set the value specific to an FTS index in the config table. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_set_index_value( /*=======================*/ @@ -294,7 +299,6 @@ fts_config_set_index_value( /******************************************************************//** Get an ulint value from the config table. @return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_config_get_index_ulint( /*=======================*/ @@ -309,15 +313,14 @@ fts_config_get_index_ulint( /* We set the length of value to the max bytes it can hold. 
This information is used by the callback that reads the value.*/ value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); error = fts_config_get_index_value(trx, index, name, &value); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n", - ut_strerr(error), name); + ib::error() << "(" << ut_strerr(error) << ") reading `" + << name << "'"; } else { *int_value = strtoul((char*) value.f_str, NULL, 10); } @@ -330,7 +333,6 @@ fts_config_get_index_ulint( /******************************************************************//** Set an ulint value in the config table. @return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_config_set_index_ulint( /*=======================*/ @@ -345,7 +347,7 @@ fts_config_set_index_ulint( /* We set the length of value to the max bytes it can hold. This information is used by the callback that reads the value.*/ value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); // FIXME: Get rid of snprintf ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); @@ -356,10 +358,9 @@ fts_config_set_index_ulint( error = fts_config_set_index_value(trx, index, name, &value); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n", - ut_strerr(error), name); + ib::error() << "(" << ut_strerr(error) << ") writing `" + << name << "'"; } ut_free(value.f_str); @@ -370,7 +371,6 @@ fts_config_set_index_ulint( /******************************************************************//** Get an ulint value from the config table. 
@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_config_get_ulint( /*=================*/ @@ -386,15 +386,13 @@ fts_config_get_ulint( /* We set the length of value to the max bytes it can hold. This information is used by the callback that reads the value.*/ value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); error = fts_config_get_value(trx, fts_table, name, &value); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n", - ut_strerr(error), name); + ib::error() << "(" << ut_strerr(error) << ") reading `" + << name << "'"; } else { *int_value = strtoul((char*) value.f_str, NULL, 10); } @@ -407,7 +405,6 @@ fts_config_get_ulint( /******************************************************************//** Set an ulint value in the config table. @return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_config_set_ulint( /*=================*/ @@ -423,21 +420,18 @@ fts_config_set_ulint( /* We set the length of value to the max bytes it can hold. 
This information is used by the callback that reads the value.*/ value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); - // FIXME: Get rid of snprintf ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); - value.f_len = snprintf( + value.f_len = my_snprintf( (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value); error = fts_config_set_value(trx, fts_table, name, &value); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n", - ut_strerr(error), name); + ib::error() << "(" << ut_strerr(error) << ") writing `" + << name << "'"; } ut_free(value.f_str); @@ -448,7 +442,6 @@ fts_config_set_ulint( /******************************************************************//** Increment the value in the config table for column name. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_increment_value( /*=======================*/ @@ -465,11 +458,12 @@ fts_config_increment_value( que_t* graph = NULL; ulint name_len = strlen(name); pars_info_t* info = pars_info_create(); + char table_name[MAX_FULL_NAME_LEN]; /* We set the length of value to the max bytes it can hold. 
This information is used by the callback that reads the value.*/ value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); *value.f_str = '\0'; @@ -479,11 +473,13 @@ fts_config_increment_value( info, "my_func", fts_config_fetch_value, &value); fts_table->suffix = "CONFIG"; + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "config_table", table_name); graph = fts_parse_sql( fts_table, info, "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" + "DECLARE CURSOR c IS SELECT value FROM $config_table" " WHERE key = :name FOR UPDATE;\n" "BEGIN\n" "" @@ -511,8 +507,7 @@ fts_config_increment_value( ut_a(FTS_MAX_CONFIG_VALUE_LEN > FTS_MAX_INT_LEN); - // FIXME: Get rid of snprintf - value.f_len = snprintf( + value.f_len = my_snprintf( (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value); fts_config_set_value(trx, fts_table, name, &value); @@ -520,10 +515,8 @@ fts_config_increment_value( if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) " - "while incrementing %s.\n", ut_strerr(error), name); + ib::error() << "(" << ut_strerr(error) << ") while" + " incrementing " << name << "."; } ut_free(value.f_str); @@ -534,7 +527,6 @@ fts_config_increment_value( /******************************************************************//** Increment the per index value in the config table for column name. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_increment_index_value( /*=============================*/ diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 1457417d5dc..3451607a37e 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -21,26 +21,26 @@ this program; if not, write to the Free Software Foundation, Inc., Full Text Search interface ***********************************************************************/ +#include "ha_prototypes.h" + #include "trx0roll.h" #include "row0mysql.h" #include "row0upd.h" #include "dict0types.h" #include "row0sel.h" - #include "fts0fts.h" #include "fts0priv.h" #include "fts0types.h" - #include "fts0types.ic" #include "fts0vlc.ic" +#include "fts0plugin.h" #include "dict0priv.h" #include "dict0stats.h" #include "btr0pcur.h" -#include +#include "sync0sync.h" +#include "ut0new.h" -#include "ha_prototypes.h" - -#define FTS_MAX_ID_LEN 32 +static const ulint FTS_MAX_ID_LEN = 32; /** Column name from the FTS config table */ #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb" @@ -54,60 +54,44 @@ by looking up the key word in the obsolete table names */ /** This is maximum FTS cache for each table and would be a configurable variable */ -UNIV_INTERN ulong fts_max_cache_size; +ulong fts_max_cache_size; /** Whether the total memory used for FTS cache is exhausted, and we will need a sync to free some memory */ -UNIV_INTERN bool fts_need_sync = false; +bool fts_need_sync = false; /** Variable specifying the total memory allocated for FTS cache */ -UNIV_INTERN ulong fts_max_total_cache_size; +ulong fts_max_total_cache_size; /** This is FTS result cache limit for each query and would be a configurable variable */ -UNIV_INTERN ulong fts_result_cache_limit; +ulong fts_result_cache_limit; /** Variable specifying the maximum FTS max token size */ -UNIV_INTERN ulong fts_max_token_size; +ulong fts_max_token_size; /** Variable specifying the minimum FTS max token 
size */ -UNIV_INTERN ulong fts_min_token_size; +ulong fts_min_token_size; // FIXME: testing ib_time_t elapsed_time = 0; ulint n_nodes = 0; -/** Error condition reported by fts_utf8_decode() */ -const ulint UTF8_ERROR = 0xFFFFFFFF; - #ifdef FTS_CACHE_SIZE_DEBUG /** The cache size permissible lower limit (1K) */ static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1; /** The cache size permissible upper limit (1G) */ static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024; -#endif /* FTS_CACHE_SIZE_DEBUG */ +#endif /** Time to sleep after DEADLOCK error before retrying operation. */ static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000; -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t fts_cache_rw_lock_key; -UNIV_INTERN mysql_pfs_key_t fts_cache_init_rw_lock_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t fts_delete_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_optimize_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_bg_threads_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_doc_id_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. */ -UNIV_INTERN char* fts_internal_tbl_name = NULL; +char* fts_internal_tbl_name = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -164,64 +148,22 @@ struct fts_aux_table_t { char* name; /*!< Name of the table */ }; -/** SQL statements for creating the ancillary common FTS tables. 
*/ -static const char* fts_create_common_tables_sql = { - "BEGIN\n" - "" - "CREATE TABLE \"%s_DELETED\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n" - "" - "CREATE TABLE \"%s_DELETED_CACHE\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_DELETED_CACHE\"(doc_id);\n" - "" - "CREATE TABLE \"%s_BEING_DELETED\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_BEING_DELETED\"(doc_id);\n" - "" - "CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n" - "" - "CREATE TABLE \"%s_CONFIG\" (\n" - " key CHAR(50),\n" - " value CHAR(200) NOT NULL\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n" -}; - #ifdef FTS_DOC_STATS_DEBUG /** Template for creating the FTS auxiliary index specific tables. This is mainly designed for the statistics work in the future */ static const char* fts_create_index_tables_sql = { "BEGIN\n" "" - "CREATE TABLE \"%s_DOC_ID\" (\n" + "CREATE TABLE $doc_id_table (\n" " doc_id BIGINT UNSIGNED,\n" " word_count INTEGER UNSIGNED NOT NULL\n" ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n" + "CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n" }; #endif -/** Template for creating the ancillary FTS tables word index tables. */ -static const char* fts_create_index_sql = { - "BEGIN\n" - "" - "CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND " - "ON \"%s\"(word, first_doc_id);\n" -}; - /** FTS auxiliary table suffixes that are common to all FT indexes. 
*/ -static const char* fts_common_tables[] = { +const char* fts_common_tables[] = { "BEING_DELETED", "BEING_DELETED_CACHE", "CONFIG", @@ -245,21 +187,27 @@ const fts_index_selector_t fts_index_selector[] = { static const char* fts_config_table_insert_values_sql = "BEGIN\n" "\n" - "INSERT INTO \"%s\" VALUES('" + "INSERT INTO $config_table VALUES('" FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n" "" - "INSERT INTO \"%s\" VALUES('" + "INSERT INTO $config_table VALUES('" FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n" "" - "INSERT INTO \"%s\" VALUES ('" + "INSERT INTO $config_table VALUES ('" FTS_SYNCED_DOC_ID "', '0');\n" "" - "INSERT INTO \"%s\" VALUES ('" + "INSERT INTO $config_table VALUES ('" FTS_TOTAL_DELETED_COUNT "', '0');\n" "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */ - "INSERT INTO \"%s\" VALUES ('" + "INSERT INTO $config_table VALUES ('" FTS_TABLE_STATE "', '0');\n"; +/** FTS tokenize parmameter for plugin parser */ +struct fts_tokenize_param_t { + fts_doc_t* result_doc; /*!< Result doc for tokens */ + ulint add_pos; /*!< Added position for tokens */ +}; + /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @@ -336,6 +284,39 @@ fts_update_sync_doc_id( trx_t* trx) /*!< in: update trx, or NULL */ MY_ATTRIBUTE((nonnull(1))); +/** Get a character set based on precise type. 
+@param prtype precise type +@return the corresponding character set */ +UNIV_INLINE +CHARSET_INFO* +fts_get_charset(ulint prtype) +{ +#ifdef UNIV_DEBUG + switch (prtype & DATA_MYSQL_TYPE_MASK) { + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_VARCHAR: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ + + uint cs_num = (uint) dtype_get_charset_coll(prtype); + + if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) { + return(cs); + } + + ib::fatal() << "Unable to find charset-collation " << cs_num; + return(NULL); +} + /****************************************************************//** This function loads the default InnoDB stopword list */ static @@ -353,9 +334,9 @@ fts_load_default_stopword( heap = static_cast(allocator->arg); if (!stopword_info->cached_stopword) { - /* For default stopword, we always use fts_utf8_string_cmp() */ - stopword_info->cached_stopword = rbt_create( - sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp); + stopword_info->cached_stopword = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + &my_charset_latin1); } stop_words = stopword_info->cached_stopword; @@ -375,7 +356,7 @@ fts_load_default_stopword( str.f_len = ut_strlen(word); str.f_str = reinterpret_cast(word); - fts_utf8_string_dup(&new_word.text, &str, heap); + fts_string_dup(&new_word.text, &str, heap); rbt_insert(stop_words, &new_word, &new_word); } @@ -496,7 +477,7 @@ fts_load_user_stopword( info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT value " + " SELECT value" " FROM $table_stopword;\n" "BEGIN\n" "\n" @@ -520,18 +501,15 @@ fts_load_user_stopword( fts_sql_rollback(trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading user stopword table. 
" - "Retrying!\n"); + ib::warn() << "Lock wait timeout reading user" + " stopword table. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error '%s' " - "while reading user stopword table.\n", - ut_strerr(error)); + ib::error() << "Error '" << ut_strerr(error) + << "' while reading user stopword" + " table."; ret = FALSE; break; } @@ -571,7 +549,7 @@ fts_index_cache_init( index_cache->doc_stats = ib_vector_create( allocator, sizeof(fts_doc_stats_t), 4); - for (i = 0; fts_index_selector[i].value; ++i) { + for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) { ut_a(index_cache->ins_graph[i] == NULL); ut_a(index_cache->sel_graph[i] == NULL); } @@ -579,7 +557,6 @@ fts_index_cache_init( /*********************************************************************//** Initialize FTS cache. */ -UNIV_INTERN void fts_cache_init( /*===========*/ @@ -612,7 +589,6 @@ fts_cache_init( /****************************************************************//** Create a FTS cache. */ -UNIV_INTERN fts_cache_t* fts_cache_create( /*=============*/ @@ -634,15 +610,11 @@ fts_cache_create( fts_cache_init_rw_lock_key, &cache->init_lock, SYNC_FTS_CACHE_INIT); - mutex_create( - fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE); + mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock); - mutex_create( - fts_optimize_mutex_key, &cache->optimize_lock, - SYNC_FTS_OPTIMIZE); + mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock); - mutex_create( - fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE); + mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock); /* This is the heap used to create the cache itself. 
*/ cache->self_heap = ib_heap_allocator_create(heap); @@ -651,13 +623,11 @@ fts_cache_create( cache->sync_heap = ib_heap_allocator_create(heap); cache->sync_heap->arg = NULL; - fts_need_sync = false; - cache->sync = static_cast( mem_heap_zalloc(heap, sizeof(fts_sync_t))); cache->sync->table = table; - cache->sync->event = os_event_create(); + cache->sync->event = os_event_create(0); /* Create the index cache vector that will hold the inverted indexes. */ cache->indexes = ib_vector_create( @@ -677,7 +647,6 @@ fts_cache_create( /*******************************************************************//** Add a newly create index into FTS cache */ -UNIV_INTERN void fts_add_index( /*==========*/ @@ -716,9 +685,8 @@ fts_reset_get_doc( fts_get_doc_t* get_doc; ulint i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X)); + ib_vector_reset(cache->get_docs); for (i = 0; i < ib_vector_size(cache->indexes); i++) { @@ -793,7 +761,6 @@ fts_in_index_cache( Check indexes in the fts->indexes is also present in index cache and table->indexes list @return TRUE if all indexes match */ -UNIV_INTERN ibool fts_check_cached_index( /*===================*/ @@ -829,7 +796,6 @@ fts_check_cached_index( /*******************************************************************//** Drop auxiliary tables related to an FTS index @return DB_SUCCESS or error number */ -UNIV_INTERN dberr_t fts_drop_index( /*===========*/ @@ -912,7 +878,6 @@ fts_drop_index( /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already held */ -UNIV_INTERN void fts_que_graph_free_check_lock( /*==========================*/ @@ -949,7 +914,6 @@ fts_que_graph_free_check_lock( /****************************************************************//** Create an FTS index cache. 
*/ -UNIV_INTERN CHARSET_INFO* fts_index_get_charset( /*==================*/ @@ -962,9 +926,7 @@ fts_index_get_charset( field = dict_index_get_nth_field(index, 0); prtype = field->col->prtype; - charset = innobase_get_fts_charset( - (int) (prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); + charset = fts_get_charset(prtype); #ifdef FTS_DEBUG /* Set up charset info for this index. Please note all @@ -975,9 +937,7 @@ fts_index_get_charset( field = dict_index_get_nth_field(index, i); prtype = field->col->prtype; - fld_charset = innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); + fld_charset = fts_get_charset(prtype); /* All FTS columns should have the same charset */ if (charset) { @@ -994,7 +954,6 @@ fts_index_get_charset( /****************************************************************//** Create an FTS index cache. @return Index Cache */ -UNIV_INTERN fts_index_cache_t* fts_cache_index_cache_create( /*=========================*/ @@ -1007,9 +966,7 @@ fts_cache_index_cache_create( ut_a(cache != NULL); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X)); /* Must not already exist in the cache vector. */ ut_a(fts_find_index_cache(cache, index) == NULL); @@ -1023,7 +980,7 @@ fts_cache_index_cache_create( index_cache->charset = fts_index_get_charset(index); - n_bytes = sizeof(que_t*) * sizeof(fts_index_selector); + n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX; index_cache->ins_graph = static_cast( mem_heap_zalloc(static_cast( @@ -1079,7 +1036,6 @@ fts_words_free( /** Clear cache. 
@param[in,out] cache fts cache */ -UNIV_INTERN void fts_cache_clear( fts_cache_t* cache) @@ -1099,7 +1055,7 @@ fts_cache_clear( index_cache->words = NULL; - for (j = 0; fts_index_selector[j].value; ++j) { + for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) { if (index_cache->ins_graph[j] != NULL) { @@ -1126,6 +1082,8 @@ fts_cache_clear( mem_heap_free(static_cast(cache->sync_heap->arg)); cache->sync_heap->arg = NULL; + fts_need_sync = false; + cache->total_size = 0; mutex_enter((ib_mutex_t*) &cache->deleted_lock); @@ -1145,10 +1103,8 @@ fts_get_index_cache( { ulint i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX) - || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X) + || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X)); for (i = 0; i < ib_vector_size(cache->indexes); ++i) { fts_index_cache_t* index_cache; @@ -1178,9 +1134,7 @@ fts_get_index_get_doc( { ulint i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X)); for (i = 0; i < ib_vector_size(cache->get_docs); ++i) { fts_get_doc_t* get_doc; @@ -1200,7 +1154,6 @@ fts_get_index_get_doc( /**********************************************************************//** Free the FTS cache. 
*/ -UNIV_INTERN void fts_cache_destroy( /*==============*/ @@ -1211,7 +1164,7 @@ fts_cache_destroy( mutex_free(&cache->optimize_lock); mutex_free(&cache->deleted_lock); mutex_free(&cache->doc_id_lock); - os_event_free(cache->sync->event); + os_event_destroy(cache->sync->event); if (cache->stopword_info.cached_stopword) { rbt_free(cache->stopword_info.cached_stopword); @@ -1239,14 +1192,13 @@ fts_tokenizer_word_get( fts_tokenizer_word_t* word; ib_rbt_bound_t parent; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X)); /* If it is a stopword, do not index it */ - if (cache->stopword_info.cached_stopword != NULL - && rbt_search(cache->stopword_info.cached_stopword, - &parent, text) == 0) { + if (!fts_check_token(text, + cache->stopword_info.cached_stopword, + index_cache->index->is_ngram, + index_cache->charset)) { return(NULL); } @@ -1261,7 +1213,7 @@ fts_tokenizer_word_get( new_word.nodes = ib_vector_create( cache->sync_heap, sizeof(fts_node_t), 4); - fts_utf8_string_dup(&new_word.text, text, heap); + fts_string_dup(&new_word.text, text, heap); parent.last = rbt_add_node( index_cache->words, &parent, &new_word); @@ -1283,7 +1235,6 @@ fts_tokenizer_word_get( /**********************************************************************//** Add the given doc_id/word positions to the given node's ilist. */ -UNIV_INTERN void fts_cache_node_add_positions( /*=========================*/ @@ -1300,11 +1251,12 @@ fts_cache_node_add_positions( byte* ptr_start; ulint doc_id_delta; -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG if (cache) { - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); + ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X)); } -#endif +#endif /* UNIV_DEBUG */ + ut_ad(doc_id >= node->last_doc_id); /* Calculate the space required to store the ilist. 
*/ @@ -1345,7 +1297,7 @@ fts_cache_node_add_positions( new_size = (ulint)(1.2 * new_size); } - ilist = static_cast(ut_malloc(new_size)); + ilist = static_cast(ut_malloc_nokey(new_size)); ptr = ilist + node->ilist_size; node->ilist_size_alloc = new_size; @@ -1414,9 +1366,7 @@ fts_cache_add_doc( return; } -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X)); n_words = rbt_size(tokens); @@ -1503,12 +1453,11 @@ fts_drop_table( /* Pass nonatomic=false (dont allow data dict unlock), because the transaction may hold locks on SYS_* tables from previous calls to fts_drop_table(). */ - error = row_drop_table_for_mysql(table_name, trx, true, false); + error = row_drop_table_for_mysql(table_name, trx, true, false, false); if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to drop FTS index aux table %s: %s", - table_name, ut_strerr(error)); + ib::error() << "Unable to drop FTS index aux table " + << table_name << ": " << ut_strerr(error); } } else { error = DB_FAIL; @@ -1555,7 +1504,6 @@ fts_rename_one_aux_table( Rename auxiliary tables for all fts index for a table. 
This(rename) is due to database name change @return DB_SUCCESS or error code */ - dberr_t fts_rename_aux_tables( /*==================*/ @@ -1570,17 +1518,15 @@ fts_rename_aux_tables( /* Rename common auxiliary tables */ for (i = 0; fts_common_tables[i] != NULL; ++i) { - char* old_table_name; + char old_table_name[MAX_FULL_NAME_LEN]; dberr_t err = DB_SUCCESS; fts_table.suffix = fts_common_tables[i]; - old_table_name = fts_get_table_name(&fts_table); + fts_get_table_name(&fts_table, old_table_name); err = fts_rename_one_aux_table(new_name, old_table_name, trx); - mem_free(old_table_name); - if (err != DB_SUCCESS) { return(err); } @@ -1598,13 +1544,13 @@ fts_rename_aux_tables( FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); - for (ulint j = 0; fts_index_selector[j].value; ++j) { + for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) { dberr_t err; - char* old_table_name; + char old_table_name[MAX_FULL_NAME_LEN]; fts_table.suffix = fts_get_suffix(j); - old_table_name = fts_get_table_name(&fts_table); + fts_get_table_name(&fts_table, old_table_name); err = fts_rename_one_aux_table( new_name, old_table_name, trx); @@ -1613,8 +1559,6 @@ fts_rename_aux_tables( err = DB_DEADLOCK; fts_sql_rollback(trx);); - mem_free(old_table_name); - if (err != DB_SUCCESS) { return(err); } @@ -1642,11 +1586,11 @@ fts_drop_common_tables( for (i = 0; fts_common_tables[i] != NULL; ++i) { dberr_t err; - char* table_name; + char table_name[MAX_FULL_NAME_LEN]; fts_table->suffix = fts_common_tables[i]; - table_name = fts_get_table_name(fts_table); + fts_get_table_name(fts_table, table_name); err = fts_drop_table(trx, table_name); @@ -1654,8 +1598,6 @@ fts_drop_common_tables( if (err != DB_SUCCESS && err != DB_FAIL) { error = err; } - - mem_free(table_name); } return(error); @@ -1665,7 +1607,6 @@ fts_drop_common_tables( Since we do a horizontal split on the index table, we need to drop all the split tables. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_index_split_tables( /*========================*/ @@ -1679,13 +1620,13 @@ fts_drop_index_split_tables( FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); - for (i = 0; fts_index_selector[i].value; ++i) { + for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) { dberr_t err; - char* table_name; + char table_name[MAX_FULL_NAME_LEN]; fts_table.suffix = fts_get_suffix(i); - table_name = fts_get_table_name(&fts_table); + fts_get_table_name(&fts_table, table_name); err = fts_drop_table(trx, table_name); @@ -1693,8 +1634,6 @@ fts_drop_index_split_tables( if (err != DB_SUCCESS && err != DB_FAIL) { error = err; } - - mem_free(table_name); } return(error); @@ -1703,7 +1642,6 @@ fts_drop_index_split_tables( /****************************************************************//** Drops FTS auxiliary tables for an FTS index @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_index_tables( /*==================*/ @@ -1731,11 +1669,11 @@ fts_drop_index_tables( FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); for (ulint i = 0; index_tables[i] != NULL; ++i) { - char* table_name; + char table_name[MAX_FULL_NAME_LEN]; fts_table.suffix = index_tables[i]; - table_name = fts_get_table_name(&fts_table); + fts_get_table_name(&fts_table, table_name); err = fts_drop_table(trx, table_name); @@ -1743,8 +1681,6 @@ fts_drop_index_tables( if (err != DB_SUCCESS && err != DB_FAIL) { error = err; } - - mem_free(table_name); } #endif /* FTS_DOC_STATS_DEBUG */ @@ -1790,7 +1726,6 @@ Drops the ancillary tables needed for supporting an FTS index on a given table. row_mysql_lock_data_dictionary must have been called before this. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_tables( /*============*/ @@ -1813,46 +1748,171 @@ fts_drop_tables( return(error); } -/*********************************************************************//** -Prepare the SQL, so that all '%s' are replaced by the common prefix. -@return sql string, use mem_free() to free the memory */ -static -char* -fts_prepare_sql( -/*============*/ - fts_table_t* fts_table, /*!< in: table name info */ - const char* my_template) /*!< in: sql template */ +/** Extract only the required flags from table->flags2 for FTS Aux +tables. +@param[in] in_flags2 Table flags2 +@return extracted flags2 for FTS aux tables */ +static inline +ulint +fts_get_table_flags2_for_aux_tables( + ulint flags2) { - char* sql; - char* name_prefix; - - name_prefix = fts_get_table_name_prefix(fts_table); - sql = ut_strreplace(my_template, "%s", name_prefix); - mem_free(name_prefix); - - return(sql); + /* Extract the file_per_table flag & temporary file flag + from the main FTS table flags2 */ + return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) | + (flags2 & DICT_TF2_TEMPORARY)); } -/*********************************************************************//** -Creates the common ancillary tables needed for supporting an FTS index +/** Create dict_table_t object for FTS Aux tables. 
+@param[in] aux_table_name FTS Aux table name +@param[in] table table object of FTS Index +@param[in] n_cols number of columns for FTS Aux table +@return table object for FTS Aux table */ +static +dict_table_t* +fts_create_in_mem_aux_table( + const char* aux_table_name, + const dict_table_t* table, + ulint n_cols) +{ + dict_table_t* new_table = dict_mem_table_create( + aux_table_name, table->space, n_cols, 0, table->flags, + fts_get_table_flags2_for_aux_tables(table->flags2)); + + if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + ut_ad(table->space == fil_space_get_id_by_name( + table->tablespace())); + new_table->tablespace = mem_heap_strdup( + new_table->heap, table->tablespace); + } + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + ut_ad(table->data_dir_path != NULL); + new_table->data_dir_path = mem_heap_strdup( + new_table->heap, table->data_dir_path); + } + + return(new_table); +} + +/** Function to create on FTS common table. +@param[in,out] trx InnoDB transaction +@param[in] table Table that has FTS Index +@param[in] fts_table_name FTS AUX table name +@param[in] fts_suffix FTS AUX table suffix +@param[in] heap heap +@return table object if created, else NULL */ +static +dict_table_t* +fts_create_one_common_table( + trx_t* trx, + const dict_table_t* table, + const char* fts_table_name, + const char* fts_suffix, + mem_heap_t* heap) +{ + dict_table_t* new_table = NULL; + dberr_t error; + bool is_config = strcmp(fts_suffix, "CONFIG") == 0; + + if (!is_config) { + + new_table = fts_create_in_mem_aux_table( + fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS); + + dict_mem_table_add_col( + new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED, + FTS_DELETED_TABLE_COL_LEN); + } else { + /* Config table has different schema. 
*/ + new_table = fts_create_in_mem_aux_table( + fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS); + + dict_mem_table_add_col( + new_table, heap, "key", DATA_VARCHAR, 0, + FTS_CONFIG_TABLE_KEY_COL_LEN); + + dict_mem_table_add_col( + new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL, + FTS_CONFIG_TABLE_VALUE_COL_LEN); + } + + error = row_create_table_for_mysql(new_table, NULL, trx, false, + FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + + if (error == DB_SUCCESS) { + + dict_index_t* index = dict_mem_index_create( + fts_table_name, "FTS_COMMON_TABLE_IND", + new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1); + + if (!is_config) { + dict_mem_index_add_field(index, "doc_id", 0); + } else { + dict_mem_index_add_field(index, "key", 0); + } + + /* We save and restore trx->dict_operation because + row_create_index_for_mysql() changes the operation to + TRX_DICT_OP_TABLE. */ + trx_dict_op_t op = trx_get_dict_operation(trx); + + error = row_create_index_for_mysql(index, trx, NULL, NULL); + + trx->dict_operation = op; + } + + if (error != DB_SUCCESS) { + trx->error_state = error; + dict_mem_table_free(new_table); + new_table = NULL; + ib::warn() << "Failed to create FTS common table " + << fts_table_name; + } + return(new_table); +} + +/** Creates the common auxiliary tables needed for supporting an FTS index on the given table. row_mysql_lock_data_dictionary must have been called before this. +The following tables are created. 
+CREATE TABLE $FTS_PREFIX_DELETED + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_DELETED_CACHE + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_BEING_DELETED + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_CONFIG + (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key) +@param[in,out] trx transaction +@param[in] table table with FTS index +@param[in] name table name normalized +@param[in] skip_doc_id_index Skip index on doc id @return DB_SUCCESS if succeed */ -UNIV_INTERN dberr_t fts_create_common_tables( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - const dict_table_t* table, /*!< in: table with FTS index */ - const char* name, /*!< in: table name normalized.*/ - bool skip_doc_id_index)/*!< in: Skip index on doc id */ + trx_t* trx, + const dict_table_t* table, + const char* name, + bool skip_doc_id_index) { - char* sql; dberr_t error; que_t* graph; fts_table_t fts_table; mem_heap_t* heap = mem_heap_create(1024); pars_info_t* info; + char fts_name[MAX_FULL_NAME_LEN]; + char full_name[sizeof(fts_common_tables) / sizeof(char*)] + [MAX_FULL_NAME_LEN]; + + dict_index_t* index = NULL; + trx_dict_op_t op; + /* common_tables vector is used for dropping FTS common tables + on error condition. */ + std::vector common_tables; + std::vector::const_iterator it; FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); @@ -1864,23 +1924,39 @@ fts_create_common_tables( } /* Create the FTS tables that are common to an FTS index. 
*/ - sql = fts_prepare_sql(&fts_table, fts_create_common_tables_sql); - graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql); - mem_free(sql); + for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { - error = fts_eval_sql(trx, graph); + fts_table.suffix = fts_common_tables[i]; + fts_get_table_name(&fts_table, full_name[i]); + dict_table_t* common_table = fts_create_one_common_table( + trx, table, full_name[i], fts_table.suffix, heap); - que_graph_free(graph); + if (common_table == NULL) { + error = DB_ERROR; + goto func_exit; + } else { + common_tables.push_back(common_table); + } - if (error != DB_SUCCESS) { + DBUG_EXECUTE_IF("ib_fts_aux_table_error", + /* Return error after creating FTS_AUX_CONFIG table. */ + if (i == 4) { + error = DB_ERROR; + goto func_exit; + } + ); - goto func_exit; } /* Write the default settings to the config table. */ + info = pars_info_create(); + fts_table.suffix = "CONFIG"; + fts_get_table_name(&fts_table, fts_name); + pars_info_bind_id(info, true, "config_table", fts_name); + graph = fts_parse_sql_no_dict_lock( - &fts_table, NULL, fts_config_table_insert_values_sql); + &fts_table, info, fts_config_table_insert_values_sql); error = fts_eval_sql(trx, graph); @@ -1891,133 +1967,132 @@ fts_create_common_tables( goto func_exit; } - info = pars_info_create(); + index = dict_mem_index_create( + name, FTS_DOC_ID_INDEX_NAME, table->space, + DICT_UNIQUE, 1); + dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0); - pars_info_bind_id(info, TRUE, "table_name", name); - pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME); - pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME); + op = trx_get_dict_operation(trx); - /* Create the FTS DOC_ID index on the hidden column. Currently this - is common for any FT index created on the table. 
*/ - graph = fts_parse_sql_no_dict_lock( - NULL, - info, - mem_heap_printf( - heap, - "BEGIN\n" - "" - "CREATE UNIQUE INDEX $index_name ON $table_name(" - "$doc_id_col_name);\n")); + error = row_create_index_for_mysql(index, trx, NULL, NULL); - error = fts_eval_sql(trx, graph); - que_graph_free(graph); + trx->dict_operation = op; func_exit: if (error != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(table->name, trx, FALSE, TRUE); - - trx->error_state = DB_SUCCESS; + for (it = common_tables.begin(); it != common_tables.end(); + ++it) { + row_drop_table_for_mysql( + (*it)->name.m_name, trx, true, FALSE); + } } + common_tables.clear(); mem_heap_free(heap); return(error); } - -/*************************************************************//** -Wrapper function of fts_create_index_tables_low(), create auxiliary -tables for an FTS index -@return: DB_SUCCESS or error code */ +/** Creates one FTS auxiliary index table for an FTS index. 
+@param[in,out] trx transaction +@param[in] index the index instance +@param[in] fts_table fts_table structure +@param[in] heap memory heap +@return DB_SUCCESS or error code */ static dict_table_t* fts_create_one_index_table( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* - index, /*!< in: the index instance */ - fts_table_t* fts_table, /*!< in: fts_table structure */ - mem_heap_t* heap) /*!< in: heap */ + trx_t* trx, + const dict_index_t* index, + fts_table_t* fts_table, + mem_heap_t* heap) { dict_field_t* field; dict_table_t* new_table = NULL; - char* table_name = fts_get_table_name(fts_table); + char table_name[MAX_FULL_NAME_LEN]; dberr_t error; CHARSET_INFO* charset; - ulint flags2 = 0; ut_ad(index->type & DICT_FTS); - if (srv_file_per_table) { - flags2 = DICT_TF2_USE_TABLESPACE; - } + fts_get_table_name(fts_table, table_name); - new_table = dict_mem_table_create(table_name, 0, 5, 1, flags2); + new_table = fts_create_in_mem_aux_table( + table_name, fts_table->table, + FTS_AUX_INDEX_TABLE_NUM_COLS); field = dict_index_get_nth_field(index, 0); - charset = innobase_get_fts_charset( - (int)(field->col->prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(field->col->prtype)); + charset = fts_get_charset(field->col->prtype); - if (strcmp(charset->name, "latin1_swedish_ci") == 0) { - dict_mem_table_add_col(new_table, heap, "word", DATA_VARCHAR, - field->col->prtype, FTS_MAX_WORD_LEN); - } else { - dict_mem_table_add_col(new_table, heap, "word", DATA_VARMYSQL, - field->col->prtype, FTS_MAX_WORD_LEN); - } + dict_mem_table_add_col(new_table, heap, "word", + charset == &my_charset_latin1 + ? 
DATA_VARCHAR : DATA_VARMYSQL, + field->col->prtype, + FTS_INDEX_WORD_LEN); dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, - sizeof(doc_id_t)); + FTS_INDEX_FIRST_DOC_ID_LEN); dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, - sizeof(doc_id_t)); + FTS_INDEX_LAST_DOC_ID_LEN); dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 4); + DATA_NOT_NULL | DATA_UNSIGNED, + FTS_INDEX_DOC_COUNT_LEN); - dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB, - 4130048, 0); + /* The precise type calculation is as follows: + least signficiant byte: MySQL type code (not applicable for sys cols) + second least : DATA_NOT_NULL | DATA_BINARY_TYPE + third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */ - error = row_create_table_for_mysql(new_table, trx, false, FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + dict_mem_table_add_col( + new_table, heap, "ilist", DATA_BLOB, + (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL, + FTS_INDEX_ILIST_LEN); + + error = row_create_table_for_mysql(new_table, NULL, trx, false, + FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + + if (error == DB_SUCCESS) { + dict_index_t* index = dict_mem_index_create( + table_name, "FTS_INDEX_TABLE_IND", new_table->space, + DICT_UNIQUE|DICT_CLUSTERED, 2); + dict_mem_index_add_field(index, "word", 0); + dict_mem_index_add_field(index, "first_doc_id", 0); + + trx_dict_op_t op = trx_get_dict_operation(trx); + + error = row_create_index_for_mysql(index, trx, NULL, NULL); + + trx->dict_operation = op; + } if (error != DB_SUCCESS) { trx->error_state = error; dict_mem_table_free(new_table); new_table = NULL; - ib_logf(IB_LOG_LEVEL_WARN, - "Fail to create FTS index table %s", table_name); + ib::warn() << "Failed to create FTS index table " + << table_name; } - mem_free(table_name); - return(new_table); } 
-/*************************************************************//** -Wrapper function of fts_create_index_tables_low(), create auxiliary -tables for an FTS index -@return: DB_SUCCESS or error code */ -UNIV_INTERN +/** Create auxiliary index tables for an FTS index. +@param[in,out] trx transaction +@param[in] index the index instance +@param[in] table_name table name +@param[in] table_id the table id +@return DB_SUCCESS or error code */ dberr_t fts_create_index_tables_low( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* - index, /*!< in: the index instance */ - const char* table_name, /*!< in: the table name */ - table_id_t table_id) /*!< in: the table id */ - + trx_t* trx, + const dict_index_t* index, + const char* table_name, + table_id_t table_id) { ulint i; - que_t* graph; fts_table_t fts_table; dberr_t error = DB_SUCCESS; mem_heap_t* heap = mem_heap_create(1024); @@ -2029,20 +2104,28 @@ fts_create_index_tables_low( fts_table.table = index->table; #ifdef FTS_DOC_STATS_DEBUG - char* sql; - /* Create the FTS auxiliary tables that are specific to an FTS index. */ - sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql); + info = pars_info_create(); - graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql); - mem_free(sql); + fts_table.suffix = "DOC_ID"; + fts_get_table_name(&fts_table, fts_name); + + pars_info_bind_id(info, true, "doc_id_table", fts_name); + + graph = fts_parse_sql_no_dict_lock(NULL, info, + fts_create_index_tables_sql); error = fts_eval_sql(trx, graph); que_graph_free(graph); #endif /* FTS_DOC_STATS_DEBUG */ - for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) { + /* aux_idx_tables vector is used for dropping FTS AUX INDEX + tables on error condition. 
*/ + std::vector aux_idx_tables; + std::vector::const_iterator it; + + for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) { dict_table_t* new_table; /* Create the FTS auxiliary tables that are specific @@ -2053,46 +2136,57 @@ fts_create_index_tables_low( new_table = fts_create_one_index_table( trx, index, &fts_table, heap); - if (!new_table) { + if (new_table == NULL) { error = DB_FAIL; break; + } else { + aux_idx_tables.push_back(new_table); } - graph = fts_parse_sql_no_dict_lock( - &fts_table, NULL, fts_create_index_sql); - - error = fts_eval_sql(trx, graph); - que_graph_free(graph); + DBUG_EXECUTE_IF("ib_fts_index_table_error", + /* Return error after creating FTS_INDEX_5 + aux table. */ + if (i == 4) { + error = DB_FAIL; + break; + } + ); } if (error != DB_SUCCESS) { - /* We have special error handling here */ - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(table_name, trx, FALSE, TRUE); - - trx->error_state = DB_SUCCESS; + for (it = aux_idx_tables.begin(); it != aux_idx_tables.end(); + ++it) { + row_drop_table_for_mysql( + (*it)->name.m_name, trx, true, FALSE); + } } + aux_idx_tables.clear(); mem_heap_free(heap); return(error); } -/******************************************************************//** -Creates the column specific ancillary tables needed for supporting an +/** Creates the column specific ancillary tables needed for supporting an FTS index on the given table. row_mysql_lock_data_dictionary must have been called before this. + +All FTS AUX Index tables have the following schema. 
+CREAT TABLE $FTS_PREFIX_INDEX_[1-6]( + word VARCHAR(FTS_MAX_WORD_LEN), + first_doc_id INT NOT NULL, + last_doc_id UNSIGNED NOT NULL, + doc_count UNSIGNED INT NOT NULL, + ilist VARBINARY NOT NULL, + UNIQUE CLUSTERED INDEX ON (word, first_doc_id)) +@param[in,out] trx transaction +@param[in] index index instance @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_create_index_tables( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: the index instance */ + trx_t* trx, + const dict_index_t* index) { dberr_t err; dict_table_t* table; @@ -2100,7 +2194,8 @@ fts_create_index_tables( table = dict_table_get_low(index->table_name); ut_a(table != NULL); - err = fts_create_index_tables_low(trx, index, table->name, table->id); + err = fts_create_index_tables_low( + trx, index, table->name.m_name, table->id); if (err == DB_SUCCESS) { trx_commit(trx); @@ -2246,7 +2341,7 @@ fts_savepoint_create( /******************************************************************//** Create an FTS trx. -@return FTS trx */ +@return FTS trx */ static fts_trx_t* fts_trx_create( @@ -2429,7 +2524,6 @@ fts_trx_table_add_op( /******************************************************************//** Notify the FTS system about an operation on an FTS-indexed table. */ -UNIV_INTERN void fts_trx_add_op( /*===========*/ @@ -2511,7 +2605,7 @@ fts_get_max_cache_size( information is used by the callback that reads the value. */ value.f_n_char = 0; value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = ut_malloc(value.f_len + 1); + value.f_str = ut_malloc_nokey(value.f_len + 1); error = fts_config_get_value( trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value); @@ -2523,35 +2617,32 @@ fts_get_max_cache_size( if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Warning: FTS max cache size " - " (%lu) out of range. 
Minimum value is " - "%luMB and the maximum values is %luMB, " - "setting cache size to upper limit\n", - cache_size_in_mb, - FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, - FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB); + ib::warn() << "FTS max cache size (" + << cache_size_in_mb << ") out of range." + " Minimum value is " + << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB + << "MB and the maximum value is " + << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB + << "MB, setting cache size to upper limit"; cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB; } else if (cache_size_in_mb < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Warning: FTS max cache size " - " (%lu) out of range. Minimum value is " - "%luMB and the maximum values is %luMB, " - "setting cache size to lower limit\n", - cache_size_in_mb, - FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, - FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB); + ib::warn() << "FTS max cache size (" + << cache_size_in_mb << ") out of range." + " Minimum value is " + << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB + << "MB and the maximum value is" + << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB + << "MB, setting cache size to lower limit"; cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB; } } else { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: Error: (%lu) reading max cache " - "config value from config table\n", error); + ib::error() << "(" << ut_strerr(error) << ") reading max" + " cache config value from config table"; } ut_free(value.f_str); @@ -2564,7 +2655,6 @@ fts_get_max_cache_size( /*********************************************************************//** Get the total number of words in the FTS for a particular FTS index. @return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_get_total_word_count( /*=====================*/ @@ -2581,7 +2671,7 @@ fts_get_total_word_count( information is used by the callback that reads the value. 
*/ value.f_n_char = 0; value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast(ut_malloc(value.f_len + 1)); + value.f_str = static_cast(ut_malloc_nokey(value.f_len + 1)); error = fts_config_get_index_value( trx, index, FTS_TOTAL_WORD_COUNT, &value); @@ -2591,9 +2681,8 @@ fts_get_total_word_count( value.f_str[value.f_len] = 0; *total = strtoul((char*) value.f_str, NULL, 10); } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) reading total words " - "value from config table\n", ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ") reading total" + " words value from config table"; } ut_free(value.f_str); @@ -2606,7 +2695,6 @@ fts_get_total_word_count( Update the next and last Doc ID in the CONFIG table to be the input "doc_id" value (+ 1). We would do so after each FTS index build or table truncate */ -UNIV_INTERN void fts_update_next_doc_id( /*===================*/ @@ -2628,7 +2716,6 @@ fts_update_next_doc_id( /*********************************************************************//** Get the next available document id. 
@return DB_SUCCESS if OK */ -UNIV_INTERN dberr_t fts_get_next_doc_id( /*================*/ @@ -2640,23 +2727,19 @@ fts_get_next_doc_id( /* If the Doc ID system has not yet been initialized, we will consult the CONFIG table and user table to re-establish the initial value of the Doc ID */ - - if (cache->first_doc_id != 0 || !fts_init_doc_id(table)) { - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - *doc_id = FTS_NULL_DOC_ID; - return(DB_SUCCESS); - } - - /* Otherwise, simply increment the value in cache */ - mutex_enter(&cache->doc_id_lock); - *doc_id = ++cache->next_doc_id; - mutex_exit(&cache->doc_id_lock); - } else { - mutex_enter(&cache->doc_id_lock); - *doc_id = cache->next_doc_id; - mutex_exit(&cache->doc_id_lock); + if (cache->first_doc_id == FTS_NULL_DOC_ID) { + fts_init_doc_id(table); } + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { + *doc_id = FTS_NULL_DOC_ID; + return(DB_SUCCESS); + } + + mutex_enter(&cache->doc_id_lock); + *doc_id = ++cache->next_doc_id; + mutex_exit(&cache->doc_id_lock); + return(DB_SUCCESS); } @@ -2683,6 +2766,7 @@ fts_cmp_set_sync_doc_id( fts_table_t fts_table; que_t* graph = NULL; fts_cache_t* cache = table->fts->cache; + char table_name[MAX_FULL_NAME_LEN]; retry: ut_a(table->fts->doc_col != ULINT_UNDEFINED); @@ -2691,7 +2775,7 @@ retry: fts_table.type = FTS_COMMON_TABLE; fts_table.table = table; - fts_table.parent = table->name; + fts_table.parent = table->name.m_name; trx = trx_allocate_for_background(); @@ -2702,10 +2786,13 @@ retry: pars_info_bind_function( info, "my_func", fts_fetch_store_doc_id, doc_id); + fts_get_table_name(&fts_table, table_name); + pars_info_bind_id(info, true, "config_table", table_name); + graph = fts_parse_sql( &fts_table, info, "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" + "DECLARE CURSOR c IS SELECT value FROM $config_table" " WHERE key = 'synced_doc_id' FOR UPDATE;\n" "BEGIN\n" "" @@ -2749,7 +2836,7 @@ retry: if (doc_id_cmp > *doc_id) { 
error = fts_update_sync_doc_id( - table, table->name, cache->synced_doc_id, trx); + table, table->name.m_name, cache->synced_doc_id, trx); } *doc_id = cache->next_doc_id; @@ -2761,10 +2848,8 @@ func_exit: } else { *doc_id = 0; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) " - "while getting next doc id.\n", ut_strerr(error)); - + ib::error() << "(" << ut_strerr(error) << ") while getting" + " next doc id."; fts_sql_rollback(trx); if (error == DB_DEADLOCK) { @@ -2799,6 +2884,7 @@ fts_update_sync_doc_id( dberr_t error; ibool local_trx = FALSE; fts_cache_t* cache = table->fts->cache; + char fts_name[MAX_FULL_NAME_LEN]; fts_table.suffix = "CONFIG"; fts_table.table_id = table->id; @@ -2807,7 +2893,7 @@ fts_update_sync_doc_id( if (table_name) { fts_table.parent = table_name; } else { - fts_table.parent = table->name; + fts_table.parent = table->name.m_name; } if (!trx) { @@ -2824,10 +2910,13 @@ fts_update_sync_doc_id( pars_info_bind_varchar_literal(info, "doc_id", id, id_len); + fts_get_table_name(&fts_table, fts_name); + pars_info_bind_id(info, true, "table_name", fts_name); + graph = fts_parse_sql( &fts_table, info, - "BEGIN " - "UPDATE \"%s\" SET value = :doc_id" + "BEGIN" + " UPDATE $table_name SET value = :doc_id" " WHERE key = 'synced_doc_id';"); error = fts_eval_sql(trx, graph); @@ -2840,9 +2929,8 @@ fts_update_sync_doc_id( cache->synced_doc_id = doc_id; } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "(%s) while updating last doc id.", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ") while" + " updating last doc id."; fts_sql_rollback(trx); } @@ -2855,7 +2943,6 @@ fts_update_sync_doc_id( /*********************************************************************//** Create a new fts_doc_ids_t. 
@return new fts_doc_ids_t */ -UNIV_INTERN fts_doc_ids_t* fts_doc_ids_create(void) /*====================*/ @@ -2876,7 +2963,6 @@ fts_doc_ids_create(void) /*********************************************************************//** Free a fts_doc_ids_t. */ - void fts_doc_ids_free( /*=============*/ @@ -2974,6 +3060,7 @@ fts_delete( /* Note the deleted document for OPTIMIZE to purge. */ if (error == DB_SUCCESS) { + char table_name[MAX_FULL_NAME_LEN]; trx->op_info = "adding doc id to FTS DELETED"; @@ -2981,10 +3068,13 @@ fts_delete( fts_table.suffix = "DELETED"; + fts_get_table_name(&fts_table, table_name); + pars_info_bind_id(info, true, "deleted", table_name); + graph = fts_parse_sql( &fts_table, info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);"); + "BEGIN INSERT INTO $deleted VALUES (:doc_id);"); error = fts_eval_sql(trx, graph); @@ -3032,7 +3122,6 @@ fts_modify( /*********************************************************************//** Create a new document id. @return DB_SUCCESS if all went well else error */ -UNIV_INTERN dberr_t fts_create_doc_id( /*==============*/ @@ -3139,7 +3228,6 @@ fts_commit_table( The given transaction is about to be committed; do whatever is necessary from the FTS system's POV. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_commit( /*=======*/ @@ -3170,7 +3258,6 @@ fts_commit( /*********************************************************************//** Initialize a document. */ -UNIV_INTERN void fts_doc_init( /*=========*/ @@ -3185,7 +3272,6 @@ fts_doc_init( /*********************************************************************//** Free document. */ -UNIV_INTERN void fts_doc_free( /*=========*/ @@ -3197,9 +3283,7 @@ fts_doc_free( rbt_free(doc->tokens); } -#ifdef UNIV_DEBUG - memset(doc, 0, sizeof(*doc)); -#endif /* UNIV_DEBUG */ + ut_d(memset(doc, 0, sizeof(*doc))); mem_heap_free(heap); } @@ -3208,7 +3292,6 @@ fts_doc_free( Callback function for fetch that stores a row id to the location pointed. 
The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8. @return always returns NULL */ -UNIV_INTERN void* fts_fetch_row_id( /*=============*/ @@ -3234,7 +3317,6 @@ fts_fetch_row_id( Callback function for fetch that stores the text of an FTS document, converting each column to UTF-16. @return always FALSE */ -UNIV_INTERN ibool fts_query_expansion_fetch_doc( /*==========================*/ @@ -3273,13 +3355,11 @@ fts_query_expansion_fetch_doc( } if (!doc_charset) { - ulint prtype = dfield->type.prtype; - doc_charset = innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); + doc_charset = fts_get_charset(dfield->type.prtype); } doc.charset = doc_charset; + doc.is_ngram = result_doc->is_ngram; if (dfield_is_ext(dfield)) { /* We ignore columns that are stored externally, this @@ -3296,9 +3376,11 @@ fts_query_expansion_fetch_doc( } if (field_no == 0) { - fts_tokenize_document(&doc, result_doc); + fts_tokenize_document(&doc, result_doc, + result_doc->parser); } else { - fts_tokenize_document_next(&doc, doc_len, result_doc); + fts_tokenize_document_next(&doc, doc_len, result_doc, + result_doc->parser); } exp = que_node_get_next(exp); @@ -3343,6 +3425,7 @@ fts_fetch_doc_from_rec( ulint i; ulint doc_len = 0; ulint processed_doc = 0; + st_mysql_ftparser* parser; if (!get_doc) { return; @@ -3350,6 +3433,7 @@ fts_fetch_doc_from_rec( index = get_doc->index_cache->index; table = get_doc->index_cache->index->table; + parser = get_doc->index_cache->index->parser; clust_rec = btr_pcur_get_rec(pcur); @@ -3361,23 +3445,18 @@ fts_fetch_doc_from_rec( clust_pos = dict_col_get_clust_pos(col, clust_index); if (!get_doc->index_cache->charset) { - ulint prtype = ifield->col->prtype; - - get_doc->index_cache->charset = - innobase_get_fts_charset( - (int) (prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); + get_doc->index_cache->charset = fts_get_charset( + ifield->col->prtype); } if 
(rec_offs_nth_extern(offsets, clust_pos)) { doc->text.f_str = btr_rec_copy_externally_stored_field( clust_rec, offsets, - dict_table_zip_size(table), + dict_table_page_size(table), clust_pos, &doc->text.f_len, static_cast( - doc->self_heap->arg), - NULL); + doc->self_heap->arg)); } else { doc->text.f_str = (byte*) rec_get_nth_field( clust_rec, offsets, clust_pos, @@ -3386,6 +3465,7 @@ fts_fetch_doc_from_rec( doc->found = TRUE; doc->charset = get_doc->index_cache->charset; + doc->is_ngram = index->is_ngram; /* Null Field */ if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) { @@ -3393,9 +3473,9 @@ fts_fetch_doc_from_rec( } if (processed_doc == 0) { - fts_tokenize_document(doc, NULL); + fts_tokenize_document(doc, NULL, parser); } else { - fts_tokenize_document_next(doc, doc_len, NULL); + fts_tokenize_document_next(doc, doc_len, NULL, parser); } processed_doc++; @@ -3449,8 +3529,7 @@ fts_add_doc_by_id( heap = mem_heap_create(512); clust_index = dict_table_get_first_index(table); - fts_id_index = dict_table_get_index_on_name( - table, FTS_DOC_ID_INDEX_NAME); + fts_id_index = table->fts_doc_id_index; /* Check whether the index on FTS_DOC_ID is cluster index */ is_id_cluster = (clust_index == fts_id_index); @@ -3636,7 +3715,6 @@ fts_read_ulint( /*********************************************************************//** Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ -UNIV_INTERN doc_id_t fts_get_max_doc_id( /*===============*/ @@ -3648,7 +3726,7 @@ fts_get_max_doc_id( mtr_t mtr; btr_pcur_t pcur; - index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME); + index = table->fts_doc_id_index; if (!index) { return(0); @@ -3706,7 +3784,6 @@ func_exit: /*********************************************************************//** Fetch document with the given document id. 
@return DB_SUCCESS if OK else error */ -UNIV_INTERN dberr_t fts_doc_fetch_by_doc_id( /*====================*/ @@ -3835,7 +3912,6 @@ fts_doc_fetch_by_doc_id( /*********************************************************************//** Write out a single word's data as new entry/entries in the INDEX table. @return DB_SUCCESS if all OK. */ -UNIV_INTERN dberr_t fts_write_node( /*===========*/ @@ -3851,11 +3927,17 @@ fts_write_node( ib_time_t start_time; doc_id_t last_doc_id; doc_id_t first_doc_id; + char table_name[MAX_FULL_NAME_LEN]; + + ut_a(node->ilist != NULL); if (*graph) { info = (*graph)->info; } else { info = pars_info_create(); + + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "index_table_name", table_name); } pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len); @@ -3881,13 +3963,14 @@ fts_write_node( DATA_BLOB, DATA_BINARY_TYPE); if (!*graph) { + *graph = fts_parse_sql( fts_table, info, "BEGIN\n" - "INSERT INTO \"%s\" VALUES " - "(:token, :first_doc_id," - " :last_doc_id, :doc_count, :ilist);"); + "INSERT INTO $index_table_name VALUES" + " (:token, :first_doc_id," + " :last_doc_id, :doc_count, :ilist);"); } start_time = ut_time(); @@ -3912,6 +3995,7 @@ fts_sync_add_deleted_cache( pars_info_t* info; que_t* graph; fts_table_t fts_table; + char table_name[MAX_FULL_NAME_LEN]; doc_id_t dummy = 0; dberr_t error = DB_SUCCESS; ulint n_elems = ib_vector_size(doc_ids); @@ -3927,10 +4011,13 @@ fts_sync_add_deleted_cache( FTS_INIT_FTS_TABLE( &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table); + fts_get_table_name(&fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); + graph = fts_parse_sql( &fts_table, info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);"); + "BEGIN INSERT INTO $table_name VALUES (:doc_id);"); for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) { fts_update_t* update; @@ -4057,11 +4144,8 @@ fts_sync_write_words( n_nodes += ib_vector_size(word->nodes); if (error != 
DB_SUCCESS && !print_error) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error (%s) writing " - "word node to FTS auxiliary index " - "table.\n", ut_strerr(error)); - + ib::error() << "(" << ut_strerr(error) << ") writing" + " word node to FTS auxiliary index table."; print_error = TRUE; } } @@ -4104,6 +4188,7 @@ fts_sync_write_doc_stat( doc_id_t doc_id; dberr_t error = DB_SUCCESS; ib_uint32_t word_count; + char table_name[MAX_FULL_NAME_LEN]; if (*graph) { info = (*graph)->info; @@ -4126,10 +4211,15 @@ fts_sync_write_doc_stat( FTS_INIT_INDEX_TABLE( &fts_table, "DOC_ID", FTS_INDEX_TABLE, index); + fts_get_table_name(&fts_table, table_name); + + pars_info_bind_id(info, true, "doc_id_table", table_name); + *graph = fts_parse_sql( &fts_table, info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);"); + "BEGIN" + " INSERT INTO $doc_id_table VALUES (:doc_id, :count);"); } for (;;) { @@ -4139,18 +4229,15 @@ fts_sync_write_doc_stat( break; /* Exit the loop. */ } else { - ut_print_timestamp(stderr); if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout writing to FTS doc_id. " - "Retrying!\n"); + ib::warn() << "Lock wait timeout writing to" + " FTS doc_id. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while writing to FTS doc_id.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while writing to FTS doc_id."; break; /* Exit the loop. 
*/ } @@ -4251,6 +4338,7 @@ fts_is_word_in_index( { pars_info_t* info; dberr_t error; + char table_name[MAX_FULL_NAME_LEN]; trx->op_info = "looking up word in FTS index"; @@ -4260,6 +4348,8 @@ fts_is_word_in_index( info = pars_info_create(); } + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); pars_info_bind_function(info, "my_func", fts_lookup_word, found); pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); @@ -4270,8 +4360,8 @@ fts_is_word_in_index( "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" " SELECT doc_count\n" - " FROM \"%s\"\n" - " WHERE word = :word " + " FROM $table_name\n" + " WHERE word = :word" " ORDER BY first_doc_id;\n" "BEGIN\n" "\n" @@ -4292,18 +4382,15 @@ fts_is_word_in_index( break; /* Exit the loop. */ } else { - ut_print_timestamp(stderr); if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); + ib::warn() << "Lock wait timeout reading" + " FTS index. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS index.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while reading FTS index."; break; /* Exit the loop. 
*/ } @@ -4332,12 +4419,10 @@ fts_sync_begin( sync->trx = trx_allocate_for_background(); if (fts_enable_diag_print) { - ib_logf(IB_LOG_LEVEL_INFO, - "FTS SYNC for table %s, deleted count: %ld size: " - "%lu bytes", - sync->table->name, - ib_vector_size(cache->deleted_doc_ids), - cache->total_size); + ib::info() << "FTS SYNC for table " << sync->table->name + << ", deleted count: " + << ib_vector_size(cache->deleted_doc_ids) + << " size: " << cache->total_size << " bytes"; } } @@ -4358,13 +4443,12 @@ fts_sync_index( trx->op_info = "doing SYNC index"; if (fts_enable_diag_print) { - ib_logf(IB_LOG_LEVEL_INFO, - "SYNC words: %ld", rbt_size(index_cache->words)); + ib::info() << "SYNC words: " << rbt_size(index_cache->words); } ut_ad(rbt_validate(index_cache->words)); - error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache); + error = fts_sync_write_words(trx, index_cache, sync->unlock_cache); #ifdef FTS_DOC_STATS_DEBUG /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID" @@ -4476,18 +4560,16 @@ fts_sync_commit( fts_sql_rollback(trx); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ") during SYNC."; } if (fts_enable_diag_print && elapsed_time) { - ib_logf(IB_LOG_LEVEL_INFO, - "SYNC for table %s: SYNC time : %lu secs: " - "elapsed %lf ins/sec", - sync->table->name, - (ulong) (ut_time() - sync->start_time), - (double) n_nodes/ (double) elapsed_time); + ib::info() << "SYNC for table " << sync->table->name + << ": SYNC time: " + << (ut_time() - sync->start_time) + << " secs: elapsed " + << (double) n_nodes / elapsed_time + << " ins/sec"; } /* Avoid assertion in trx_free(). 
*/ @@ -4611,10 +4693,6 @@ begin_sync: index_cache = static_cast( ib_vector_get(cache->indexes, i)); - if (index_cache->index->to_be_dropped) { - continue; - } - error = fts_sync_index(sync, index_cache); if (error != DB_SUCCESS && !sync->interrupted) { @@ -4647,7 +4725,7 @@ begin_sync: end_sync: if (error == DB_SUCCESS && !sync->interrupted) { error = fts_sync_commit(sync); - } else { + } else { fts_sync_rollback(sync); } @@ -4678,7 +4756,6 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in] wait whether wait for existing sync to finish @param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ -UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, @@ -4690,7 +4767,8 @@ fts_sync_table( ut_ad(table->fts); - if (!dict_table_is_discarded(table) && table->fts->cache) { + if (!dict_table_is_discarded(table) && table->fts->cache + && !dict_table_is_corrupted(table)) { err = fts_sync(table->fts->cache->sync, unlock_cache, wait, has_dict); } @@ -4698,6 +4776,175 @@ fts_sync_table( return(err); } +/** Check fts token +1. for ngram token, check whether the token contains any words in stopwords +2. for non-ngram token, check if it's stopword or less than fts_min_token_size +or greater than fts_max_token_size. 
+@param[in] token token string +@param[in] stopwords stopwords rb tree +@param[in] is_ngram is ngram parser +@param[in] cs token charset +@retval true if it is not stopword and length in range +@retval false if it is stopword or lenght not in range */ +bool +fts_check_token( + const fts_string_t* token, + const ib_rbt_t* stopwords, + bool is_ngram, + const CHARSET_INFO* cs) +{ + ut_ad(cs != NULL || stopwords == NULL); + + if (!is_ngram) { + ib_rbt_bound_t parent; + + if (token->f_n_char < fts_min_token_size + || token->f_n_char > fts_max_token_size + || (stopwords != NULL + && rbt_search(stopwords, &parent, token) == 0)) { + return(false); + } else { + return(true); + } + } + + /* Check token for ngram. */ + DBUG_EXECUTE_IF( + "fts_instrument_ignore_ngram_check", + return(true); + ); + + /* We ignore fts_min_token_size when ngram */ + ut_ad(token->f_n_char > 0 + && token->f_n_char <= fts_max_token_size); + + if (stopwords == NULL) { + return(true); + } + + /*Ngram checks whether the token contains any words in stopwords. + We can't simply use CONTAIN to search in stopwords, because it's + built on COMPARE. So we need to tokenize the token into words + from unigram to f_n_char, and check them separately. 
*/ + for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char; + ngram_token_size ++) { + const char* start; + const char* next; + const char* end; + ulint char_len; + ulint n_chars; + + start = reinterpret_cast(token->f_str); + next = start; + end = start + token->f_len; + n_chars = 0; + + while (next < end) { + char_len = my_charlen(cs, next, end); + + if (next + char_len > end || char_len == 0) { + break; + } else { + /* Skip SPACE */ + if (char_len == 1 && *next == ' ') { + start = next + 1; + next = start; + n_chars = 0; + + continue; + } + + next += char_len; + n_chars++; + } + + if (n_chars == ngram_token_size) { + fts_string_t ngram_token; + ngram_token.f_str = + reinterpret_cast( + const_cast(start)); + ngram_token.f_len = next - start; + ngram_token.f_n_char = ngram_token_size; + + ib_rbt_bound_t parent; + if (rbt_search(stopwords, &parent, + &ngram_token) == 0) { + return(false); + } + + /* Move a char forward */ + start += my_charlen(cs, start, end); + n_chars = ngram_token_size - 1; + } + } + } + + return(true); +} + +/** Add the token and its start position to the token's list of positions. 
+@param[in,out] result_doc result doc rb tree +@param[in] str token string +@param[in] position token position */ +static +void +fts_add_token( + fts_doc_t* result_doc, + fts_string_t str, + ulint position) +{ + /* Ignore string whose character number is less than + "fts_min_token_size" or more than "fts_max_token_size" */ + + if (fts_check_token(&str, NULL, result_doc->is_ngram, + result_doc->charset)) { + + mem_heap_t* heap; + fts_string_t t_str; + fts_token_t* token; + ib_rbt_bound_t parent; + ulint newlen; + + heap = static_cast(result_doc->self_heap->arg); + + t_str.f_n_char = str.f_n_char; + + t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1; + + t_str.f_str = static_cast( + mem_heap_alloc(heap, t_str.f_len)); + + newlen = innobase_fts_casedn_str( + result_doc->charset, + reinterpret_cast(str.f_str), str.f_len, + reinterpret_cast(t_str.f_str), t_str.f_len); + + t_str.f_len = newlen; + t_str.f_str[newlen] = 0; + + /* Add the word to the document statistics. If the word + hasn't been seen before we create a new entry for it. */ + if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) { + fts_token_t new_token; + + new_token.text.f_len = newlen; + new_token.text.f_str = t_str.f_str; + new_token.text.f_n_char = t_str.f_n_char; + + new_token.positions = ib_vector_create( + result_doc->self_heap, sizeof(ulint), 32); + + parent.last = rbt_add_node( + result_doc->tokens, &parent, &new_token); + + ut_ad(rbt_validate(result_doc->tokens)); + } + + token = rbt_value(fts_token_t, parent.last); + ib_vector_push(token->positions, &position); + } +} + /******************************************************************** Process next token from document starting at the given position, i.e., add the token's start position to the token's list of positions. 
@@ -4716,107 +4963,216 @@ fts_process_token( { ulint ret; fts_string_t str; - ulint offset = 0; + ulint position; fts_doc_t* result_doc; + byte buf[FTS_MAX_WORD_LEN + 1]; + + str.f_str = buf; /* Determine where to save the result. */ - result_doc = (result) ? result : doc; + result_doc = (result != NULL) ? result : doc; /* The length of a string in characters is set here only. */ + ret = innobase_mysql_fts_get_token( doc->charset, doc->text.f_str + start_pos, - doc->text.f_str + doc->text.f_len, &str, &offset); + doc->text.f_str + doc->text.f_len, &str); - /* Ignore string whose character number is less than - "fts_min_token_size" or more than "fts_max_token_size" */ + position = start_pos + ret - str.f_len + add_pos; - if (str.f_n_char >= fts_min_token_size - && str.f_n_char <= fts_max_token_size) { - - mem_heap_t* heap; - fts_string_t t_str; - fts_token_t* token; - ib_rbt_bound_t parent; - ulint newlen; - - heap = static_cast(result_doc->self_heap->arg); - - t_str.f_n_char = str.f_n_char; - - t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1; - - t_str.f_str = static_cast( - mem_heap_alloc(heap, t_str.f_len)); - - newlen = innobase_fts_casedn_str( - doc->charset, (char*) str.f_str, str.f_len, - (char*) t_str.f_str, t_str.f_len); - - t_str.f_len = newlen; - t_str.f_str[newlen] = 0; - - /* Add the word to the document statistics. If the word - hasn't been seen before we create a new entry for it. 
*/ - if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) { - fts_token_t new_token; - - new_token.text.f_len = newlen; - new_token.text.f_str = t_str.f_str; - new_token.text.f_n_char = t_str.f_n_char; - - new_token.positions = ib_vector_create( - result_doc->self_heap, sizeof(ulint), 32); - - ut_a(new_token.text.f_n_char >= fts_min_token_size); - ut_a(new_token.text.f_n_char <= fts_max_token_size); - - parent.last = rbt_add_node( - result_doc->tokens, &parent, &new_token); - - ut_ad(rbt_validate(result_doc->tokens)); - } - -#ifdef FTS_CHARSET_DEBUG - offset += start_pos + add_pos; -#endif /* FTS_CHARSET_DEBUG */ - - offset += start_pos + ret - str.f_len + add_pos; - - token = rbt_value(fts_token_t, parent.last); - ib_vector_push(token->positions, &offset); - } + fts_add_token(result_doc, str, position); return(ret); } +/*************************************************************//** +Get token char size by charset +@return token size */ +ulint +fts_get_token_size( +/*===============*/ + const CHARSET_INFO* cs, /*!< in: Character set */ + const char* token, /*!< in: token */ + ulint len) /*!< in: token length */ +{ + char* start; + char* end; + ulint size = 0; + + /* const_cast is for reinterpret_cast below, or it will fail. */ + start = const_cast(token); + end = start + len; + while (start < end) { + int ctype; + int mbl; + + mbl = cs->cset->ctype( + cs, &ctype, + reinterpret_cast(start), + reinterpret_cast(end)); + + size++; + + start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); + } + + return(size); +} + +/*************************************************************//** +FTS plugin parser 'myql_parser' callback function for document tokenize. +Refer to 'st_mysql_ftparser_param' for more detail. 
+@return always returns 0 */ +int +fts_tokenize_document_internal( +/*===========================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */ + const char* doc,/*!< in/out: document */ + int len) /*!< in: document length */ +{ + fts_string_t str; + byte buf[FTS_MAX_WORD_LEN + 1]; + /* JAN: TODO: MySQL 5.7 + MYSQL_FTPARSER_BOOLEAN_INFO bool_info = + { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 }; + */ + MYSQL_FTPARSER_BOOLEAN_INFO bool_info = + { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0}; + + ut_ad(len >= 0); + + str.f_str = buf; + + for (ulint i = 0, inc = 0; i < static_cast(len); i += inc) { + inc = innobase_mysql_fts_get_token( + const_cast(param->cs), + (uchar*)(doc) + i, + (uchar*)(doc) + len, + &str); + + if (str.f_len > 0) { + /* JAN: TODO: MySQL 5.7 + bool_info.position = + static_cast(i + inc - str.f_len); + ut_ad(bool_info.position >= 0); + */ + + /* Stop when add word fails */ + if (param->mysql_add_word( + param, + reinterpret_cast(str.f_str), + static_cast(str.f_len), + &bool_info)) { + break; + } + } + } + + return(0); +} + +/******************************************************************//** +FTS plugin parser 'myql_add_word' callback function for document tokenize. +Refer to 'st_mysql_ftparser_param' for more detail. 
+@return always returns 0 */ +static +int +fts_tokenize_add_word_for_parser( +/*=============================*/ + MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */ + const char* word, /* in: token word */ + int word_len, /* in: word len */ + MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */ +{ + fts_string_t str; + fts_tokenize_param_t* fts_param; + fts_doc_t* result_doc; + ulint position; + + fts_param = static_cast(param->mysql_ftparam); + result_doc = fts_param->result_doc; + ut_ad(result_doc != NULL); + + str.f_str = (byte*)(word); + str.f_len = word_len; + str.f_n_char = fts_get_token_size( + const_cast(param->cs), word, word_len); + + /* JAN: TODO: MySQL 5.7 FTS + ut_ad(boolean_info->position >= 0); + position = boolean_info->position + fts_param->add_pos; + */ + position = fts_param->add_pos; + + fts_add_token(result_doc, str, position); + + return(0); +} + +/******************************************************************//** +Parse a document using an external / user supplied parser */ +static +void +fts_tokenize_by_parser( +/*===================*/ + fts_doc_t* doc, /* in/out: document to tokenize */ + st_mysql_ftparser* parser, /* in: plugin fts parser */ + fts_tokenize_param_t* fts_param) /* in: fts tokenize param */ +{ + MYSQL_FTPARSER_PARAM param; + + ut_a(parser); + + /* Set paramters for param */ + param.mysql_parse = fts_tokenize_document_internal; + param.mysql_add_word = fts_tokenize_add_word_for_parser; + param.mysql_ftparam = fts_param; + param.cs = doc->charset; + param.doc = reinterpret_cast(doc->text.f_str); + param.length = static_cast(doc->text.f_len); + param.mode= MYSQL_FTPARSER_SIMPLE_MODE; + + PARSER_INIT(parser, ¶m); + parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); +} + /******************************************************************//** Tokenize a document. 
*/ -UNIV_INTERN void fts_tokenize_document( /*==================*/ fts_doc_t* doc, /* in/out: document to tokenize */ - fts_doc_t* result) /* out: if provided, save + fts_doc_t* result, /* out: if provided, save the result token here */ + st_mysql_ftparser* parser) /* in: plugin fts parser */ { - ulint inc; - ut_a(!doc->tokens); ut_a(doc->charset); doc->tokens = rbt_create_arg_cmp( - sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset); + sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset); - for (ulint i = 0; i < doc->text.f_len; i += inc) { - inc = fts_process_token(doc, result, i, 0); - ut_a(inc > 0); + if (parser != NULL) { + fts_tokenize_param_t fts_param; + + fts_param.result_doc = (result != NULL) ? result : doc; + fts_param.add_pos = 0; + + fts_tokenize_by_parser(doc, parser, &fts_param); + } else { + ulint inc; + + for (ulint i = 0; i < doc->text.f_len; i += inc) { + inc = fts_process_token(doc, result, i, 0); + ut_a(inc > 0); + } } } /******************************************************************//** Continue to tokenize a document. */ -UNIV_INTERN void fts_tokenize_document_next( /*=======================*/ @@ -4824,22 +5180,31 @@ fts_tokenize_document_next( tokenize */ ulint add_pos, /*!< in: add this position to all tokens from this tokenization */ - fts_doc_t* result) /*!< out: if provided, save + fts_doc_t* result, /*!< out: if provided, save the result token here */ + st_mysql_ftparser* parser) /* in: plugin fts parser */ { - ulint inc; - ut_a(doc->tokens); - for (ulint i = 0; i < doc->text.f_len; i += inc) { - inc = fts_process_token(doc, result, i, add_pos); - ut_a(inc > 0); + if (parser) { + fts_tokenize_param_t fts_param; + + fts_param.result_doc = (result != NULL) ? 
result : doc; + fts_param.add_pos = add_pos; + + fts_tokenize_by_parser(doc, parser, &fts_param); + } else { + ulint inc; + + for (ulint i = 0; i < doc->text.f_len; i += inc) { + inc = fts_process_token(doc, result, i, add_pos); + ut_a(inc > 0); + } } } /******************************************************************** Create the vector of fts_get_doc_t instances. */ -UNIV_INTERN ib_vector_t* fts_get_docs_create( /*================*/ @@ -4847,19 +5212,16 @@ fts_get_docs_create( fts_get_doc_t instances */ fts_cache_t* cache) /*!< in: fts cache */ { - ulint i; ib_vector_t* get_docs; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X)); + /* We need one instance of fts_get_doc_t per index. */ - get_docs = ib_vector_create( - cache->self_heap, sizeof(fts_get_doc_t), 4); + get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4); /* Create the get_doc instance, we need one of these per FTS index. */ - for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { dict_index_t** index; fts_get_doc_t* get_doc; @@ -4911,7 +5273,6 @@ fts_get_docs_clear( /*********************************************************************//** Get the initial Doc ID by consulting the CONFIG table @return initial Doc ID */ -UNIV_INTERN doc_id_t fts_init_doc_id( /*============*/ @@ -4986,7 +5347,6 @@ fts_is_index_updated( /*********************************************************************//** Fetch COUNT(*) from specified table. 
@return the number of rows in the table */ -UNIV_INTERN ulint fts_get_rows_count( /*===============*/ @@ -4997,6 +5357,7 @@ fts_get_rows_count( que_t* graph; dberr_t error; ulint count = 0; + char table_name[MAX_FULL_NAME_LEN]; trx = trx_allocate_for_background(); @@ -5006,13 +5367,16 @@ fts_get_rows_count( pars_info_bind_function(info, "my_func", fts_read_ulint, &count); + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); + graph = fts_parse_sql( fts_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT COUNT(*) " - " FROM \"%s\";\n" + " SELECT COUNT(*)" + " FROM $table_name;\n" "BEGIN\n" "\n" "OPEN c;\n" @@ -5034,18 +5398,14 @@ fts_get_rows_count( } else { fts_sql_rollback(trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS table. " - "Retrying!\n"); + ib::warn() << "lock wait timeout reading" + " FTS table. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS table.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while reading FTS table."; break; /* Exit the loop. */ } @@ -5166,7 +5526,6 @@ fts_savepoint_free( /*********************************************************************//** Free an FTS trx. */ -UNIV_INTERN void fts_trx_free( /*=========*/ @@ -5210,7 +5569,6 @@ fts_trx_free( /*********************************************************************//** Extract the doc id from the FTS hidden column. @return doc id that was extracted from rec */ -UNIV_INTERN doc_id_t fts_get_doc_id_from_row( /*====================*/ @@ -5234,37 +5592,37 @@ fts_get_doc_id_from_row( return(doc_id); } -/*********************************************************************//** -Extract the doc id from the FTS hidden column. +/** Extract the doc id from the record that belongs to index. 
+@param[in] table table +@param[in] rec record contains FTS_DOC_ID +@param[in] index index of rec +@param[in] heap heap memory @return doc id that was extracted from rec */ -UNIV_INTERN doc_id_t fts_get_doc_id_from_rec( -/*====================*/ - dict_table_t* table, /*!< in: table */ - const rec_t* rec, /*!< in: rec */ - mem_heap_t* heap) /*!< in: heap */ + dict_table_t* table, + const rec_t* rec, + const dict_index_t* index, + mem_heap_t* heap) { ulint len; const byte* data; ulint col_no; doc_id_t doc_id = 0; - dict_index_t* clust_index; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; mem_heap_t* my_heap = heap; ut_a(table->fts->doc_col != ULINT_UNDEFINED); - clust_index = dict_table_get_first_index(table); - rec_offs_init(offsets_); offsets = rec_get_offsets( - rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap); + rec, index, offsets, ULINT_UNDEFINED, &my_heap); + + col_no = dict_col_get_index_pos( + &table->cols[table->fts->doc_col], index); - col_no = dict_col_get_clust_pos( - &table->cols[table->fts->doc_col], clust_index); ut_ad(col_no != ULINT_UNDEFINED); data = rec_get_nth_field(rec, offsets, col_no, &len); @@ -5283,7 +5641,6 @@ fts_get_doc_id_from_rec( /*********************************************************************//** Search the index specific cache for a particular FTS index. @return the index specific cache else NULL */ -UNIV_INTERN fts_index_cache_t* fts_find_index_cache( /*=================*/ @@ -5299,7 +5656,6 @@ fts_find_index_cache( /*********************************************************************//** Search cache for word. 
@return the word node vector if found else NULL */ -UNIV_INTERN const ib_vector_t* fts_cache_find_word( /*================*/ @@ -5308,12 +5664,12 @@ fts_cache_find_word( { ib_rbt_bound_t parent; const ib_vector_t* nodes = NULL; -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG dict_table_t* table = index_cache->index->table; fts_cache_t* cache = table->fts->cache; - ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)); -#endif + ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X)); +#endif /* UNIV_DEBUG */ /* Lookup the word in the rb tree */ if (rbt_search(index_cache->words, &parent, text) == 0) { @@ -5330,20 +5686,15 @@ fts_cache_find_word( /*********************************************************************//** Check cache for deleted doc id. @return TRUE if deleted */ -UNIV_INTERN ibool fts_cache_is_deleted_doc_id( /*========================*/ const fts_cache_t* cache, /*!< in: cache ito search */ doc_id_t doc_id) /*!< in: doc id to search for */ { - ulint i; - -#ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&cache->deleted_lock)); -#endif - for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { + for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { const fts_update_t* update; update = static_cast( @@ -5360,16 +5711,13 @@ fts_cache_is_deleted_doc_id( /*********************************************************************//** Append deleted doc ids to vector. 
*/ -UNIV_INTERN void fts_cache_append_deleted_doc_ids( /*=============================*/ const fts_cache_t* cache, /*!< in: cache to use */ ib_vector_t* vector) /*!< in: append to this vector */ { - ulint i; - - mutex_enter((ib_mutex_t*) &cache->deleted_lock); + mutex_enter(const_cast(&cache->deleted_lock)); if (cache->deleted_doc_ids == NULL) { mutex_exit((ib_mutex_t*) &cache->deleted_lock); @@ -5377,7 +5725,7 @@ fts_cache_append_deleted_doc_ids( } - for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { + for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { fts_update_t* update; update = static_cast( @@ -5394,7 +5742,6 @@ Wait for the background thread to start. We poll to detect change of state, which is acceptable, since the wait should happen only once during startup. @return true if the thread started else FALSE (i.e timed out) */ -UNIV_INTERN ibool fts_wait_for_background_thread_to_start( /*====================================*/ @@ -5440,10 +5787,9 @@ fts_wait_for_background_thread_to_start( } if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error the background thread " - "for the FTS table %s refuses to start\n", - table->name); + ib::error() << "The background thread for the FTS" + " table " << table->name + << " refuses to start"; count = 0; } @@ -5454,7 +5800,6 @@ fts_wait_for_background_thread_to_start( /*********************************************************************//** Add the FTS document id hidden column. */ -UNIV_INTERN void fts_add_doc_id_column( /*==================*/ @@ -5472,16 +5817,23 @@ fts_add_doc_id_column( DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID); } -/*********************************************************************//** -Update the query graph with a new document id. -@return Doc ID used */ -UNIV_INTERN +/** Add new fts doc id to the update vector. +@param[in] table the table that contains the FTS index. 
+@param[in,out] ufield the fts doc id field in the update vector. + No new memory is allocated for this in this + function. +@param[in,out] next_doc_id the fts doc id that has been added to the + update vector. If 0, a new fts doc id is + automatically generated. The memory provided + for this argument will be used by the update + vector. Ensure that the life time of this + memory matches that of the update vector. +@return the fts doc id used in the update vector */ doc_id_t fts_update_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* ufield, /*!< out: update node */ - doc_id_t* next_doc_id) /*!< in/out: buffer for writing */ + dict_table_t* table, + upd_field_t* ufield, + doc_id_t* next_doc_id) { doc_id_t doc_id; dberr_t error = DB_SUCCESS; @@ -5495,6 +5847,8 @@ fts_update_doc_id( if (error == DB_SUCCESS) { dict_index_t* clust_index; + dict_col_t* col = dict_table_get_nth_col( + table, table->fts->doc_col); ufield->exp = NULL; @@ -5502,8 +5856,8 @@ fts_update_doc_id( clust_index = dict_table_get_first_index(table); - ufield->field_no = dict_col_get_clust_pos( - &table->cols[table->fts->doc_col], clust_index); + ufield->field_no = dict_col_get_clust_pos(col, clust_index); + dict_col_copy_type(col, dfield_get_type(&ufield->new_val)); /* It is possible we update record that has not yet be sync-ed from last crash. */ @@ -5513,6 +5867,7 @@ fts_update_doc_id( fts_write_doc_id((byte*) next_doc_id, doc_id); ufield->new_val.data = next_doc_id; + ufield->new_val.ext = 0; } return(doc_id); @@ -5522,7 +5877,6 @@ fts_update_doc_id( Check if the table has an FTS index. This is the non-inline version of dict_table_has_fts_index(). @return TRUE if table has an FTS index */ -UNIV_INTERN ibool fts_dict_table_has_fts_index( /*=========================*/ @@ -5531,61 +5885,78 @@ fts_dict_table_has_fts_index( return(dict_table_has_fts_index(table)); } +/** fts_t constructor. 
+@param[in] table table with FTS indexes +@param[in,out] heap memory heap where 'this' is stored */ +fts_t::fts_t( + const dict_table_t* table, + mem_heap_t* heap) + : + bg_threads(0), + fts_status(0), + add_wq(NULL), + cache(NULL), + doc_col(ULINT_UNDEFINED), + fts_heap(heap) +{ + ut_a(table->fts == NULL); + + mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex); + + ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap); + + indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4); + + dict_table_get_all_fts_indexes(table, indexes); +} + +/** fts_t destructor. */ +fts_t::~fts_t() +{ + mutex_free(&bg_threads_mutex); + + ut_ad(add_wq == NULL); + + if (cache != NULL) { + fts_cache_clear(cache); + fts_cache_destroy(cache); + cache = NULL; + } + + /* There is no need to call ib_vector_free() on this->indexes + because it is stored in this->fts_heap. */ +} + /*********************************************************************//** Create an instance of fts_t. @return instance of fts_t */ -UNIV_INTERN fts_t* fts_create( /*=======*/ dict_table_t* table) /*!< in/out: table with FTS indexes */ { fts_t* fts; - ib_alloc_t* heap_alloc; mem_heap_t* heap; - ut_a(!table->fts); - heap = mem_heap_create(512); fts = static_cast(mem_heap_alloc(heap, sizeof(*fts))); - memset(fts, 0x0, sizeof(*fts)); - - fts->fts_heap = heap; - - fts->doc_col = ULINT_UNDEFINED; - - mutex_create( - fts_bg_threads_mutex_key, &fts->bg_threads_mutex, - SYNC_FTS_BG_THREADS); - - heap_alloc = ib_heap_allocator_create(heap); - fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4); - dict_table_get_all_fts_indexes(table, fts->indexes); + new(fts) fts_t(table, heap); return(fts); } /*********************************************************************//** Free the FTS resources. 
*/ -UNIV_INTERN void fts_free( /*=====*/ dict_table_t* table) /*!< in/out: table with FTS indexes */ { - fts_t* fts = table->fts; + fts_t* fts = table->fts; - mutex_free(&fts->bg_threads_mutex); - - ut_ad(!fts->add_wq); - - if (fts->cache) { - fts_cache_clear(fts->cache); - fts_cache_destroy(fts->cache); - fts->cache = NULL; - } + fts->~fts_t(); mem_heap_free(fts->fts_heap); @@ -5594,7 +5965,6 @@ fts_free( /*********************************************************************//** Signal FTS threads to initiate shutdown. */ -UNIV_INTERN void fts_start_shutdown( /*===============*/ @@ -5612,7 +5982,6 @@ fts_start_shutdown( /*********************************************************************//** Wait for FTS threads to shutdown. */ -UNIV_INTERN void fts_shutdown( /*=========*/ @@ -5657,7 +6026,6 @@ fts_savepoint_copy( /*********************************************************************//** Take a FTS savepoint. */ -UNIV_INTERN void fts_savepoint_take( /*===============*/ @@ -5717,7 +6085,6 @@ fts_savepoint_lookup( Release the savepoint data identified by name. All savepoints created after the named savepoint are kept. @return DB_SUCCESS or error code */ -UNIV_INTERN void fts_savepoint_release( /*==================*/ @@ -5760,7 +6127,6 @@ fts_savepoint_release( /**********************************************************************//** Refresh last statement savepoint. */ -UNIV_INTERN void fts_savepoint_laststmt_refresh( /*===========================*/ @@ -5836,7 +6202,6 @@ fts_undo_last_stmt( /**********************************************************************//** Rollback to savepoint indentified by name. @return DB_SUCCESS or error code */ -UNIV_INTERN void fts_savepoint_rollback_last_stmt( /*=============================*/ @@ -5886,7 +6251,6 @@ fts_savepoint_rollback_last_stmt( /**********************************************************************//** Rollback to savepoint indentified by name. 
@return DB_SUCCESS or error code */ -UNIV_INTERN void fts_savepoint_rollback( /*===================*/ @@ -5947,16 +6311,17 @@ fts_savepoint_rollback( } } -/**********************************************************************//** -Check if a table is an FTS auxiliary table name. -@return TRUE if the name matches an auxiliary table name pattern */ +/** Check if a table is an FTS auxiliary table name. +@param[out] table FTS table info +@param[in] name Table name +@param[in] len Length of table name +@return true if the name matches an auxiliary table name pattern */ static -ibool +bool fts_is_aux_table_name( -/*==================*/ - fts_aux_table_t*table, /*!< out: table info */ - const char* name, /*!< in: table name */ - ulint len) /*!< in: length of table name */ + fts_aux_table_t* table, + const char* name, + ulint len) { const char* ptr; char* end; @@ -5986,14 +6351,14 @@ fts_is_aux_table_name( /* Try and read the table id. */ if (!fts_read_object_id(&table->parent_id, ptr)) { - return(FALSE); + return(false); } /* Skip the table id. */ ptr = static_cast(memchr(ptr, '_', len)); if (ptr == NULL) { - return(FALSE); + return(false); } /* Skip the underscore. */ @@ -6005,7 +6370,7 @@ fts_is_aux_table_name( for (i = 0; fts_common_tables[i] != NULL; ++i) { if (strncmp(ptr, fts_common_tables[i], len) == 0) { - return(TRUE); + return(true); } } @@ -6017,14 +6382,14 @@ fts_is_aux_table_name( /* Try and read the index id. */ if (!fts_read_object_id(&table->index_id, ptr)) { - return(FALSE); + return(false); } /* Skip the table id. */ ptr = static_cast(memchr(ptr, '_', len)); if (ptr == NULL) { - return(FALSE); + return(false); } /* Skip the underscore. */ @@ -6033,20 +6398,20 @@ fts_is_aux_table_name( len = end - ptr; /* Search the FT index specific array. 
*/ - for (i = 0; fts_index_selector[i].value; ++i) { + for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) { if (strncmp(ptr, fts_get_suffix(i), len) == 0) { - return(TRUE); + return(true); } } /* Other FT index specific table(s). */ if (strncmp(ptr, "DOC_ID", len) == 0) { - return(TRUE); + return(true); } } - return(FALSE); + return(false); } /**********************************************************************//** @@ -6150,7 +6515,6 @@ fts_set_hex_format( /*****************************************************************//** Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES. @return DB_SUCCESS or error code. */ -UNIV_INTERN dberr_t fts_update_hex_format_flag( /*=======================*/ @@ -6169,8 +6533,8 @@ fts_update_hex_format_flag( "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n" "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS\n" - " SELECT MIX_LEN " - " FROM SYS_TABLES " + " SELECT MIX_LEN" + " FROM SYS_TABLES" " WHERE ID = :table_id FOR UPDATE;" "\n" "BEGIN\n" @@ -6205,7 +6569,7 @@ fts_update_hex_format_flag( ut_a(flags2 != ULINT32_UNDEFINED); - return (err); + return(err); } /*********************************************************************//** @@ -6221,7 +6585,7 @@ fts_rename_one_aux_table_to_hex_format( { const char* ptr; fts_table_t fts_table; - char* new_name; + char new_name[MAX_FULL_NAME_LEN]; dberr_t error; ptr = strchr(aux_table->name, '/'); @@ -6262,12 +6626,12 @@ fts_rename_one_aux_table_to_hex_format( ut_a(fts_table.suffix != NULL); - fts_table.parent = parent_table->name; + fts_table.parent = parent_table->name.m_name; fts_table.table_id = aux_table->parent_id; fts_table.index_id = aux_table->index_id; fts_table.table = parent_table; - new_name = fts_get_table_name(&fts_table); + fts_get_table_name(&fts_table, new_name); ut_ad(strcmp(new_name, aux_table->name) != 0); if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { @@ -6278,19 +6642,15 @@ fts_rename_one_aux_table_to_hex_format( FALSE); if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - 
"Failed to rename aux table \'%s\' to " - "new format \'%s\'. ", - aux_table->name, new_name); + ib::warn() << "Failed to rename aux table '" + << aux_table->name << "' to new format '" + << new_name << "'."; } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Renamed aux table \'%s\' to \'%s\'.", - aux_table->name, new_name); + ib::info() << "Renamed aux table '" << aux_table->name + << "' to '" << new_name << "'."; } - mem_free(new_name); - - return (error); + return(error); } /**********************************************************************//** @@ -6319,12 +6679,10 @@ fts_rename_aux_tables_to_hex_format_low( error = fts_update_hex_format_flag(trx, parent_table->id, true); if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting parent table %s to hex format failed.", - parent_table->name); - + ib::warn() << "Setting parent table " << parent_table->name + << " to hex format failed."; fts_sql_rollback(trx); - return (error); + return(error); } DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); @@ -6355,10 +6713,9 @@ fts_rename_aux_tables_to_hex_format_low( if (error != DB_SUCCESS) { dict_table_close(table, TRUE, FALSE); - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to rename one aux table %s " - "Will revert all successful rename " - "operations.", aux_table->name); + ib::warn() << "Failed to rename one aux table " + << aux_table->name << ". 
Will revert" + " all successful rename operations."; fts_sql_rollback(trx); break; @@ -6368,9 +6725,8 @@ fts_rename_aux_tables_to_hex_format_low( dict_table_close(table, TRUE, FALSE); if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex format failed.", - aux_table->name); + ib::warn() << "Setting aux table " << aux_table->name + << " to hex format failed."; fts_sql_rollback(trx); break; @@ -6379,10 +6735,13 @@ fts_rename_aux_tables_to_hex_format_low( if (error != DB_SUCCESS) { ut_ad(count != ib_vector_size(tables)); + /* If rename fails, thr trx would be rolled back, we can't use it any more, we'll start a new background trx to do the reverting. */ - ut_a(trx->state == TRX_STATE_NOT_STARTED); + + ut_ad(!trx_is_started(trx)); + bool not_rename = false; /* Try to revert those succesful rename operations @@ -6417,7 +6776,7 @@ fts_rename_aux_tables_to_hex_format_low( trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME); - err = row_rename_table_for_mysql(table->name, + err = row_rename_table_for_mysql(table->name.m_name, aux_table->name, trx_bg, FALSE); @@ -6425,9 +6784,9 @@ fts_rename_aux_tables_to_hex_format_low( dict_table_close(table, TRUE, FALSE); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert " - "table %s. Please revert manually.", - table->name); + ib::warn() << "Failed to revert table " + << table->name << ". 
Please revert" + " manually."; fts_sql_rollback(trx_bg); trx_free_for_background(trx_bg); /* Continue to clear aux tables' flags2 */ @@ -6442,7 +6801,7 @@ fts_rename_aux_tables_to_hex_format_low( DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); } - return (error); + return(error); } /**********************************************************************//** @@ -6456,15 +6815,16 @@ fts_fake_hex_to_dec( { ib_id_t dec_id = 0; char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; - int ret MY_ATTRIBUTE((unused)); - ret = sprintf(tmp_id, UINT64PFx, id); +#ifdef UNIV_DEBUG + int ret = +#endif /* UNIV_DEBUG */ + sprintf(tmp_id, UINT64PFx, id); ut_ad(ret == 16); -#ifdef _WIN32 - ret = sscanf(tmp_id, "%016llu", &dec_id); -#else - ret = sscanf(tmp_id, "%016" PRIu64, &dec_id); -#endif /* _WIN32 */ +#ifdef UNIV_DEBUG + ret = +#endif /* UNIV_DEBUG */ + sscanf(tmp_id, "%016" UINT64scan, &dec_id); ut_ad(ret == 1); return dec_id; @@ -6529,7 +6889,7 @@ fts_set_index_corrupt( } for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) { - dict_index_t* index = static_cast( + dict_index_t* index = static_cast( ib_vector_getp_const(fts->indexes, j)); if (index->id == id) { dict_set_corrupted(index, trx, @@ -6634,12 +6994,10 @@ fts_rename_aux_tables_to_hex_format( if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Rollback operations on all aux tables of table %s. " - "All the fts index associated with the table are " - "marked as corrupted. Please rebuild the " - "index again.", parent_table->name); - fts_sql_rollback(trx_rename); + ib::warn() << "Rollback operations on all aux tables of " + "table "<< parent_table->name << ". All the fts index " + "associated with the table are marked as corrupted. " + "Please rebuild the index again."; /* Corrupting the fts index related to parent table. 
*/ trx_t* trx_corrupt; @@ -6669,25 +7027,18 @@ fts_set_parent_hex_format_flag( { if (!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME)) { - DBUG_EXECUTE_IF("parent_table_flag_fail", - ib_logf(IB_LOG_LEVEL_FATAL, - "Setting parent table %s to hex format " - "failed. Please try to restart the server " - "again, if it doesn't work, the system " - "tables might be corrupted.", - parent_table->name); - return;); + DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE();); dberr_t err = fts_update_hex_format_flag( trx, parent_table->id, true); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Setting parent table %s to hex format " - "failed. Please try to restart the server " - "again, if it doesn't work, the system " - "tables might be corrupted.", - parent_table->name); + ib::fatal() << "Setting parent table " + << parent_table->name + << "to hex format failed. Please try " + << "to restart the server again, if it " + << "doesn't work, the system tables " + << "might be corrupted."; } else { DICT_TF2_FLAG_SET( parent_table, DICT_TF2_FTS_AUX_HEX_NAME); @@ -6725,15 +7076,16 @@ fts_drop_obsolete_aux_table_from_vector( failure, since server would try to drop it on next restart, even if the table was broken. */ - ib_logf(IB_LOG_LEVEL_WARN, - "Fail to drop obsolete aux table '%s', which " - "is harmless. will try to drop it on next " - "restart.", aux_drop_table->name); + ib::warn() << "Failed to drop obsolete aux table " + << aux_drop_table->name << ", which is " + << "harmless. 
will try to drop it on next " + << "restart."; + fts_sql_rollback(trx_drop); } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Dropped obsolete aux table '%s'.", - aux_drop_table->name); + ib::info() << "Dropped obsolete aux" + " table '" << aux_drop_table->name + << "'."; fts_sql_commit(trx_drop); } @@ -6759,16 +7111,22 @@ fts_drop_aux_table_from_vector( /* Check for the validity of the parent table */ if (!fts_valid_parent_table(aux_drop_table)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Parent table of FTS auxiliary table %s not " - "found.", aux_drop_table->name); + + ib::warn() << "Parent table of FTS auxiliary table " + << aux_drop_table->name << " not found."; + dberr_t err = fts_drop_table(trx, aux_drop_table->name); if (err == DB_FAIL) { - char* path = fil_make_ibd_name( - aux_drop_table->name, false); - os_file_delete_if_exists(innodb_file_data_key, - path); - mem_free(path); + + char* path = fil_make_filepath( + NULL, aux_drop_table->name, IBD, false); + + if (path != NULL) { + os_file_delete_if_exists( + innodb_data_file_key, + path , NULL); + ut_free(path); + } } } } @@ -6839,7 +7197,8 @@ fts_check_and_drop_orphaned_tables( orig_parent_id = aux_table->parent_id; orig_index_id = aux_table->index_id; - if (table == NULL || strcmp(table->name, aux_table->name)) { + if (table == NULL + || strcmp(table->name.m_name, aux_table->name)) { bool fake_aux = false; @@ -6874,7 +7233,7 @@ fts_check_and_drop_orphaned_tables( || orig_parent_id != next_aux_table->parent_id) && (!ib_vector_is_empty(aux_tables_to_rename))) { - ulint parent_id = fts_fake_hex_to_dec( + ib_id_t parent_id = fts_fake_hex_to_dec( aux_table->parent_id); parent_table = dict_table_open_on_id( @@ -6936,7 +7295,7 @@ fts_check_and_drop_orphaned_tables( } if (table != NULL) { - dict_table_close(table, true, false); + dict_table_close(table, TRUE, FALSE); } if (!rename) { @@ -6947,7 +7306,7 @@ fts_check_and_drop_orphaned_tables( } /* Filter out the fake aux table by comparing with the - current valid auxiliary table 
name . */ + current valid auxiliary table name. */ for (ulint count = 0; count < ib_vector_size(invalid_aux_tables); count++) { fts_aux_table_t* invalid_aux; @@ -6969,7 +7328,7 @@ fts_check_and_drop_orphaned_tables( if (i + 1 < ib_vector_size(tables)) { next_aux_table = static_cast( - ib_vector_get(tables, i + 1)); + ib_vector_get(tables, i + 1)); } if (next_aux_table == NULL @@ -6982,7 +7341,6 @@ fts_check_and_drop_orphaned_tables( if (!ib_vector_is_empty(aux_tables_to_rename)) { fts_rename_aux_tables_to_hex_format( aux_tables_to_rename, parent_table); - } else { fts_set_parent_hex_format_flag( parent_table, trx); @@ -6998,16 +7356,9 @@ fts_check_and_drop_orphaned_tables( aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); if (drop) { - ib_vector_push(drop_aux_tables, aux_table); + ib_vector_push(drop_aux_tables, aux_table); } else { if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) { - - /* Current table could be one of the three - obsolete tables, in this case, we should - always try to drop it but not rename it. - This could happen when we try to upgrade - from older server to later one, which doesn't - contain these obsolete tables. 
*/ ib_vector_push(obsolete_aux_tables, aux_table); continue; } @@ -7016,22 +7367,36 @@ fts_check_and_drop_orphaned_tables( /* If the aux table is in decimal format, we should rename it, so push it to aux_tables_to_rename */ if (!drop && rename) { - ib_vector_push(aux_tables_to_rename, aux_table); + bool rename_table = true; + for (ulint count = 0; + count < ib_vector_size(aux_tables_to_rename); + count++) { + fts_aux_table_t* rename_aux = + static_cast( + ib_vector_get(aux_tables_to_rename, + count)); + if (strcmp(rename_aux->name, + aux_table->name) == 0) { + rename_table = false; + break; + } + } + + if (rename_table) { + ib_vector_push(aux_tables_to_rename, + aux_table); + } } if (i + 1 < ib_vector_size(tables)) { next_aux_table = static_cast( - ib_vector_get(tables, i + 1)); + ib_vector_get(tables, i + 1)); } if ((next_aux_table == NULL || orig_parent_id != next_aux_table->parent_id) && !ib_vector_is_empty(aux_tables_to_rename)) { - /* All aux tables of parent table, whose id is - last_parent_id, have been checked, try to rename - them if necessary. We had better use a new background - trx to rename rather than the original trx, in case - any failure would cause a complete rollback. 
*/ + ut_ad(rename); ut_ad(!DICT_TF2_FLAG_IS_SET( parent_table, DICT_TF2_FTS_AUX_HEX_NAME)); @@ -7046,21 +7411,22 @@ fts_check_and_drop_orphaned_tables( table = dict_table_open_on_id( aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); + if (table != NULL - && strcmp(table->name, aux_table->name)) { + && strcmp(table->name.m_name, aux_table->name)) { dict_table_close(table, TRUE, FALSE); table = NULL; } if (table != NULL && !DICT_TF2_FLAG_IS_SET( - table, - DICT_TF2_FTS_AUX_HEX_NAME)) { + table, + DICT_TF2_FTS_AUX_HEX_NAME)) { DBUG_EXECUTE_IF("aux_table_flag_fail", - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex " - "format failed.", table->name); + ib::warn() << "Setting aux table " + << table->name << " to hex " + "format failed."; fts_set_index_corrupt( trx, aux_table->index_id, parent_table); @@ -7070,9 +7436,9 @@ fts_check_and_drop_orphaned_tables( trx, table->id, true); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex " - "format failed.", table->name); + ib::warn() << "Setting aux table " + << table->name << " to hex " + "format failed."; fts_set_index_corrupt( trx, aux_table->index_id, @@ -7093,7 +7459,7 @@ table_exit: ut_ad(parent_table != NULL); fts_set_parent_hex_format_flag( - parent_table, trx); + parent_table, trx); } if (parent_table != NULL) { @@ -7116,7 +7482,6 @@ table_exit: /**********************************************************************//** Drop all orphaned FTS auxiliary tables, those that don't have a parent table or FTS index defined on them. 
*/ -UNIV_INTERN void fts_drop_orphaned_tables(void) /*==========================*/ @@ -7134,8 +7499,7 @@ fts_drop_orphaned_tables(void) error = fil_get_space_names(space_name_list); if (error == DB_OUT_OF_MEMORY) { - ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory"); - ut_error; + ib::fatal() << "Out of memory"; } heap = mem_heap_create(1024); @@ -7165,7 +7529,7 @@ fts_drop_orphaned_tables(void) } else { ulint len = strlen(*it); - fts_aux_table->id = fil_get_space_id_for_table(*it); + fts_aux_table->id = fil_space_get_id_by_name(*it); /* We got this list from fil0fil.cc. The tablespace with this name must exist. */ @@ -7191,7 +7555,7 @@ fts_drop_orphaned_tables(void) info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT NAME, ID " + " SELECT NAME, ID" " FROM SYS_TABLES;\n" "BEGIN\n" "\n" @@ -7215,18 +7579,14 @@ fts_drop_orphaned_tables(void) fts_sql_rollback(trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - ib_logf(IB_LOG_LEVEL_WARN, - "lock wait timeout reading SYS_TABLES. " - "Retrying!"); + ib::warn() << "lock wait timeout reading" + " SYS_TABLES. Retrying!"; trx->error_state = DB_SUCCESS; } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "(%s) while reading SYS_TABLES.", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while reading SYS_TABLES."; break; /* Exit the loop. */ } @@ -7248,7 +7608,7 @@ fts_drop_orphaned_tables(void) it != space_name_list.end(); ++it) { - delete[] *it; + UT_DELETE_ARRAY(*it); } } @@ -7256,11 +7616,10 @@ fts_drop_orphaned_tables(void) Check whether user supplied stopword table is of the right format. Caller is responsible to hold dictionary locks. 
@return the stopword column charset if qualifies */ -UNIV_INTERN CHARSET_INFO* fts_valid_stopword_table( /*=====================*/ - const char* stopword_table_name) /*!< in: Stopword table + const char* stopword_table_name) /*!< in: Stopword table name */ { dict_table_t* table; @@ -7273,9 +7632,8 @@ fts_valid_stopword_table( table = dict_table_get_low(stopword_table_name); if (!table) { - fprintf(stderr, - "InnoDB: user stopword table %s does not exist.\n", - stopword_table_name); + ib::error() << "User stopword table " << stopword_table_name + << " does not exist."; return(NULL); } else { @@ -7284,10 +7642,9 @@ fts_valid_stopword_table( col_name = dict_table_get_col_name(table, 0); if (ut_strcmp(col_name, "value")) { - fprintf(stderr, - "InnoDB: invalid column name for stopword " - "table %s. Its first column must be named as " - "'value'.\n", stopword_table_name); + ib::error() << "Invalid column name for stopword" + " table " << stopword_table_name << ". Its" + " first column must be named as 'value'."; return(NULL); } @@ -7296,10 +7653,9 @@ fts_valid_stopword_table( if (col->mtype != DATA_VARCHAR && col->mtype != DATA_VARMYSQL) { - fprintf(stderr, - "InnoDB: invalid column type for stopword " - "table %s. Its first column must be of " - "varchar type\n", stopword_table_name); + ib::error() << "Invalid column type for stopword" + " table " << stopword_table_name << ". Its" + " first column must be of varchar type"; return(NULL); } @@ -7307,9 +7663,7 @@ fts_valid_stopword_table( ut_ad(col); - return(innobase_get_fts_charset( - static_cast(col->prtype & DATA_MYSQL_TYPE_MASK), - static_cast(dtype_get_charset_coll(col->prtype)))); + return(fts_get_charset(col->prtype)); } /**********************************************************************//** @@ -7318,7 +7672,6 @@ records/fetches stopword configuration to/from FTS configure table, depending on whether we are creating or reloading the FTS. 
@return TRUE if load operation is successful */ -UNIV_INTERN ibool fts_load_stopword( /*==============*/ @@ -7432,8 +7785,9 @@ cleanup: } if (!cache->stopword_info.cached_stopword) { - cache->stopword_info.cached_stopword = rbt_create( - sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp); + cache->stopword_info.cached_stopword = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + &my_charset_latin1); } return(error == DB_SUCCESS); @@ -7497,6 +7851,7 @@ fts_init_recover_doc( sel_node_t* node = static_cast(row); que_node_t* exp = node->select_list; fts_cache_t* cache = get_doc->cache; + st_mysql_ftparser* parser = get_doc->index_cache->index->parser; fts_doc_init(&doc); doc.found = TRUE; @@ -7530,26 +7885,22 @@ fts_init_recover_doc( ut_ad(get_doc); if (!get_doc->index_cache->charset) { - ulint prtype = dfield->type.prtype; - - get_doc->index_cache->charset = - innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); + get_doc->index_cache->charset = fts_get_charset( + dfield->type.prtype); } doc.charset = get_doc->index_cache->charset; + doc.is_ngram = get_doc->index_cache->index->is_ngram; if (dfield_is_ext(dfield)) { dict_table_t* table = cache->sync->table; - ulint zip_size = dict_table_zip_size(table); doc.text.f_str = btr_copy_externally_stored_field( &doc.text.f_len, static_cast(dfield_get_data(dfield)), - zip_size, len, - static_cast(doc.self_heap->arg), - NULL); + dict_table_page_size(table), len, + static_cast(doc.self_heap->arg) + ); } else { doc.text.f_str = static_cast( dfield_get_data(dfield)); @@ -7558,9 +7909,9 @@ fts_init_recover_doc( } if (field_no == 1) { - fts_tokenize_document(&doc, NULL); + fts_tokenize_document(&doc, NULL, parser); } else { - fts_tokenize_document_next(&doc, doc_len, NULL); + fts_tokenize_document_next(&doc, doc_len, NULL, parser); } exp = que_node_get_next(exp); @@ -7589,7 +7940,6 @@ used. 
There are documents that have not yet sync-ed to auxiliary tables from last server abnormally shutdown, we will need to bring such document into FTS cache before any further operations @return TRUE if all OK */ -UNIV_INTERN ibool fts_init_index( /*===========*/ @@ -7633,7 +7983,7 @@ fts_init_index( dropped, and we re-initialize the Doc ID system for subsequent insertion */ if (ib_vector_is_empty(cache->get_docs)) { - index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME); + index = table->fts_doc_id_index; ut_a(index); @@ -7676,3 +8026,58 @@ func_exit: return(TRUE); } + +/** Check if the all the auxillary tables associated with FTS index are in +consistent state. For now consistency is check only by ensuring +index->page_no != FIL_NULL +@param[out] base_table table has host fts index +@param[in,out] trx trx handler */ +void +fts_check_corrupt( + dict_table_t* base_table, + trx_t* trx) +{ + bool sane = true; + fts_table_t fts_table; + + /* Iterate over the common table and check for their sanity. */ + FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table); + + for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) { + + char table_name[MAX_FULL_NAME_LEN]; + + fts_table.suffix = fts_common_tables[i]; + fts_get_table_name(&fts_table, table_name); + + dict_table_t* aux_table = dict_table_open_on_name( + table_name, true, FALSE, DICT_ERR_IGNORE_NONE); + + if (aux_table == NULL) { + dict_set_corrupted( + dict_table_get_first_index(base_table), + trx, "FTS_SANITY_CHECK"); + ut_ad(base_table->corrupted == TRUE); + sane = false; + continue; + } + + for (dict_index_t* aux_table_index = + UT_LIST_GET_FIRST(aux_table->indexes); + aux_table_index != NULL; + aux_table_index = + UT_LIST_GET_NEXT(indexes, aux_table_index)) { + + /* Check if auxillary table needed for FTS is sane. 
*/ + if (aux_table_index->page == FIL_NULL) { + dict_set_corrupted( + dict_table_get_first_index(base_table), + trx, "FTS_SANITY_CHECK"); + ut_ad(base_table->corrupted == TRUE); + sane = false; + } + } + + dict_table_close(aux_table, FALSE, FALSE); + } +} diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index ea937c20752..5989aff83f4 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -25,6 +25,8 @@ Completed 2011/7/10 Sunny and Jimmy Yang ***********************************************************************/ +#include "ha_prototypes.h" + #include "fts0fts.h" #include "row0sel.h" #include "que0types.h" @@ -32,9 +34,10 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0types.h" #include "ut0wqueue.h" #include "srv0start.h" +#include "ut0list.h" #include "zlib.h" -#ifndef UNIV_NONINL +#ifdef UNIV_NONINL #include "fts0types.ic" #include "fts0vlc.ic" #endif @@ -51,6 +54,9 @@ static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300; /** Server is shutting down, so does we exiting the optimize thread */ static bool fts_opt_start_shutdown = false; +/** Event to wait for shutdown of the optimize thread */ +static os_event_t fts_opt_shutdown_event = NULL; + /** Initial size of nodes in fts_word_t. */ static const ulint FTS_WORD_NODES_INIT_SIZE = 64; @@ -215,11 +221,6 @@ struct fts_msg_del_t { this message by the consumer */ }; -/** Stop the optimize thread. */ -struct fts_msg_optimize_t { - dict_table_t* table; /*!< Table to optimize */ -}; - /** The FTS optimize message work queue message type. */ struct fts_msg_t { fts_msg_type_t type; /*!< Message type */ @@ -232,10 +233,10 @@ struct fts_msg_t { }; /** The number of words to read and optimize in a single pass. 
*/ -UNIV_INTERN ulong fts_num_word_optimize; +ulong fts_num_word_optimize; // FIXME -UNIV_INTERN char fts_enable_diag_print; +char fts_enable_diag_print; /** ZLib compressed block size.*/ static ulint FTS_ZIP_BLOCK_SIZE = 1024; @@ -243,27 +244,30 @@ static ulint FTS_ZIP_BLOCK_SIZE = 1024; /** The amount of time optimizing in a single pass, in milliseconds. */ static ib_time_t fts_optimize_time_limit = 0; +/** It's defined in fts0fts.cc */ +extern const char* fts_common_tables[]; + /** SQL Statement for changing state of rows to be deleted from FTS Index. */ static const char* fts_init_delete_sql = "BEGIN\n" "\n" - "INSERT INTO \"%s_BEING_DELETED\"\n" - "SELECT doc_id FROM \"%s_DELETED\";\n" + "INSERT INTO $BEING_DELETED\n" + "SELECT doc_id FROM $DELETED;\n" "\n" - "INSERT INTO \"%s_BEING_DELETED_CACHE\"\n" - "SELECT doc_id FROM \"%s_DELETED_CACHE\";\n"; + "INSERT INTO $BEING_DELETED_CACHE\n" + "SELECT doc_id FROM $DELETED_CACHE;\n"; static const char* fts_delete_doc_ids_sql = "BEGIN\n" "\n" - "DELETE FROM \"%s_DELETED\" WHERE doc_id = :doc_id1;\n" - "DELETE FROM \"%s_DELETED_CACHE\" WHERE doc_id = :doc_id2;\n"; + "DELETE FROM $DELETED WHERE doc_id = :doc_id1;\n" + "DELETE FROM $DELETED_CACHE WHERE doc_id = :doc_id2;\n"; static const char* fts_end_delete_sql = "BEGIN\n" "\n" - "DELETE FROM \"%s_BEING_DELETED\";\n" - "DELETE FROM \"%s_BEING_DELETED_CACHE\";\n"; + "DELETE FROM $BEING_DELETED;\n" + "DELETE FROM $BEING_DELETED_CACHE;\n"; /**********************************************************************//** Initialize fts_zip_t. */ @@ -338,7 +342,6 @@ fts_zip_init( /**********************************************************************//** Create a fts_optimizer_word_t instance. 
@return new instance */ -UNIV_INTERN fts_word_t* fts_word_init( /*==========*/ @@ -405,7 +408,7 @@ fts_optimize_read_node( case 4: /* ILIST */ node->ilist_size_alloc = node->ilist_size = len; - node->ilist = static_cast(ut_malloc(len)); + node->ilist = static_cast(ut_malloc_nokey(len)); memcpy(node->ilist, data, len); break; @@ -423,7 +426,6 @@ fts_optimize_read_node( /**********************************************************************//** Callback function to fetch the rows in an FTS INDEX record. @return always returns non-NULL */ -UNIV_INTERN ibool fts_optimize_index_fetch_node( /*==========================*/ @@ -481,7 +483,6 @@ fts_optimize_index_fetch_node( /**********************************************************************//** Read the rows from the FTS inde. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_index_fetch_nodes( /*==================*/ @@ -494,21 +495,17 @@ fts_index_fetch_nodes( { pars_info_t* info; dberr_t error; + char table_name[MAX_FULL_NAME_LEN]; trx->op_info = "fetching FTS index nodes"; if (*graph) { info = (*graph)->info; } else { - info = pars_info_create(); - } - - pars_info_bind_function(info, "my_func", fetch->read_record, fetch); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - if (!*graph) { ulint selected; + info = pars_info_create(); + ut_a(fts_table->type == FTS_INDEX_TABLE); selected = fts_select_index(fts_table->charset, @@ -516,14 +513,24 @@ fts_index_fetch_nodes( fts_table->suffix = fts_get_suffix(selected); + fts_get_table_name(fts_table, table_name); + + pars_info_bind_id(info, true, "table_name", table_name); + } + + pars_info_bind_function(info, "my_func", fetch->read_record, fetch); + pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); + + if (!*graph) { + *graph = fts_parse_sql( fts_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT word, doc_count, first_doc_id, last_doc_id, " - "ilist\n" - " FROM \"%s\"\n" + " SELECT word, 
doc_count, first_doc_id, last_doc_id," + " ilist\n" + " FROM $table_name\n" " WHERE word LIKE :word\n" " ORDER BY first_doc_id;\n" "BEGIN\n" @@ -538,7 +545,7 @@ fts_index_fetch_nodes( "CLOSE c;"); } - for(;;) { + for (;;) { error = fts_eval_sql(trx, *graph); if (error == DB_SUCCESS) { @@ -548,18 +555,14 @@ fts_index_fetch_nodes( } else { fts_sql_rollback(trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); + ib::warn() << "lock wait timeout reading" + " FTS index. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS index.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while reading FTS index."; break; /* Exit the loop. */ } @@ -599,7 +602,7 @@ fts_zip_read_word( /* Finished decompressing block. */ if (zip->zp->avail_in == 0) { - /* Free the block that's been decompressed. */ + /* Free the block thats been decompressed. 
*/ if (zip->pos > 0) { ulint prev = zip->pos - 1; @@ -620,7 +623,8 @@ fts_zip_read_word( zip->zp->avail_in = FTS_MAX_WORD_LEN; } else { - zip->zp->avail_in = static_cast(zip->block_sz); + zip->zp->avail_in = + static_cast(zip->block_sz); } ++zip->pos; @@ -718,7 +722,9 @@ fts_fetch_index_words( if (zip->zp->avail_out == 0) { byte* block; - block = static_cast(ut_malloc(zip->block_sz)); + block = static_cast( + ut_malloc_nokey(zip->block_sz)); + ib_vector_push(zip->blocks, &block); zip->zp->next_out = block; @@ -775,7 +781,9 @@ fts_zip_deflate_end( ut_a(zip->zp->avail_out == 0); - block = static_cast(ut_malloc(FTS_MAX_WORD_LEN + 1)); + block = static_cast( + ut_malloc_nokey(FTS_MAX_WORD_LEN + 1)); + ib_vector_push(zip->blocks, &block); zip->zp->next_out = block; @@ -823,16 +831,13 @@ fts_index_fetch_words( } for (selected = fts_select_index( - optim->fts_index_table.charset, word->f_str, word->f_len); - fts_index_selector[selected].value; + optim->fts_index_table.charset, word->f_str, word->f_len); + selected < FTS_NUM_AUX_INDEX; selected++) { - optim->fts_index_table.suffix = fts_get_suffix(selected); + char table_name[MAX_FULL_NAME_LEN]; - /* We've search all indexes. 
*/ - if (optim->fts_index_table.suffix == NULL) { - return(DB_TABLE_NOT_FOUND); - } + optim->fts_index_table.suffix = fts_get_suffix(selected); info = pars_info_create(); @@ -842,13 +847,16 @@ fts_index_fetch_words( pars_info_bind_varchar_literal( info, "word", word->f_str, word->f_len); + fts_get_table_name(&optim->fts_index_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); + graph = fts_parse_sql( &optim->fts_index_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" " SELECT word\n" - " FROM \"%s\"\n" + " FROM $table_name\n" " WHERE word > :word\n" " ORDER BY word;\n" "BEGIN\n" @@ -864,15 +872,13 @@ fts_index_fetch_words( zip = optim->zip; - for(;;) { + for (;;) { int err; if (!inited && ((err = deflateInit(zip->zp, 9)) != Z_OK)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: ZLib deflateInit() " - "failed: %d\n", err); + ib::error() << "ZLib deflateInit() failed: " + << err; error = DB_ERROR; break; @@ -887,13 +893,9 @@ fts_index_fetch_words( } else { //FIXME fts_sql_rollback(optim->trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: " - "Warning: lock wait " - "timeout reading document. " - "Retrying!\n"); + ib::warn() << "Lock wait timeout" + " reading document. Retrying!"; /* We need to reset the ZLib state. */ inited = FALSE; @@ -902,9 +904,8 @@ fts_index_fetch_words( optim->trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading document.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") while reading document."; break; /* Exit the loop. */ } @@ -978,7 +979,6 @@ fts_fetch_doc_ids( /**********************************************************************//** Read the rows from a FTS common auxiliary table. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_table_fetch_doc_ids( /*====================*/ @@ -990,6 +990,7 @@ fts_table_fetch_doc_ids( que_t* graph; pars_info_t* info = pars_info_create(); ibool alloc_bk_trx = FALSE; + char table_name[MAX_FULL_NAME_LEN]; ut_a(fts_table->suffix != NULL); ut_a(fts_table->type == FTS_COMMON_TABLE); @@ -1003,12 +1004,15 @@ fts_table_fetch_doc_ids( pars_info_bind_function(info, "my_func", fts_fetch_doc_ids, doc_ids); + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); + graph = fts_parse_sql( fts_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT doc_id FROM \"%s\";\n" + " SELECT doc_id FROM $table_name;\n" "BEGIN\n" "\n" "OPEN c;\n" @@ -1045,7 +1049,6 @@ fts_table_fetch_doc_ids( Do a binary search for a doc id in the array @return +ve index if found -ve index where it should be inserted if not found */ -UNIV_INTERN int fts_bsearch( /*========*/ @@ -1082,7 +1085,7 @@ fts_bsearch( } /* Not found. */ - return( (lower == 0) ? -1 : -lower); + return( (lower == 0) ? -1 : -(lower)); } /**********************************************************************//** @@ -1181,12 +1184,12 @@ fts_optimize_encode_node( new_size = enc_len > FTS_ILIST_MAX_SIZE ? 
enc_len : FTS_ILIST_MAX_SIZE; - node->ilist = static_cast(ut_malloc(new_size)); + node->ilist = static_cast(ut_malloc_nokey(new_size)); node->ilist_size_alloc = new_size; } else if ((node->ilist_size + enc_len) > node->ilist_size_alloc) { ulint new_size = node->ilist_size + enc_len; - byte* ilist = static_cast(ut_malloc(new_size)); + byte* ilist = static_cast(ut_malloc_nokey(new_size)); memcpy(ilist, node->ilist, node->ilist_size); @@ -1386,8 +1389,8 @@ fts_optimize_word( if (fts_enable_diag_print) { word->text.f_str[word->text.f_len] = 0; - fprintf(stderr, "FTS_OPTIMIZE: optimize \"%s\"\n", - word->text.f_str); + ib::info() << "FTS_OPTIMIZE: optimize \"" << word->text.f_str + << "\""; } while (i < size) { @@ -1461,15 +1464,15 @@ fts_optimize_write_word( que_t* graph; ulint selected; dberr_t error = DB_SUCCESS; - char* table_name = fts_get_table_name(fts_table); + char table_name[MAX_FULL_NAME_LEN]; info = pars_info_create(); ut_ad(fts_table->charset); if (fts_enable_diag_print) { - fprintf(stderr, "FTS_OPTIMIZE: processed \"%s\"\n", - word->f_str); + ib::info() << "FTS_OPTIMIZE: processed \"" << word->f_str + << "\""; } pars_info_bind_varchar_literal( @@ -1479,26 +1482,24 @@ fts_optimize_write_word( word->f_str, word->f_len); fts_table->suffix = fts_get_suffix(selected); + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); graph = fts_parse_sql( fts_table, info, - "BEGIN DELETE FROM \"%s\" WHERE word = :word;"); + "BEGIN DELETE FROM $table_name WHERE word = :word;"); error = fts_eval_sql(trx, graph); if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) during optimize, " - "when deleting a word from the FTS index.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ") during optimize," + " when deleting a word from the FTS index."; } fts_que_graph_free(graph); graph = NULL; - mem_free(table_name); - /* Even if the operation needs to be rolled back and 
redone, we iterate over the nodes in order to free the ilist. */ for (i = 0; i < ib_vector_size(nodes); ++i) { @@ -1506,15 +1507,19 @@ fts_optimize_write_word( fts_node_t* node = (fts_node_t*) ib_vector_get(nodes, i); if (error == DB_SUCCESS) { + /* Skip empty node. */ + if (node->ilist == NULL) { + ut_ad(node->ilist_size == 0); + continue; + } + error = fts_write_node( trx, &graph, fts_table, word, node); if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) " - "during optimize, while adding a " - "word to the FTS index.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ")" + " during optimize, while adding a" + " word to the FTS index."; } } @@ -1532,7 +1537,6 @@ fts_optimize_write_word( /**********************************************************************//** Free fts_optimizer_word_t instanace.*/ -UNIV_INTERN void fts_word_free( /*==========*/ @@ -1624,12 +1628,12 @@ fts_optimize_create( optim->trx = trx_allocate_for_background(); - optim->fts_common_table.parent = table->name; + optim->fts_common_table.parent = table->name.m_name; optim->fts_common_table.table_id = table->id; optim->fts_common_table.type = FTS_COMMON_TABLE; optim->fts_common_table.table = table; - optim->fts_index_table.parent = table->name; + optim->fts_index_table.parent = table->name.m_name; optim->fts_index_table.table_id = table->id; optim->fts_index_table.type = FTS_INDEX_TABLE; optim->fts_index_table.table = table; @@ -1750,7 +1754,7 @@ fts_optimize_free( fts_doc_ids_free(optim->to_delete); fts_optimize_graph_free(&optim->graph); - mem_free(optim->name_prefix); + ut_free(optim->name_prefix); /* This will free the heap from which optim itself was allocated. 
*/ mem_heap_free(heap); @@ -1804,9 +1808,9 @@ fts_optimize_words( fetch.read_arg = optim->words; fetch.read_record = fts_optimize_index_fetch_node; - fprintf(stderr, "%.*s\n", (int) word->f_len, word->f_str); + ib::info().write(word->f_str, word->f_len); - while(!optim->done) { + while (!optim->done) { dberr_t error; trx_t* trx = optim->trx; ulint selected; @@ -1853,13 +1857,12 @@ fts_optimize_words( } } } else if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, "InnoDB: Warning: lock wait timeout " - "during optimize. Retrying!\n"); + ib::warn() << "Lock wait timeout during optimize." + " Retrying!"; trx->error_state = DB_SUCCESS; } else if (error == DB_DEADLOCK) { - fprintf(stderr, "InnoDB: Warning: deadlock " - "during optimize. Retrying!\n"); + ib::warn() << "Deadlock during optimize. Retrying!"; trx->error_state = DB_SUCCESS; } else { @@ -1872,42 +1875,6 @@ fts_optimize_words( } } -/**********************************************************************//** -Select the FTS index to search. -@return TRUE if last index */ -static -ibool -fts_optimize_set_next_word( -/*=======================*/ - CHARSET_INFO* charset, /*!< in: charset */ - fts_string_t* word) /*!< in: current last word */ -{ - ulint selected; - ibool last = FALSE; - - selected = fts_select_next_index(charset, word->f_str, word->f_len); - - /* If this was the last index then reset to start. */ - if (fts_index_selector[selected].value == 0) { - /* Reset the last optimized word to '' if no - more words could be read from the FTS index. */ - word->f_len = 0; - *word->f_str = 0; - - last = TRUE; - } else { - ulint value = fts_index_selector[selected].value; - - ut_a(value <= 0xff); - - /* Set to the first character of the next slot. */ - word->f_len = 1; - *word->f_str = (byte) value; - } - - return(last); -} - /**********************************************************************//** Optimize is complete. Set the completion time, and reset the optimize start string for this FTS index to "". 
@@ -1940,8 +1907,8 @@ fts_optimize_index_completed( if (error != DB_SUCCESS) { - fprintf(stderr, "InnoDB: Error: (%s) while " - "updating last optimized word!\n", ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) << ") while updating" + " last optimized word!"; } return(error); @@ -1984,21 +1951,14 @@ fts_optimize_index_read_words( optim, word, fts_num_word_optimize); if (error == DB_SUCCESS) { - - /* If the search returned an empty set - try the next index in the horizontal split. */ - if (optim->zip->n_words > 0) { - break; - } else { - - fts_optimize_set_next_word( - optim->fts_index_table.charset, - word); - - if (word->f_len == 0) { - break; - } + /* Reset the last optimized word to '' if no + more words could be read from the FTS index. */ + if (optim->zip->n_words == 0) { + word->f_len = 0; + *word->f_str = 0; } + + break; } } @@ -2090,9 +2050,10 @@ fts_optimize_purge_deleted_doc_ids( pars_info_t* info; que_t* graph; fts_update_t* update; - char* sql_str; doc_id_t write_doc_id; dberr_t error = DB_SUCCESS; + char deleted[MAX_FULL_NAME_LEN]; + char deleted_cache[MAX_FULL_NAME_LEN]; info = pars_info_create(); @@ -2109,14 +2070,17 @@ fts_optimize_purge_deleted_doc_ids( fts_bind_doc_id(info, "doc_id1", &write_doc_id); fts_bind_doc_id(info, "doc_id2", &write_doc_id); - /* Since we only replace the table_id and don't construct the full - name, we do substitution ourselves. Remember to free sql_str. 
*/ - sql_str = ut_strreplace( - fts_delete_doc_ids_sql, "%s", optim->name_prefix); + /* Make sure the following two names are consistent with the name + used in the fts_delete_doc_ids_sql */ + optim->fts_common_table.suffix = fts_common_tables[3]; + fts_get_table_name(&optim->fts_common_table, deleted); + pars_info_bind_id(info, true, fts_common_tables[3], deleted); - graph = fts_parse_sql(NULL, info, sql_str); + optim->fts_common_table.suffix = fts_common_tables[4]; + fts_get_table_name(&optim->fts_common_table, deleted_cache); + pars_info_bind_id(info, true, fts_common_tables[4], deleted_cache); - mem_free(sql_str); + graph = fts_parse_sql(NULL, info, fts_delete_doc_ids_sql); /* Delete the doc ids that were copied at the start. */ for (i = 0; i < ib_vector_size(optim->to_delete->doc_ids); ++i) { @@ -2157,17 +2121,26 @@ fts_optimize_purge_deleted_doc_id_snapshot( { dberr_t error; que_t* graph; - char* sql_str; + pars_info_t* info; + char being_deleted[MAX_FULL_NAME_LEN]; + char being_deleted_cache[MAX_FULL_NAME_LEN]; - /* Since we only replace the table_id and don't construct - the full name, we do the '%s' substitution ourselves. */ - sql_str = ut_strreplace(fts_end_delete_sql, "%s", optim->name_prefix); + info = pars_info_create(); + + /* Make sure the following two names are consistent with the name + used in the fts_end_delete_sql */ + optim->fts_common_table.suffix = fts_common_tables[0]; + fts_get_table_name(&optim->fts_common_table, being_deleted); + pars_info_bind_id(info, true, fts_common_tables[0], being_deleted); + + optim->fts_common_table.suffix = fts_common_tables[1]; + fts_get_table_name(&optim->fts_common_table, being_deleted_cache); + pars_info_bind_id(info, true, fts_common_tables[1], + being_deleted_cache); /* Delete the doc ids that were copied to delete pending state at the start of optimize. 
*/ - graph = fts_parse_sql(NULL, NULL, sql_str); - - mem_free(sql_str); + graph = fts_parse_sql(NULL, info, fts_end_delete_sql); error = fts_eval_sql(optim->trx, graph); fts_que_graph_free(graph); @@ -2207,16 +2180,35 @@ fts_optimize_create_deleted_doc_id_snapshot( { dberr_t error; que_t* graph; - char* sql_str; + pars_info_t* info; + char being_deleted[MAX_FULL_NAME_LEN]; + char deleted[MAX_FULL_NAME_LEN]; + char being_deleted_cache[MAX_FULL_NAME_LEN]; + char deleted_cache[MAX_FULL_NAME_LEN]; - /* Since we only replace the table_id and don't construct the - full name, we do the substitution ourselves. */ - sql_str = ut_strreplace(fts_init_delete_sql, "%s", optim->name_prefix); + info = pars_info_create(); + + /* Make sure the following four names are consistent with the name + used in the fts_init_delete_sql */ + optim->fts_common_table.suffix = fts_common_tables[0]; + fts_get_table_name(&optim->fts_common_table, being_deleted); + pars_info_bind_id(info, true, fts_common_tables[0], being_deleted); + + optim->fts_common_table.suffix = fts_common_tables[3]; + fts_get_table_name(&optim->fts_common_table, deleted); + pars_info_bind_id(info, true, fts_common_tables[3], deleted); + + optim->fts_common_table.suffix = fts_common_tables[1]; + fts_get_table_name(&optim->fts_common_table, being_deleted_cache); + pars_info_bind_id(info, true, fts_common_tables[1], + being_deleted_cache); + + optim->fts_common_table.suffix = fts_common_tables[4]; + fts_get_table_name(&optim->fts_common_table, deleted_cache); + pars_info_bind_id(info, true, fts_common_tables[4], deleted_cache); /* Move doc_ids that are to be deleted to state being deleted. */ - graph = fts_parse_sql(NULL, NULL, sql_str); - - mem_free(sql_str); + graph = fts_parse_sql(NULL, info, fts_init_delete_sql); error = fts_eval_sql(optim->trx, graph); @@ -2450,7 +2442,6 @@ fts_optimize_table_bk( /*********************************************************************//** Run OPTIMIZE on the given table. 
@return DB_SUCCESS if all OK */ -UNIV_INTERN dberr_t fts_optimize_table( /*===============*/ @@ -2460,8 +2451,9 @@ fts_optimize_table( fts_optimize_t* optim = NULL; fts_t* fts = table->fts; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS start optimize %s\n", table->name); + if (fts_enable_diag_print) { + ib::info() << "FTS start optimize " << table->name; + } optim = fts_optimize_create(table); @@ -2512,9 +2504,8 @@ fts_optimize_table( && optim->n_completed == ib_vector_size(fts->indexes)) { if (fts_enable_diag_print) { - fprintf(stderr, "FTS_OPTIMIZE: Completed " - "Optimize, cleanup DELETED " - "table\n"); + ib::info() << "FTS_OPTIMIZE: Completed" + " Optimize, cleanup DELETED table"; } if (ib_vector_size(optim->to_delete->doc_ids) > 0) { @@ -2535,8 +2526,9 @@ fts_optimize_table( fts_optimize_free(optim); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS end optimize %s\n", table->name); + if (fts_enable_diag_print) { + ib::info() << "FTS end optimize " << table->name; + } return(error); } @@ -2566,7 +2558,6 @@ fts_optimize_create_msg( /**********************************************************************//** Add the table to add to the OPTIMIZER's list. */ -UNIV_INTERN void fts_optimize_add_table( /*===================*/ @@ -2579,9 +2570,7 @@ fts_optimize_add_table( } /* Make sure table with FTS index cannot be evicted */ - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } + dict_table_prevent_eviction(table); msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table); @@ -2590,7 +2579,6 @@ fts_optimize_add_table( /**********************************************************************//** Optimize a table. */ -UNIV_INTERN void fts_optimize_do_table( /*==================*/ @@ -2611,7 +2599,6 @@ fts_optimize_do_table( /**********************************************************************//** Remove the table from the OPTIMIZER's list. We do wait for acknowledgement from the consumer of the message. 
*/ -UNIV_INTERN void fts_optimize_remove_table( /*======================*/ @@ -2628,16 +2615,15 @@ fts_optimize_remove_table( /* FTS optimizer thread is already exited */ if (fts_opt_start_shutdown) { - ib_logf(IB_LOG_LEVEL_INFO, - "Try to remove table %s after FTS optimize" - " thread exiting.", table->name); + ib::info() << "Try to remove table " << table->name + << " after FTS optimize thread exiting."; return; } msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL); /* We will wait on this event until signalled by the consumer. */ - event = os_event_create(); + event = os_event_create(0); remove = static_cast( mem_heap_alloc(msg->heap, sizeof(*remove))); @@ -2650,12 +2636,11 @@ fts_optimize_remove_table( os_event_wait(event); - os_event_free(event); + os_event_destroy(event); } /** Send sync fts cache for the table. @param[in] table table to sync */ -UNIV_INTERN void fts_optimize_request_sync_table( dict_table_t* table) @@ -2670,9 +2655,8 @@ fts_optimize_request_sync_table( /* FTS optimizer thread is already exited */ if (fts_opt_start_shutdown) { - ib_logf(IB_LOG_LEVEL_INFO, - "Try to sync table %s after FTS optimize" - " thread exiting.", table->name); + ib::info() << "Try to sync table " << table->name + << " after FTS optimize thread exiting."; return; } @@ -2703,7 +2687,7 @@ fts_optimize_find_slot( slot = static_cast(ib_vector_get(tables, i)); - if (slot->table->id == table->id) { + if (slot->table == table) { return(slot); } } @@ -2725,9 +2709,8 @@ fts_optimize_start_table( slot = fts_optimize_find_slot(tables, table); if (slot == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: table %s not registered " - "with the optimize thread.\n", table->name); + ib::error() << "Table " << table->name << " not registered" + " with the optimize thread."; } else { slot->last_run = 0; slot->completed = 0; @@ -2755,7 +2738,7 @@ fts_optimize_new_table( if (slot->state == FTS_STATE_EMPTY) { empty_slot = i; - } else if (slot->table->id == 
table->id) { + } else if (slot->table == table) { /* Already exists in our optimize queue. */ ut_ad(slot->table_id = table->id); return(FALSE); @@ -2802,13 +2785,13 @@ fts_optimize_del_table( slot = static_cast(ib_vector_get(tables, i)); - /* FIXME: Should we assert on this ? */ if (slot->state != FTS_STATE_EMPTY - && slot->table->id == table->id) { + && slot->table == table) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS Optimize Removing " - "table %s\n", table->name); + if (fts_enable_diag_print) { + ib::info() << "FTS Optimize Removing table " + << table->name; + } slot->table = NULL; slot->state = FTS_STATE_EMPTY; @@ -2888,8 +2871,8 @@ fts_is_sync_needed( const ib_vector_t* tables) /*!< in: registered tables vector*/ { - ulint total_memory = 0; - double time_diff = difftime(ut_time(), last_check_sync_time); + ulint total_memory = 0; + double time_diff = difftime(ut_time(), last_check_sync_time); if (fts_need_sync || time_diff < 5) { return(false); @@ -2904,7 +2887,7 @@ fts_is_sync_needed( ib_vector_get_const(tables, i)); if (slot->state != FTS_STATE_EMPTY && slot->table - && slot->table->fts) { + && slot->table->fts && slot->table->fts->cache) { total_memory += slot->table->fts->cache->total_size; } @@ -2977,7 +2960,7 @@ fts_optimize_sync_table( /* Prevent DROP INDEX etc. from running when we are syncing cache in background. */ - if (!rw_lock_s_lock_nowait(&dict_operation_lock, __FILE__, __LINE__)) { + if (!rw_lock_s_lock_nowait(dict_operation_lock, __FILE__, __LINE__)) { /* Exit when fail to get dict operation lock. */ return; } @@ -2992,13 +2975,12 @@ fts_optimize_sync_table( dict_table_close(table, FALSE, FALSE); } - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); } /**********************************************************************//** Optimize all FTS tables. 
@return Dummy return */ -UNIV_INTERN os_thread_ret_t fts_optimize_thread( /*================*/ @@ -3010,7 +2992,6 @@ fts_optimize_thread( ulint current = 0; ibool done = FALSE; ulint n_tables = 0; - os_event_t exit_event = 0; ulint n_optimize = 0; ib_wqueue_t* wq = (ib_wqueue_t*) arg; @@ -3022,7 +3003,7 @@ fts_optimize_thread( tables = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4); - while(!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) { + while (!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) { /* If there is no message in the queue and we have tables to optimize then optimize the tables. */ @@ -3081,7 +3062,6 @@ fts_optimize_thread( case FTS_MSG_STOP: done = TRUE; - exit_event = (os_event_t) msg->ptr; break; case FTS_MSG_ADD_TABLE: @@ -3154,21 +3134,20 @@ fts_optimize_thread( ib_vector_free(tables); - ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting."); + ib::info() << "FTS optimize thread exiting."; - os_event_set(exit_event); + os_event_set(fts_opt_shutdown_event); my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } /**********************************************************************//** Startup the optimize thread and create the work queue. */ -UNIV_INTERN void fts_optimize_init(void) /*===================*/ @@ -3179,6 +3158,7 @@ fts_optimize_init(void) ut_a(fts_optimize_wq == NULL); fts_optimize_wq = ib_wqueue_create(); + fts_opt_shutdown_event = os_event_create(0); ut_a(fts_optimize_wq != NULL); last_check_sync_time = ut_time(); @@ -3188,7 +3168,6 @@ fts_optimize_init(void) /**********************************************************************//** Check whether the work queue is initialized. @return TRUE if optimze queue is initialized. 
*/ -UNIV_INTERN ibool fts_optimize_is_init(void) /*======================*/ @@ -3196,17 +3175,13 @@ fts_optimize_is_init(void) return(fts_optimize_wq != NULL); } -/**********************************************************************//** -Signal the optimize thread to prepare for shutdown. */ -UNIV_INTERN +/** Shutdown fts optimize thread. */ void -fts_optimize_start_shutdown(void) -/*=============================*/ +fts_optimize_shutdown() { ut_ad(!srv_read_only_mode); fts_msg_t* msg; - os_event_t event; /* If there is an ongoing activity on dictionary, such as srv_master_evict_from_table_cache(), wait for it */ @@ -3221,30 +3196,15 @@ fts_optimize_start_shutdown(void) /* We tell the OPTIMIZE thread to switch to state done, we can't delete the work queue here because the add thread needs deregister the FTS tables. */ - event = os_event_create(); msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL); - msg->ptr = event; ib_wqueue_add(fts_optimize_wq, msg, msg->heap); - os_event_wait(event); - os_event_free(event); + os_event_wait(fts_opt_shutdown_event); + + os_event_destroy(fts_opt_shutdown_event); ib_wqueue_free(fts_optimize_wq); - -} - -/**********************************************************************//** -Reset the work queue. */ -UNIV_INTERN -void -fts_optimize_end(void) -/*==================*/ -{ - ut_ad(!srv_read_only_mode); - - // FIXME: Potential race condition here: We should wait for - // the optimize thread to confirm shutdown. 
fts_optimize_wq = NULL; } diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc index 7f0ba4e0c1b..e4d1bba2be6 100644 --- a/storage/innobase/fts/fts0pars.cc +++ b/storage/innobase/fts/fts0pars.cc @@ -76,12 +76,13 @@ /* Line 268 of yacc.c */ #line 26 "fts0pars.y" - +#include "ha_prototypes.h" #include "mem0mem.h" #include "fts0ast.h" #include "fts0blex.h" #include "fts0tlex.h" #include "fts0pars.h" +#include extern int fts_lexer(YYSTYPE*, fts_lexer_t*); extern int fts_blexer(YYSTYPE*, yyscan_t); @@ -271,8 +272,6 @@ YYID (yyi) # define YYSTACK_ALLOC __builtin_alloca # elif defined __BUILTIN_VA_ARG_INCR # include /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca # elif defined _MSC_VER # include /* INFRINGES ON USER NAME SPACE */ # define alloca _alloca @@ -1541,7 +1540,7 @@ yyreduce: /* Line 1806 of yacc.c */ #line 141 "fts0pars.y" { - fts_ast_term_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10)); + fts_ast_text_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10)); fts_ast_string_free((yyvsp[(3) - (3)].token)); } break; @@ -1574,7 +1573,7 @@ yyreduce: { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node)); - fts_ast_term_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10)); + fts_ast_text_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10)); fts_ast_string_free((yyvsp[(4) - (4)].token)); } break; @@ -1933,7 +1932,6 @@ ftserror( /******************************************************************** Create a fts_lexer_t instance.*/ - fts_lexer_t* fts_lexer_create( /*=============*/ @@ -1942,7 +1940,7 @@ fts_lexer_create( ulint query_len) { fts_lexer_t* fts_lexer = static_cast( - ut_malloc(sizeof(fts_lexer_t))); + ut_malloc_nokey(sizeof(fts_lexer_t))); if (boolean_mode) { 
fts0blex_init(&fts_lexer->yyscanner); @@ -1984,7 +1982,6 @@ fts_lexer_free( /******************************************************************** Call the appropaiate scanner.*/ - int fts_lexer( /*======*/ diff --git a/storage/innobase/fts/fts0pars.y b/storage/innobase/fts/fts0pars.y index e48036e82fe..1f4ec9922e3 100644 --- a/storage/innobase/fts/fts0pars.y +++ b/storage/innobase/fts/fts0pars.y @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,12 +24,13 @@ this program; if not, write to the Free Software Foundation, Inc., */ %{ - +#include "ha_prototypes.h" #include "mem0mem.h" #include "fts0ast.h" #include "fts0blex.h" #include "fts0tlex.h" #include "fts0pars.h" +#include extern int fts_lexer(YYSTYPE*, fts_lexer_t*); extern int fts_blexer(YYSTYPE*, yyscan_t); @@ -139,7 +140,7 @@ expr : term { } | text '@' FTS_NUMB { - fts_ast_term_set_distance($1, fts_ast_string_to_ul($3, 10)); + fts_ast_text_set_distance($1, fts_ast_string_to_ul($3, 10)); fts_ast_string_free($3); } @@ -157,7 +158,7 @@ expr : term { | prefix text '@' FTS_NUMB { $$ = fts_ast_create_node_list(state, $1); fts_ast_add_node($$, $2); - fts_ast_term_set_distance($2, fts_ast_string_to_ul($4, 10)); + fts_ast_text_set_distance($2, fts_ast_string_to_ul($4, 10)); fts_ast_string_free($4); } @@ -224,7 +225,6 @@ ftserror( /******************************************************************** Create a fts_lexer_t instance.*/ - fts_lexer_t* fts_lexer_create( /*=============*/ @@ -233,17 +233,17 @@ fts_lexer_create( ulint query_len) { fts_lexer_t* fts_lexer = static_cast( - ut_malloc(sizeof(fts_lexer_t))); + ut_malloc_nokey(sizeof(fts_lexer_t))); if 
(boolean_mode) { fts0blex_init(&fts_lexer->yyscanner); - fts0b_scan_bytes((char*) query, query_len, fts_lexer->yyscanner); + fts0b_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner); fts_lexer->scanner = (fts_scan) fts_blexer; /* FIXME: Debugging */ /* fts0bset_debug(1 , fts_lexer->yyscanner); */ } else { fts0tlex_init(&fts_lexer->yyscanner); - fts0t_scan_bytes((char*) query, query_len, fts_lexer->yyscanner); + fts0t_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner); fts_lexer->scanner = (fts_scan) fts_tlexer; } @@ -269,7 +269,6 @@ fts_lexer_free( /******************************************************************** Call the appropaiate scanner.*/ - int fts_lexer( /*======*/ diff --git a/storage/innobase/fts/fts0plugin.cc b/storage/innobase/fts/fts0plugin.cc new file mode 100644 index 00000000000..eaa32379a7c --- /dev/null +++ b/storage/innobase/fts/fts0plugin.cc @@ -0,0 +1,295 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fts/fts0plugin.cc +Full Text Search plugin support. 
+ +Created 2013/06/04 Shaohua Wang +***********************************************************************/ + +#include "fts0ast.h" +#include "fts0plugin.h" +#include "fts0tokenize.h" + +#include "ft_global.h" + +/******************************************************************//** +FTS default parser init +@return 0 */ +static +int +fts_default_parser_init( +/*====================*/ + MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */ +{ + return(0); +} + +/******************************************************************//** +FTS default parser deinit +@return 0 */ +static +int +fts_default_parser_deinit( +/*======================*/ + MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */ +{ + return(0); +} + +/******************************************************************//** +FTS default parser parse from ft_static.c in MYISAM. +@return 0 if parse successfully, or return non-zero */ +static +int +fts_default_parser_parse( +/*=====================*/ + MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */ +{ + return(param->mysql_parse(param, param->doc, param->length)); +} + +/* FTS default parser from ft_static.c in MYISAM. 
*/ +struct st_mysql_ftparser fts_default_parser = +{ + MYSQL_FTPARSER_INTERFACE_VERSION, + fts_default_parser_parse, + fts_default_parser_init, + fts_default_parser_deinit +}; + +/******************************************************************//** +Get a operator node from token boolean info +@return node */ +static +fts_ast_node_t* +fts_query_get_oper_node( +/*====================*/ + MYSQL_FTPARSER_BOOLEAN_INFO* info, /*!< in: token info */ + fts_ast_state_t* state) /*!< in/out: query parse state*/ +{ + fts_ast_node_t* oper_node = NULL; + + if (info->yesno > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_EXIST); + } else if (info->yesno < 0) { + oper_node = fts_ast_create_node_oper(state, FTS_IGNORE); + } else if (info->weight_adjust > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING); + } else if (info->weight_adjust < 0) { + oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING); + } else if (info->wasign > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_NEGATE); + } + + return(oper_node); +} + +/******************************************************************//** +FTS plugin parser 'myql_add_word' callback function for query parse. +Refer to 'st_mysql_ftparser_param' for more detail. +Note: +a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM; +b. Parse node or tree refers to fts0pars.y. +@return 0 if add successfully, or return non-zero. 
*/ +int +fts_query_add_word_for_parser( +/*==========================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* word, /*!< in: token */ + int word_len, /*!< in: token length */ + MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */ +{ + fts_ast_state_t* state = + static_cast(param->mysql_ftparam); + fts_ast_node_t* cur_node = state->cur_node; + fts_ast_node_t* oper_node = NULL; + fts_ast_node_t* term_node = NULL; + fts_ast_node_t* node = NULL; + + switch (info->type) { + case FT_TOKEN_STOPWORD: + /* We only handler stopword in phrase */ + if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) { + break; + } + + case FT_TOKEN_WORD: + term_node = fts_ast_create_node_term_for_parser( + state, word, word_len); + + if (info->trunc) { + fts_ast_term_set_wildcard(term_node); + } + + if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) { + /* Ignore operator inside phrase */ + fts_ast_add_node(cur_node, term_node); + } else { + ut_ad(cur_node->type == FTS_AST_LIST + || cur_node->type == FTS_AST_SUBEXP_LIST); + oper_node = fts_query_get_oper_node(info, state); + + if (oper_node) { + node = fts_ast_create_node_list(state, oper_node); + fts_ast_add_node(node, term_node); + fts_ast_add_node(cur_node, node); + } else { + fts_ast_add_node(cur_node, term_node); + } + } + + break; + + case FT_TOKEN_LEFT_PAREN: + /* Check parse error */ + if (cur_node->type != FTS_AST_LIST + && cur_node->type != FTS_AST_SUBEXP_LIST) { + return(1); + } + + /* Set operator */ + oper_node = fts_query_get_oper_node(info, state); + if (oper_node != NULL) { + node = fts_ast_create_node_list(state, oper_node); + fts_ast_add_node(cur_node, node); + node->go_up = true; + node->up_node = cur_node; + cur_node = node; + } + + if (info->quot) { + /* Phrase node */ + node = fts_ast_create_node_phrase_list(state); + } else { + /* Subexp list node */ + node = fts_ast_create_node_subexp_list(state, NULL); + } + + fts_ast_add_node(cur_node, node); + + node->up_node = cur_node; + 
state->cur_node = node; + state->depth += 1; + + break; + + case FT_TOKEN_RIGHT_PAREN: + info->quot = 0; + + if (cur_node->up_node != NULL) { + cur_node = cur_node->up_node; + + if (cur_node->go_up) { + ut_a(cur_node->up_node + && !(cur_node->up_node->go_up)); + cur_node = cur_node->up_node; + } + } + + state->cur_node = cur_node; + + if (state->depth > 0) { + state->depth--; + } else { + /* Parentheses mismatch */ + return(1); + } + + break; + + case FT_TOKEN_EOF: + default: + break; + } + + return(0); +} + +/******************************************************************//** +FTS plugin parser 'myql_parser' callback function for query parse. +Refer to 'st_mysql_ftparser_param' for more detail. +@return 0 if parse successfully */ +static +int +fts_parse_query_internal( +/*=====================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* query, /*!< in: query string */ + int len) /*!< in: query length */ +{ + MYSQL_FTPARSER_BOOLEAN_INFO info; + const CHARSET_INFO* cs = param->cs; + uchar** start = (uchar**)(&query); + uchar* end = (uchar*)(query + len); + FT_WORD w = {NULL, 0, 0}; + + info.prev = ' '; + info.quot = 0; + memset(&w, 0, sizeof(w)); + /* Note: We don't handle simple parser mode here, + but user supplied plugin parser should handler it. */ + while (fts_get_word(cs, start, end, &w, &info)) { + int ret = param->mysql_add_word( + param, + reinterpret_cast(w.pos), + w.len, &info); + if (ret) { + return(ret); + } + } + + return(0); +} + +/******************************************************************//** +fts parse query by plugin parser. +@return 0 if parse successfully, or return non-zero. 
*/ +int +fts_parse_by_parser( +/*================*/ + ibool mode, /*!< in: parse boolean mode */ + uchar* query_str, /*!< in: query string */ + ulint query_len, /*!< in: query string length */ + st_mysql_ftparser* parser, /*!< in: fts plugin parser */ + fts_ast_state_t* state) /*!< in/out: parser state */ +{ + MYSQL_FTPARSER_PARAM param; + int ret; + + ut_ad(parser); + + /* Initial parser param */ + param.mysql_parse = fts_parse_query_internal; + param.mysql_add_word = fts_query_add_word_for_parser; + param.mysql_ftparam = static_cast(state); + param.cs = state->charset; + param.doc = reinterpret_cast(query_str); + param.length = static_cast(query_len); + param.flags = 0; + param.mode = mode ? + MYSQL_FTPARSER_FULL_BOOLEAN_INFO : + MYSQL_FTPARSER_SIMPLE_MODE; + + PARSER_INIT(parser, ¶m); + ret = parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); + + return(ret | state->depth); +} diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index 26bd0378aed..dee7c59a58b 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -24,7 +24,9 @@ Created 2007/03/27 Sunny Bains Completed 2011/7/10 Sunny and Jimmy Yang *******************************************************/ -#include "dict0dict.h" /* dict_table_get_n_rows() */ +#include "ha_prototypes.h" + +#include "dict0dict.h" #include "ut0rbt.h" #include "row0sel.h" #include "fts0fts.h" @@ -32,14 +34,15 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0ast.h" #include "fts0pars.h" #include "fts0types.h" -#include "ha_prototypes.h" -#include +#include "fts0plugin.h" +#include "ut0new.h" -#ifndef UNIV_NONINL +#ifdef UNIV_NONINL #include "fts0types.ic" #include "fts0vlc.ic" #endif +#include #include #define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)]) @@ -59,7 +62,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang // FIXME: Need to have a generic iterator that traverses the ilist. 
-typedef std::vector word_vector_t; +typedef std::vector > word_vector_t; struct fts_word_freq_t; @@ -71,6 +74,7 @@ struct fts_query_t { dict_index_t* index; /*!< The FTS index to search */ /*!< FTS auxiliary common table def */ + fts_table_t fts_common_table; fts_table_t fts_index_table;/*!< FTS auxiliary index table def */ @@ -144,7 +148,18 @@ struct fts_query_t { document, its elements are of type fts_word_freq_t */ + ib_rbt_t* wildcard_words; /*!< words with wildcard */ + bool multi_exist; /*!< multiple FTS_EXIST oper */ + + st_mysql_ftparser* parser; /*!< fts plugin parser */ + + /** limit value for the fts query */ + ulonglong limit; + + /** number of docs fetched by query. This is to restrict the + result with limit value */ + ulonglong n_docs; }; /** For phrase matching, first we collect the documents and the positions @@ -178,7 +193,7 @@ struct fts_select_t { the FTS index */ }; -typedef std::vector pos_vector_t; +typedef std::vector > pos_vector_t; /** structure defines a set of ranges for original documents, each of which has a minimum position and maximum position. Text in such range should @@ -197,22 +212,54 @@ struct fts_proximity_t { /** The match positions and tokesn to match */ struct fts_phrase_t { - ibool found; /*!< Match result */ + fts_phrase_t(const dict_table_t* table) + : + found(false), + match(NULL), + tokens(NULL), + distance(0), + charset(NULL), + heap(NULL), + page_size(dict_table_page_size(table)), + proximity_pos(NULL), + parser(NULL) + { + } - const fts_match_t* - match; /*!< Positions within text */ + /** Match result */ + ibool found; - const ib_vector_t* - tokens; /*!< Tokens to match */ + /** Positions within text */ + const fts_match_t* match; - ulint distance; /*!< For matching on proximity - distance. 
Can be 0 for exact match */ - CHARSET_INFO* charset; /*!< Phrase match charset */ - mem_heap_t* heap; /*!< Heap for word processing */ - ulint zip_size; /*!< row zip size */ - fts_proximity_t*proximity_pos; /*!< position info for proximity - search verification. Records the min - and max position of words matched */ + /** Tokens to match */ + const ib_vector_t* tokens; + + /** For matching on proximity distance. Can be 0 for exact match */ + ulint distance; + + /** Phrase match charset */ + CHARSET_INFO* charset; + + /** Heap for word processing */ + mem_heap_t* heap; + + /** Row page size */ + const page_size_t page_size; + + /** Position info for proximity search verification. Records the + min and max position of words matched */ + fts_proximity_t* proximity_pos; + + /** FTS plugin parser */ + st_mysql_ftparser* parser; +}; + +/** Paramter passed to fts phrase match by parser */ +struct fts_phrase_param_t { + fts_phrase_t* phrase; /*!< Match phrase instance */ + ulint token_index; /*!< Index of token to match next */ + mem_heap_t* heap; /*!< Heap for word processing */ }; /** For storing the frequncy of a word/term in a document */ @@ -395,7 +442,7 @@ fts_query_lcs( ulint r = len_p1; ulint c = len_p2; ulint size = (r + 1) * (c + 1) * sizeof(ulint); - ulint* table = (ulint*) ut_malloc(size); + ulint* table = (ulint*) ut_malloc_nokey(size); /* Traverse the table backwards, from the last row to the first and also from the last column to the first. We compute the smaller @@ -442,7 +489,7 @@ fts_query_lcs( /*******************************************************************//** Compare two fts_ranking_t instance on their rank value and doc ids in descending order on the rank and ascending order on doc id. 
-@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ +@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ static int fts_query_compare_rank( @@ -469,67 +516,6 @@ fts_query_compare_rank( return(1); } -#ifdef FTS_UTF8_DEBUG -/*******************************************************************//** -Convert string to lowercase. -@return lower case string, callers responsibility to delete using -ut_free() */ -static -byte* -fts_tolower( -/*========*/ - const byte* src, /*!< in: src string */ - ulint len) /*!< in: src string length */ -{ - fts_string_t str; - byte* lc_str = ut_malloc(len + 1); - - str.f_len = len; - str.f_str = lc_str; - - memcpy(str.f_str, src, len); - - /* Make sure the last byte is NUL terminated */ - str.f_str[len] = '\0'; - - fts_utf8_tolower(&str); - - return(lc_str); -} - -/*******************************************************************//** -Do a case insensitive search. Doesn't check for NUL byte end marker -only relies on len. Convert str2 to lower case before comparing. -@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ -static -int -fts_utf8_strcmp( -/*============*/ - const fts_string_t* - str1, /*!< in: should be lower case*/ - - fts_string_t* str2) /*!< in: any case. We will use the length - of this string during compare as it - should be the min of the two strings */ -{ - byte b = str2->f_str[str2->f_len]; - - ut_a(str2->f_len <= str1->f_len); - - /* We need to write a NUL byte at the end of the string because the - string is converted to lowercase by a MySQL function which doesn't - care about the length. */ - str2->f_str[str2->f_len] = 0; - - fts_utf8_tolower(str2); - - /* Restore the value we replaced above. 
*/ - str2->f_str[str2->f_len] = b; - - return(memcmp(str1->f_str, str2->f_str, str2->f_len)); -} -#endif - /*******************************************************************//** Create words in ranking */ static @@ -593,11 +579,7 @@ fts_ranking_words_add( pos = rbt_size(query->word_map); - new_word.f_str = static_cast(mem_heap_alloc(query->heap, - word->f_len + 1)); - memcpy(new_word.f_str, word->f_str, word->f_len); - new_word.f_str[word->f_len] = 0; - new_word.f_len = word->f_len; + fts_string_dup(&new_word, word, query->heap); new_word.f_n_char = pos; rbt_add_node(query->word_map, &parent, &new_word); @@ -684,11 +666,7 @@ fts_query_add_word_freq( memset(&word_freq, 0, sizeof(word_freq)); - word_freq.word.f_str = static_cast( - mem_heap_alloc(query->heap, word->f_len + 1)); - memcpy(word_freq.word.f_str, word->f_str, word->f_len); - word_freq.word.f_str[word->f_len] = 0; - word_freq.word.f_len = word->f_len; + fts_string_dup(&word_freq.word, word, query->heap); word_freq.doc_count = 0; @@ -1142,8 +1120,12 @@ fts_query_difference( ut_a(query->oper == FTS_IGNORE); #ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "DIFFERENCE: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); + { + ib::info out; + out << "DIFFERENCE: Searching: '"; + out.write(token->f_str, token->f_len); + out << "'"; + } #endif if (query->doc_ids) { @@ -1233,8 +1215,12 @@ fts_query_intersect( ut_a(query->oper == FTS_EXIST); #ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "INTERSECT: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); + { + ib::info out; + out << "INTERSECT: Searching: '"; + out.write(token->f_str, token->f_len); + out << "'"; + } #endif /* If the words set is not empty and multi exist is true, @@ -1415,8 +1401,12 @@ fts_query_union( query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING); #ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "UNION: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); + { + ib::info out; + out << "UNION: Searching: '"; + 
out.write(token->f_str, token->f_len); + out << "'"; + } #endif if (query->doc_ids) { @@ -1427,10 +1417,6 @@ fts_query_union( return(query->error); } - /* Single '%' would confuse parser in pars_like_rebind(). In addition, - our wildcard search only supports prefix search */ - ut_ad(*token->f_str != '%'); - fts_query_cache(query, token); /* Setup the callback args for filtering and @@ -1626,18 +1612,17 @@ fts_query_match_phrase_terms( const fts_string_t* token; int result; ulint ret; - ulint offset; ret = innobase_mysql_fts_get_token( - phrase->charset, ptr, (byte*) end, - &match, &offset); + phrase->charset, ptr, + const_cast(end), &match); if (match.f_len > 0) { /* Get next token to match. */ token = static_cast( ib_vector_get_const(tokens, i)); - fts_utf8_string_dup(&cmp_str, &match, heap); + fts_string_dup(&cmp_str, &match, heap); result = innobase_fts_text_case_cmp( phrase->charset, token, &cmp_str); @@ -1718,12 +1703,11 @@ fts_proximity_is_word_in_range( while (cur_pos <= proximity_pos->max_pos[i]) { ulint len; fts_string_t str; - ulint offset = 0; len = innobase_mysql_fts_get_token( phrase->charset, start + cur_pos, - start + total_len, &str, &offset); + start + total_len, &str); if (len == 0) { break; @@ -1752,6 +1736,103 @@ fts_proximity_is_word_in_range( return(false); } +/*****************************************************************//** +FTS plugin parser 'myql_add_word' callback function for phrase match +Refer to 'st_mysql_ftparser_param' for more detail. 
+@return 0 if match, or return non-zero */ +static +int +fts_query_match_phrase_add_word_for_parser( +/*=======================================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* word, /*!< in: token */ + int word_len, /*!< in: token length */ + MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */ +{ + fts_phrase_param_t* phrase_param; + fts_phrase_t* phrase; + const ib_vector_t* tokens; + fts_string_t match; + fts_string_t cmp_str; + const fts_string_t* token; + int result; + mem_heap_t* heap; + + phrase_param = static_cast(param->mysql_ftparam); + heap = phrase_param->heap; + phrase = phrase_param->phrase; + tokens = phrase->tokens; + + /* In case plugin parser doesn't check return value */ + if (phrase_param->token_index == ib_vector_size(tokens)) { + return(1); + } + + match.f_str = (uchar *)(word); + match.f_len = word_len; + match.f_n_char = fts_get_token_size(phrase->charset, word, word_len); + + if (match.f_len > 0) { + /* Get next token to match. */ + ut_a(phrase_param->token_index < ib_vector_size(tokens)); + token = static_cast( + ib_vector_get_const(tokens, phrase_param->token_index)); + + fts_string_dup(&cmp_str, &match, heap); + + result = innobase_fts_text_case_cmp( + phrase->charset, token, &cmp_str); + + if (result == 0) { + phrase_param->token_index++; + } else { + return(1); + } + } + + /* Can't be greater than the number of elements. */ + ut_a(phrase_param->token_index <= ib_vector_size(tokens)); + + /* This is the case for multiple words. */ + if (phrase_param->token_index == ib_vector_size(tokens)) { + phrase->found = TRUE; + } + + return(static_cast(phrase->found)); +} + +/*****************************************************************//** +Check whether the terms in the phrase match the text. 
+@return TRUE if matched else FALSE */ +static +ibool +fts_query_match_phrase_terms_by_parser( +/*===================================*/ + fts_phrase_param_t* phrase_param, /* in/out: phrase param */ + st_mysql_ftparser* parser, /* in: plugin fts parser */ + byte* text, /* in: text to check */ + ulint len) /* in: text length */ +{ + MYSQL_FTPARSER_PARAM param; + + ut_a(parser); + + /* Set paramters for param */ + param.mysql_parse = fts_tokenize_document_internal; + param.mysql_add_word = fts_query_match_phrase_add_word_for_parser; + param.mysql_ftparam = phrase_param; + param.cs = phrase_param->phrase->charset; + param.doc = reinterpret_cast(text); + param.length = static_cast(len); + param.mode= MYSQL_FTPARSER_WITH_STOPWORDS; + + PARSER_INIT(parser, ¶m); + parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); + + return(phrase_param->phrase->found); +} + /*****************************************************************//** Callback function to fetch and search the document. @return TRUE if matched else FALSE */ @@ -1786,11 +1867,7 @@ fts_query_match_phrase( for (i = phrase->match->start; i < ib_vector_size(positions); ++i) { ulint pos; - fts_string_t match; - fts_string_t cmp_str; byte* ptr = start; - ulint ret; - ulint offset; pos = *(ulint*) ib_vector_get_const(positions, i); @@ -1807,39 +1884,60 @@ fts_query_match_phrase( searched field to adjust the doc position when search phrases. */ pos -= prev_len; - ptr = match.f_str = start + pos; + ptr = start + pos; /* Within limits ? 
*/ if (ptr >= end) { break; } - ret = innobase_mysql_fts_get_token( - phrase->charset, start + pos, (byte*) end, - &match, &offset); + if (phrase->parser) { + fts_phrase_param_t phrase_param; - if (match.f_len == 0) { - break; - } + phrase_param.phrase = phrase; + phrase_param.token_index = 0; + phrase_param.heap = heap; - fts_utf8_string_dup(&cmp_str, &match, heap); + if (fts_query_match_phrase_terms_by_parser( + &phrase_param, + phrase->parser, + ptr, + (end - ptr))) { + break; + } + } else { + fts_string_t match; + fts_string_t cmp_str; + ulint ret; - if (innobase_fts_text_case_cmp( - phrase->charset, first, &cmp_str) == 0) { + match.f_str = ptr; + ret = innobase_mysql_fts_get_token( + phrase->charset, start + pos, + const_cast(end), &match); - /* This is the case for the single word - in the phrase. */ - if (ib_vector_size(phrase->tokens) == 1) { - phrase->found = TRUE; + if (match.f_len == 0) { break; } - ptr += ret; + fts_string_dup(&cmp_str, &match, heap); - /* Match the remaining terms in the phrase. */ - if (fts_query_match_phrase_terms(phrase, &ptr, - end, heap)) { - break; + if (innobase_fts_text_case_cmp( + phrase->charset, first, &cmp_str) == 0) { + + /* This is the case for the single word + in the phrase. */ + if (ib_vector_size(phrase->tokens) == 1) { + phrase->found = TRUE; + break; + } + + ptr += ret; + + /* Match the remaining terms in the phrase. 
*/ + if (fts_query_match_phrase_terms(phrase, &ptr, + end, heap)) { + break; + } } } } @@ -1915,9 +2013,9 @@ fts_query_fetch_document( if (dfield_is_ext(dfield)) { data = btr_copy_externally_stored_field( - &cur_len, data, phrase->zip_size, - dfield_get_len(dfield), phrase->heap, - NULL); + &cur_len, data, phrase->page_size, + dfield_get_len(dfield), phrase->heap + ); } else { cur_len = dfield_get_len(dfield); } @@ -2032,13 +2130,22 @@ fts_query_find_term( fts_select_t select; doc_id_t match_doc_id; trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN]; trx->op_info = "fetching FTS index matching nodes"; if (*graph) { info = (*graph)->info; } else { + ulint selected; + info = pars_info_create(); + + selected = fts_select_index(*word->f_str); + query->fts_index_table.suffix = fts_get_suffix(selected); + + fts_get_table_name(&query->fts_index_table, table_name); + pars_info_bind_id(info, true, "index_table_name", table_name); } select.found = FALSE; @@ -2057,11 +2164,6 @@ fts_query_find_term( fts_bind_doc_id(info, "max_doc_id", &match_doc_id); if (!*graph) { - ulint selected; - - selected = fts_select_index(*word->f_str); - - query->fts_index_table.suffix = fts_get_suffix(selected); *graph = fts_parse_sql( &query->fts_index_table, @@ -2069,10 +2171,10 @@ fts_query_find_term( "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" " SELECT doc_count, ilist\n" - " FROM \"%s\"\n" - " WHERE word LIKE :word AND " - " first_doc_id <= :min_doc_id AND " - " last_doc_id >= :max_doc_id\n" + " FROM $index_table_name\n" + " WHERE word LIKE :word AND" + " first_doc_id <= :min_doc_id AND" + " last_doc_id >= :max_doc_id\n" " ORDER BY first_doc_id;\n" "BEGIN\n" "\n" @@ -2086,24 +2188,22 @@ fts_query_find_term( "CLOSE c;"); } - for(;;) { + for (;;) { error = fts_eval_sql(trx, *graph); if (error == DB_SUCCESS) { break; /* Exit the loop. 
*/ } else { - ut_print_timestamp(stderr); if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); + ib::warn() << "lock wait timeout reading FTS" + " index. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS index.\n", error); + ib::error() << error + << " while reading FTS index."; break; /* Exit the loop. */ } @@ -2168,6 +2268,7 @@ fts_query_total_docs_containing_term( que_t* graph; ulint selected; trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN] trx->op_info = "fetching FTS index document count"; @@ -2182,14 +2283,18 @@ fts_query_total_docs_containing_term( query->fts_index_table.suffix = fts_get_suffix(selected); + fts_get_table_name(&query->fts_index_table, table_name); + + pars_info_bind_id(info, true, "index_table_name", table_name); + graph = fts_parse_sql( &query->fts_index_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" " SELECT doc_count\n" - " FROM %s\n" - " WHERE word = :word " + " FROM $index_table_name\n" + " WHERE word = :word" " ORDER BY first_doc_id;\n" "BEGIN\n" "\n" @@ -2202,24 +2307,22 @@ fts_query_total_docs_containing_term( "END LOOP;\n" "CLOSE c;"); - for(;;) { + for (;;) { error = fts_eval_sql(trx, graph); if (error == DB_SUCCESS) { break; /* Exit the loop. */ } else { - ut_print_timestamp(stderr); if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); + ib::warn() << "lock wait timeout reading FTS" + " index. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS index.\n", error); + ib::error() << error + << " while reading FTS index."; break; /* Exit the loop. 
*/ } @@ -2247,6 +2350,7 @@ fts_query_terms_in_document( que_t* graph; doc_id_t read_doc_id; trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN]; trx->op_info = "fetching FTS document term count"; @@ -2262,15 +2366,19 @@ fts_query_terms_in_document( query->fts_index_table.suffix = "DOC_ID"; + fts_get_table_name(&query->fts_index_table, table_name); + + pars_info_bind_id(info, true, "index_table_name", table_name); + graph = fts_parse_sql( &query->fts_index_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" " SELECT count\n" - " FROM \"%s\"\n" - " WHERE doc_id = :doc_id " - "BEGIN\n" + " FROM $index_table_name\n" + " WHERE doc_id = :doc_id" + " BEGIN\n" "\n" "OPEN c;\n" "WHILE 1 = 1 LOOP\n" @@ -2281,25 +2389,22 @@ fts_query_terms_in_document( "END LOOP;\n" "CLOSE c;"); - for(;;) { + for (;;) { error = fts_eval_sql(trx, graph); if (error == DB_SUCCESS) { break; /* Exit the loop. */ } else { - ut_print_timestamp(stderr); if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS doc id table. " - "Retrying!\n"); + ib::warn() << "lock wait timeout reading FTS" + " doc id table. Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS doc id table.\n", - error); + ib::error() << error << " while reading FTS" + " doc id table."; break; /* Exit the loop. 
*/ } @@ -2323,20 +2428,18 @@ fts_query_match_document( fts_get_doc_t* get_doc, /*!< in: table and prepared statements */ fts_match_t* match, /*!< in: doc id and positions */ ulint distance, /*!< in: proximity distance */ + st_mysql_ftparser* parser, /*!< in: fts plugin parser */ ibool* found) /*!< out: TRUE if phrase found */ { dberr_t error; - fts_phrase_t phrase; - - memset(&phrase, 0x0, sizeof(phrase)); + fts_phrase_t phrase(get_doc->index_cache->index->table); phrase.match = match; /* Positions to match */ phrase.tokens = tokens; /* Tokens to match */ phrase.distance = distance; phrase.charset = get_doc->index_cache->charset; - phrase.zip_size = dict_table_zip_size( - get_doc->index_cache->index->table); phrase.heap = mem_heap_create(512); + phrase.parser = parser; *found = phrase.found = FALSE; @@ -2345,9 +2448,8 @@ fts_query_match_document( fts_query_fetch_document, &phrase); if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: Error: (%s) matching document.\n", - ut_strerr(error)); + ib::error() << "(" << ut_strerr(error) + << ") matching document."; } else { *found = phrase.found; } @@ -2370,23 +2472,21 @@ fts_query_is_in_proximity_range( fts_proximity_t* qualified_pos) /*!< in: position info for qualified ranges */ { - fts_get_doc_t get_doc; - fts_cache_t* cache = query->index->table->fts->cache; - dberr_t err; - fts_phrase_t phrase; + fts_get_doc_t get_doc; + fts_cache_t* cache = query->index->table->fts->cache; + dberr_t err; memset(&get_doc, 0x0, sizeof(get_doc)); - memset(&phrase, 0x0, sizeof(phrase)); rw_lock_x_lock(&cache->lock); get_doc.index_cache = fts_find_index_cache(cache, query->index); rw_lock_x_unlock(&cache->lock); ut_a(get_doc.index_cache != NULL); + fts_phrase_t phrase(get_doc.index_cache->index->table); + phrase.distance = query->distance; phrase.charset = get_doc.index_cache->charset; - phrase.zip_size = dict_table_zip_size( - get_doc.index_cache->index->table); phrase.heap = mem_heap_create(512); 
phrase.proximity_pos = qualified_pos; phrase.found = FALSE; @@ -2396,9 +2496,8 @@ fts_query_is_in_proximity_range( fts_query_fetch_document, &phrase); if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error: (%s) in verification phase of proximity " - "search", ut_strerr(err)); + ib::error() << "(" << ut_strerr(err) << ") in verification" + " phase of proximity search"; } /* Free the prepared statement. */ @@ -2449,8 +2548,7 @@ fts_query_search_phrase( rw_lock_x_unlock(&cache->lock); #ifdef FTS_INTERNAL_DIAG_PRINT - ut_print_timestamp(stderr); - fprintf(stderr, " Start phrase search\n"); + ib::info() << "Start phrase search"; #endif /* Read the document from disk and do the actual @@ -2468,8 +2566,8 @@ fts_query_search_phrase( if (match->doc_id != 0) { query->error = fts_query_match_document( - orig_tokens, &get_doc, - match, query->distance, &found); + orig_tokens, &get_doc, match, + query->distance, query->parser, &found); if (query->error == DB_SUCCESS && found) { ulint z; @@ -2501,77 +2599,91 @@ func_exit: return(query->error); } -/*****************************************************************//** -Text/Phrase search. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_phrase_search( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* phrase) /*!< in: token to search */ +/** Split the phrase into tokens +@param[in,out] query query instance +@param[in] node query node to search +@param[in,out] tokens token vector +@param[in,out] orig_tokens original node tokens include stopword +@param[in,out] heap mem heap */ +static +void +fts_query_phrase_split( + fts_query_t* query, + const fts_ast_node_t* node, + ib_vector_t* tokens, + ib_vector_t* orig_tokens, + mem_heap_t* heap) { - ib_vector_t* tokens; - ib_vector_t* orig_tokens; - mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t)); - ulint len = phrase->f_len; + fts_string_t phrase; + ulint len = 0; ulint cur_pos = 0; - ib_alloc_t* heap_alloc; - ulint num_token; - CHARSET_INFO* charset; + fts_ast_node_t* term_node = NULL; - charset = query->fts_index_table.charset; - - heap_alloc = ib_heap_allocator_create(heap); - - tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); - orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); - - if (query->distance != ULINT_UNDEFINED && query->distance > 0) { - query->flags = FTS_PROXIMITY; + if (node->type == FTS_AST_TEXT) { + phrase.f_str = node->text.ptr->str; + phrase.f_len = node->text.ptr->len; + len = phrase.f_len; } else { - query->flags = FTS_PHRASE; + ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST); + phrase.f_str = NULL; + phrase.f_len = 0; + term_node = node->list.head; } - /* Split the phrase into tokens. 
*/ - while (cur_pos < len) { + while (true) { fts_cache_t* cache = query->index->table->fts->cache; - ib_rbt_bound_t parent; - ulint offset; ulint cur_len; fts_string_t result_str; - cur_len = innobase_mysql_fts_get_token( - charset, - reinterpret_cast(phrase->f_str) + cur_pos, - reinterpret_cast(phrase->f_str) + len, - &result_str, &offset); + if (node->type == FTS_AST_TEXT) { + if (cur_pos >= len) { + break; + } - if (cur_len == 0) { - break; + cur_len = innobase_mysql_fts_get_token( + query->fts_index_table.charset, + reinterpret_cast(phrase.f_str) + + cur_pos, + reinterpret_cast(phrase.f_str) + + len, + &result_str); + + if (cur_len == 0) { + break; + } + + cur_pos += cur_len; + } else { + ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST); + /* Term node in parser phrase list */ + if (term_node == NULL) { + break; + } + + ut_a(term_node->type == FTS_AST_TERM); + result_str.f_str = term_node->term.ptr->str; + result_str.f_len = term_node->term.ptr->len; + result_str.f_n_char = fts_get_token_size( + query->fts_index_table.charset, + reinterpret_cast(result_str.f_str), + result_str.f_len); + + term_node = term_node->next; } - cur_pos += cur_len; - if (result_str.f_n_char == 0) { continue; } fts_string_t* token = static_cast( ib_vector_push(tokens, NULL)); + fts_string_dup(token, &result_str, heap); - token->f_str = static_cast( - mem_heap_alloc(heap, result_str.f_len + 1)); - ut_memcpy(token->f_str, result_str.f_str, result_str.f_len); - - token->f_len = result_str.f_len; - token->f_str[token->f_len] = 0; - - if (cache->stopword_info.cached_stopword - && rbt_search(cache->stopword_info.cached_stopword, - &parent, token) != 0 - && result_str.f_n_char >= fts_min_token_size - && result_str.f_n_char <= fts_max_token_size) { + if (fts_check_token( + &result_str, + cache->stopword_info.cached_stopword, + query->index->is_ngram, + query->fts_index_table.charset)) { /* Add the word to the RB tree so that we can calculate it's frequencey within a document. 
*/ fts_query_add_word_freq(query, token); @@ -2590,6 +2702,37 @@ fts_query_phrase_search( orig_token->f_len = token->f_len; } } +} + +/*****************************************************************//** +Text/Phrase search. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((warn_unused_result)) +dberr_t +fts_query_phrase_search( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_ast_node_t* node) /*!< in: node to search */ +{ + ib_vector_t* tokens; + ib_vector_t* orig_tokens; + mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t)); + ib_alloc_t* heap_alloc; + ulint num_token; + + heap_alloc = ib_heap_allocator_create(heap); + + tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); + orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); + + if (query->distance != ULINT_UNDEFINED && query->distance > 0) { + query->flags = FTS_PROXIMITY; + } else { + query->flags = FTS_PHRASE; + } + + /* Split the phrase into tokens. */ + fts_query_phrase_split(query, node, tokens, orig_tokens, heap); num_token = ib_vector_size(tokens); if (num_token > MAX_PROXIMITY_ITEM) { @@ -2787,7 +2930,7 @@ fts_query_execute( /*****************************************************************//** Create a wildcard string. It's the responsibility of the caller to -free the byte* pointer. It's allocated using ut_malloc(). +free the byte* pointer. It's allocated using ut_malloc_nokey(). 
@return ptr to allocated memory */ static byte* @@ -2808,7 +2951,7 @@ fts_query_get_token( if (node->term.wildcard) { - token->f_str = static_cast(ut_malloc(str_len + 2)); + token->f_str = static_cast(ut_malloc_nokey(str_len + 2)); token->f_len = str_len + 1; memcpy(token->f_str, node->term.ptr->str, str_len); @@ -2846,8 +2989,7 @@ fts_query_visitor( switch (node->type) { case FTS_AST_TEXT: - token.f_str = node->text.ptr->str; - token.f_len = node->text.ptr->len; + case FTS_AST_PARSER_PHRASE_LIST: if (query->oper == FTS_EXIST) { ut_ad(query->intersection == NULL); @@ -2863,7 +3005,7 @@ fts_query_visitor( /* Force collection of doc ids and the positions. */ query->collect_positions = TRUE; - query->error = fts_query_phrase_search(query, &token); + query->error = fts_query_phrase_search(query, node); query->collect_positions = FALSE; @@ -2879,6 +3021,20 @@ fts_query_visitor( token.f_str = node->term.ptr->str; token.f_len = node->term.ptr->len; + /* Collect wildcard words for QUERY EXPANSION. */ + if (node->term.wildcard && query->wildcard_words != NULL) { + ib_rbt_bound_t parent; + + if (rbt_search(query->wildcard_words, &parent, &token) + != 0) { + fts_string_t word; + + fts_string_dup(&word, &token, query->heap); + rbt_add_node(query->wildcard_words, &parent, + &word); + } + } + /* Add the word to our RB tree that will be used to calculate this terms per document frequency. */ fts_query_add_word_freq(query, &token); @@ -2889,6 +3045,7 @@ fts_query_visitor( if (ptr) { ut_free(ptr); } + break; case FTS_AST_SUBEXP_LIST: @@ -2910,8 +3067,7 @@ fts_query_visitor( Process (nested) sub-expression, create a new result set to store the sub-expression result by processing nodes under current sub-expression list. Merge the sub-expression result with that of parent expression list. 
-@return DB_SUCCESS if all well */ -UNIV_INTERN +@return DB_SUCCESS if all go well */ dberr_t fts_ast_visit_sub_exp( /*==================*/ @@ -3060,6 +3216,11 @@ fts_query_filter_doc_ids( ulint decoded = 0; ib_rbt_t* doc_freqs = word_freq->doc_freqs; + if (query->limit != ULONG_UNDEFINED + && query->n_docs >= query->limit) { + return(DB_SUCCESS); + } + /* Decode the ilist and add the doc ids to the query doc_id set. */ while (decoded < len) { ulint freq = 0; @@ -3147,11 +3308,17 @@ fts_query_filter_doc_ids( /* Add the word to the document's matched RB tree. */ fts_query_add_word_to_document(query, doc_id, word); } + + if (query->limit != ULONG_UNDEFINED + && query->limit <= ++query->n_docs) { + goto func_exit; + } } /* Some sanity checks. */ ut_a(doc_id == node->last_doc_id); +func_exit: if (query->total_size > fts_result_cache_limit) { return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); } else { @@ -3180,8 +3347,9 @@ fts_query_read_node( byte buf[FTS_MAX_WORD_LEN + 1]; dberr_t error = DB_SUCCESS; - ut_a(query->cur_node->type == FTS_AST_TERM || - query->cur_node->type == FTS_AST_TEXT); + ut_a(query->cur_node->type == FTS_AST_TERM + || query->cur_node->type == FTS_AST_TEXT + || query->cur_node->type == FTS_AST_PARSER_PHRASE_LIST); memset(&node, 0, sizeof(node)); term.f_str = buf; @@ -3191,6 +3359,7 @@ fts_query_read_node( to assign the frequency on search string behalf. 
*/ if (query->cur_node->type == FTS_AST_TERM && query->cur_node->term.wildcard) { + term.f_len = query->cur_node->term.ptr->len; ut_ad(FTS_MAX_WORD_LEN >= term.f_len); memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len); @@ -3344,11 +3513,11 @@ fts_query_calculate_idf( } if (fts_enable_diag_print) { - fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF - " %6.5lf\n", - word_freq->word.f_str, - query->total_docs, word_freq->doc_count, - word_freq->idf); + ib::info() << "'" << word_freq->word.f_str << "' -> " + << query->total_docs << "/" + << word_freq->doc_count << " " + << std::setw(6) << std::setprecision(5) + << word_freq->idf; } } } @@ -3477,9 +3646,8 @@ fts_query_prepare_result( DBUG_ENTER("fts_query_prepare_result"); if (result == NULL) { - result = static_cast(ut_malloc(sizeof(*result))); - - memset(result, 0x0, sizeof(*result)); + result = static_cast( + ut_zalloc_nokey(sizeof(*result))); result->rankings_by_id = rbt_create( sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); @@ -3605,8 +3773,8 @@ fts_query_get_result( result = fts_query_prepare_result(query, result); } else { /* Create an empty result instance. */ - result = static_cast(ut_malloc(sizeof(*result))); - memset(result, 0, sizeof(*result)); + result = static_cast( + ut_zalloc_nokey(sizeof(*result))); } DBUG_RETURN(result); @@ -3657,14 +3825,18 @@ fts_query_free( rbt_free(query->word_freqs); } + if (query->wildcard_words != NULL) { + rbt_free(query->wildcard_words); + } + ut_a(!query->intersection); if (query->word_map) { rbt_free(query->word_map); } - if (query->word_vector) { - delete query->word_vector; + if (query->word_vector != NULL) { + UT_DELETE(query->word_vector); } if (query->heap) { @@ -3675,7 +3847,8 @@ fts_query_free( } /*****************************************************************//** -Parse the query using flex/bison. */ +Parse the query using flex/bison or plugin parser. +@return parse tree node. 
*/ static fts_ast_node_t* fts_query_parse( @@ -3691,12 +3864,24 @@ fts_query_parse( memset(&state, 0x0, sizeof(state)); - /* Setup the scanner to use, this depends on the mode flag. */ - state.lexer = fts_lexer_create(mode, query_str, query_len); state.charset = query->fts_index_table.charset; - error = fts_parse(&state); - fts_lexer_free(state.lexer); - state.lexer = NULL; + + DBUG_EXECUTE_IF("fts_instrument_query_disable_parser", + query->parser = NULL;); + + if (query->parser) { + state.root = state.cur_node = + fts_ast_create_node_list(&state, NULL); + error = fts_parse_by_parser(mode, query_str, query_len, + query->parser, &state); + } else { + /* Setup the scanner to use, this depends on the mode flag. */ + state.lexer = fts_lexer_create(mode, query_str, query_len); + state.charset = query->fts_index_table.charset; + error = fts_parse(&state); + fts_lexer_free(state.lexer); + state.lexer = NULL; + } /* Error during parsing ? */ if (error) { @@ -3704,6 +3889,10 @@ fts_query_parse( fts_ast_state_free(&state); } else { query->root = state.root; + + if (fts_enable_diag_print && query->root != NULL) { + fts_ast_node_print(query->root); + } } DBUG_RETURN(state.root); @@ -3733,108 +3922,29 @@ fts_query_can_optimize( } } -/*******************************************************************//** -Pre-process the query string -1) make it lower case -2) in boolean mode, if there is '-' or '+' that is immediately proceeded -and followed by valid word, make it a space -@return the processed string */ -static -byte* -fts_query_str_preprocess( -/*=====================*/ - const byte* query_str, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len */ - ulint *result_len, /*!< out: result string length */ - CHARSET_INFO* charset, /*!< in: string charset */ - bool boolean_mode) /*!< in: is boolean mode */ -{ - ulint cur_pos = 0; - ulint str_len; - byte* str_ptr; - bool in_phrase = false; - - /* Convert the query string to lower case before parsing. 
We own - the ut_malloc'ed result and so remember to free it before return. */ - - str_len = query_len * charset->casedn_multiply + 1; - str_ptr = static_cast(ut_malloc(str_len)); - - *result_len = innobase_fts_casedn_str( - charset, const_cast(reinterpret_cast( - query_str)), query_len, - reinterpret_cast(str_ptr), str_len); - - ut_ad(*result_len < str_len); - - str_ptr[*result_len] = 0; - - /* If it is boolean mode, no need to check for '-/+' */ - if (!boolean_mode) { - return(str_ptr); - } - - /* Otherwise, we travese the string to find any '-/+' that are - immediately proceeded and followed by valid search word. - NOTE: we should not do so for CJK languages, this should - be taken care of in our CJK implementation */ - while (cur_pos < *result_len) { - fts_string_t str; - ulint offset; - ulint cur_len; - - cur_len = innobase_mysql_fts_get_token( - charset, str_ptr + cur_pos, str_ptr + *result_len, - &str, &offset); - - if (cur_len == 0 || str.f_str == NULL) { - /* No valid word found */ - break; - } - - /* Check if we are in a phrase, if so, no need to do - replacement of '-/+'. */ - for (byte* ptr = str_ptr + cur_pos; ptr < str.f_str; ptr++) { - if ((char) (*ptr) == '"' ) { - in_phrase = !in_phrase; - } - } - - /* Find those are not leading '-/+' and also not in a phrase */ - if (cur_pos > 0 && str.f_str - str_ptr - cur_pos == 1 - && !in_phrase) { - char* last_op = reinterpret_cast( - str_ptr + cur_pos); - - if (*last_op == '-' || *last_op == '+') { - *last_op = ' '; - } - } - - cur_pos += cur_len; - } - - return(str_ptr); -} - -/*******************************************************************//** -FTS Query entry point. +/** FTS Query entry point. 
+@param[in] trx transaction +@param[in] index fts index to search +@param[in] flags FTS search mode +@param[in] query_str FTS query +@param[in] query_len FTS query string len in bytes +@param[in,out] result result doc ids +@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ -UNIV_INTERN dberr_t fts_query( -/*======*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: The FTS index to search */ - uint flags, /*!< in: FTS search mode */ - const byte* query_str, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len - in bytes */ - fts_result_t** result) /*!< in/out: result doc ids */ + trx_t* trx, + dict_index_t* index, + uint flags, + const byte* query_str, + ulint query_len, + fts_result_t** result, + ulonglong limit) { fts_query_t query; dberr_t error = DB_SUCCESS; byte* lc_query_str; + ulint lc_query_str_len; ulint result_len; bool boolean_mode; trx_t* query_trx; @@ -3859,7 +3969,7 @@ fts_query( query.fts_common_table.type = FTS_COMMON_TABLE; query.fts_common_table.table_id = index->table->id; - query.fts_common_table.parent = index->table->name; + query.fts_common_table.parent = index->table->name.m_name; query.fts_common_table.table = index->table; charset = fts_index_get_charset(index); @@ -3867,26 +3977,33 @@ fts_query( query.fts_index_table.type = FTS_INDEX_TABLE; query.fts_index_table.index_id = index->id; query.fts_index_table.table_id = index->table->id; - query.fts_index_table.parent = index->table->name; + query.fts_index_table.parent = index->table->name.m_name; query.fts_index_table.charset = charset; query.fts_index_table.table = index->table; query.word_map = rbt_create_arg_cmp( - sizeof(fts_string_t), innobase_fts_text_cmp, - (void *) charset); - query.word_vector = new word_vector_t; + sizeof(fts_string_t), innobase_fts_text_cmp, (void*)charset); + query.word_vector = UT_NEW_NOKEY(word_vector_t()); query.error = DB_SUCCESS; /* Setup the RB tree that will be used to collect per 
term statistics. */ query.word_freqs = rbt_create_arg_cmp( - sizeof(fts_word_freq_t), innobase_fts_text_cmp, + sizeof(fts_word_freq_t), innobase_fts_text_cmp, (void*) charset); + if (flags & FTS_EXPAND) { + query.wildcard_words = rbt_create_arg_cmp( + sizeof(fts_string_t), innobase_fts_text_cmp, (void *)charset); + } + query.total_size += SIZEOF_RBT_CREATE; query.total_docs = dict_table_get_n_rows(index->table); + query.limit = limit; + + query.n_docs = 0; #ifdef FTS_DOC_STATS_DEBUG if (ft_enable_diag_print) { error = fts_get_total_word_count( @@ -3896,8 +4013,8 @@ fts_query( goto func_exit; } - fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n", - query.total_docs, query.total_words); + ib::info() << "Total docs: " << query.total_docs + << " Total words: " << query.total_words; } #endif /* FTS_DOC_STATS_DEBUG */ @@ -3928,12 +4045,11 @@ fts_query( /* Sort the vector so that we can do a binary search over the ids. */ ib_vector_sort(query.deleted->doc_ids, fts_update_doc_id_cmp); -#if 0 /* Convert the query string to lower case before parsing. We own the ut_malloc'ed result and so remember to free it before return. */ lc_query_str_len = query_len * charset->casedn_multiply + 1; - lc_query_str = static_cast(ut_malloc(lc_query_str_len)); + lc_query_str = static_cast(ut_malloc_nokey(lc_query_str_len)); result_len = innobase_fts_casedn_str( charset, (char*) query_str, query_len, @@ -3943,16 +4059,12 @@ fts_query( lc_query_str[result_len] = 0; -#endif - - lc_query_str = fts_query_str_preprocess( - query_str, query_len, &result_len, charset, boolean_mode); - query.heap = mem_heap_create(128); /* Create the rb tree for the doc id (current) set. 
*/ query.doc_ids = rbt_create( sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + query.parser = index->parser; query.total_size += SIZEOF_RBT_CREATE; @@ -3967,6 +4079,19 @@ fts_query( fts_result_cache_limit = 2048; ); + /* Optimisation is allowed for limit value + when + i) No ranking involved + ii) Only FTS Union operations involved. */ + if (query.limit != ULONG_UNDEFINED + && !fts_ast_node_check_union(ast)) { + query.limit = ULONG_UNDEFINED; + } + + DBUG_EXECUTE_IF("fts_union_limit_off", + query.limit = ULONG_UNDEFINED; + ); + /* Traverse the Abstract Syntax Tree (AST) and execute the query. */ query.error = fts_ast_visit( @@ -3995,29 +4120,28 @@ fts_query( } else { /* still return an empty result set */ *result = static_cast( - ut_malloc(sizeof(**result))); - memset(*result, 0, sizeof(**result)); + ut_zalloc_nokey(sizeof(**result))); } ut_free(lc_query_str); if (fts_enable_diag_print && (*result)) { ulint diff_time = ut_time_ms() - start_time_ms; - fprintf(stderr, "FTS Search Processing time: %ld secs:" - " %ld millisec: row(s) %d \n", - diff_time / 1000, diff_time % 1000, - (*result)->rankings_by_id - ? (int) rbt_size((*result)->rankings_by_id) - : -1); + + ib::info() << "FTS Search Processing time: " + << diff_time / 1000 << " secs: " << diff_time % 1000 + << " millisec: row(s) " + << ((*result)->rankings_by_id + ? rbt_size((*result)->rankings_by_id) + : -1); /* Log memory consumption & result size */ - ib_logf(IB_LOG_LEVEL_INFO, - "Full Search Memory: " - "%lu (bytes), Row: %lu .", - query.total_size, - (*result)->rankings_by_id - ? rbt_size((*result)->rankings_by_id) - : 0); + ib::info() << "Full Search Memory: " << query.total_size + << " (bytes), Row: " + << ((*result)->rankings_by_id + ? rbt_size((*result)->rankings_by_id) + : 0) + << "."; } func_exit: @@ -4030,7 +4154,6 @@ func_exit: /*****************************************************************//** FTS Query free result, returned by fts_query(). 
*/ - void fts_query_free_result( /*==================*/ @@ -4053,7 +4176,6 @@ fts_query_free_result( /*****************************************************************//** FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ - void fts_query_sort_result_on_rank( /*==========================*/ @@ -4089,7 +4211,6 @@ fts_query_sort_result_on_rank( result->rankings_by_rank = ranked; } -#ifdef UNIV_DEBUG /*******************************************************************//** A debug function to print result doc_id set. */ static @@ -4107,18 +4228,16 @@ fts_print_doc_id( fts_ranking_t* ranking; ranking = rbt_value(fts_ranking_t, node); - ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n", - (ulint) ranking->doc_id); + ib::info() << "doc_ids info, doc_id: " << ranking->doc_id; ulint pos = 0; fts_string_t word; while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { - ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str); + ib::info() << "doc_ids info, value: " << word.f_str; } } } -#endif /*************************************************************//** This function implements a simple "blind" query expansion search: @@ -4158,19 +4277,20 @@ fts_expand_query( (void*) index_cache->charset); result_doc.charset = index_cache->charset; + result_doc.parser = index_cache->index->parser; + result_doc.is_ngram = index_cache->index->is_ngram; query->total_size += SIZEOF_RBT_CREATE; -#ifdef UNIV_DEBUG - fts_print_doc_id(query); -#endif + + if (fts_enable_diag_print) { + fts_print_doc_id(query); + } for (node = rbt_first(query->doc_ids); node; node = rbt_next(query->doc_ids, node)) { fts_ranking_t* ranking; - ulint pos; - fts_string_t word; ulint prev_token_size; ulint estimate_size; @@ -4189,24 +4309,6 @@ fts_expand_query( fts_query_expansion_fetch_doc, &result_doc); - /* Remove words that have already been searched in the - first pass */ - pos = 0; - while (fts_ranking_words_get_next(query, ranking, &pos, - &word)) { - ibool 
ret; - - ret = rbt_delete(result_doc.tokens, &word); - - /* The word must exist in the doc we found */ - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, "Did not " - "find word %s in doc %ld for query " - "expansion search.\n", word.f_str, - (ulint) ranking->doc_id); - } - } - /* Estimate memory used, see fts_process_token and fts_token_t. We ignore token size here. */ estimate_size = (rbt_size(result_doc.tokens) - prev_token_size) @@ -4220,6 +4322,30 @@ fts_expand_query( } } + /* Remove words that have already been searched in the first pass */ + for (ulint i = 0; i < query->word_vector->size(); i++) { + fts_string_t word = query->word_vector->at(i); + ib_rbt_bound_t parent; + + if (query->wildcard_words + && rbt_search(query->wildcard_words, &parent, &word) == 0) { + /* If it's a wildcard word, remove words having + it as prefix. */ + while (rbt_search_cmp(result_doc.tokens, + &parent, &word, NULL, + innobase_fts_text_cmp_prefix) + == 0) { + ut_free(rbt_remove_node(result_doc.tokens, + parent.last)); + } + } else { + /* We don't check return value, because the word may + have been deleted by a previous wildcard word as its + prefix, e.g. ('g * good'). */ + rbt_delete(result_doc.tokens, &word); + } + } + /* Search the table the second time with expanded search list */ for (token_node = rbt_first(result_doc.tokens); token_node; @@ -4227,6 +4353,12 @@ fts_expand_query( fts_token_t* mytoken; mytoken = rbt_value(fts_token_t, token_node); + /* '%' in the end is treated as prefix search, + it can cause assert failure, so we skip it. 
*/ + if (mytoken->text.f_str[mytoken->text.f_len - 1] == '%') { + continue; + } + ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0); fts_query_add_word_freq(query, &mytoken->text); error = fts_query_union(query, &mytoken->text); diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc index cb8eff3cacc..4900ff3be1a 100644 --- a/storage/innobase/fts/fts0sql.cc +++ b/storage/innobase/fts/fts0sql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,13 +30,12 @@ Created 2007-03-27 Sunny Bains #include "fts0types.h" #include "fts0priv.h" -#ifndef UNIV_NONINL +#ifdef UNIV_NONINL #include "fts0types.ic" #include "fts0vlc.ic" #endif -/** SQL statements for creating the ancillary FTS tables. %s must be replaced -with the indexed table's id. */ +/** SQL statements for creating the ancillary FTS tables. */ /** Preamble to all SQL statements. */ static const char* fts_sql_begin= @@ -50,7 +49,6 @@ static const char* fts_sql_end= /******************************************************************//** Get the table id. @return number of bytes written */ -UNIV_INTERN int fts_get_table_id( /*=============*/ @@ -97,8 +95,7 @@ fts_get_table_id( /******************************************************************//** Construct the prefix name of an FTS table. 
-@return own: table name, must be freed with mem_free() */ -UNIV_INTERN +@return own: table name, must be freed with ut_free() */ char* fts_get_table_name_prefix( /*======================*/ @@ -124,7 +121,7 @@ fts_get_table_name_prefix( prefix_name_len = dbname_len + 4 + len + 1; - prefix_name = static_cast(mem_alloc(prefix_name_len)); + prefix_name = static_cast(ut_malloc_nokey(prefix_name_len)); len = sprintf(prefix_name, "%.*sFTS_%s", dbname_len, fts_table->parent, table_id); @@ -136,41 +133,34 @@ fts_get_table_name_prefix( } /******************************************************************//** -Construct the name of an ancillary FTS table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* +Construct the name of an ancillary FTS table for the given table. +Caller must allocate enough memory(usually size of MAX_FULL_NAME_LEN) +for param 'table_name'. */ +void fts_get_table_name( /*===============*/ - const fts_table_t* fts_table) + const fts_table_t* fts_table, /*!< in: Auxiliary table type */ + char* table_name) + /*!< in/out: aux table name */ { int len; - char* name; - int name_len; char* prefix_name; prefix_name = fts_get_table_name_prefix(fts_table); - name_len = static_cast( - strlen(prefix_name) + 1 + strlen(fts_table->suffix) + 1); - - name = static_cast(mem_alloc(name_len)); - - len = sprintf(name, "%s_%s", prefix_name, fts_table->suffix); + len = sprintf(table_name, "%s_%s", prefix_name, fts_table->suffix); ut_a(len > 0); - ut_a(len == name_len - 1); + ut_a(strlen(prefix_name) + 1 + strlen(fts_table->suffix) + == static_cast(len)); - mem_free(prefix_name); - - return(name); + ut_free(prefix_name); } /******************************************************************//** -Parse an SQL string. %s is replaced with the table's id. +Parse an SQL string. 
@return query graph */ -UNIV_INTERN que_t* fts_parse_sql( /*==========*/ @@ -180,31 +170,16 @@ fts_parse_sql( { char* str; que_t* graph; - char* str_tmp; ibool dict_locked; - if (fts_table != NULL) { - char* table_name; - - table_name = fts_get_table_name(fts_table); - str_tmp = ut_strreplace(sql, "%s", table_name); - mem_free(table_name); - } else { - ulint sql_len = strlen(sql) + 1; - - str_tmp = static_cast(mem_alloc(sql_len)); - strcpy(str_tmp, sql); - } - - str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); - mem_free(str_tmp); + str = ut_str3cat(fts_sql_begin, sql, fts_sql_end); dict_locked = (fts_table && fts_table->table->fts && (fts_table->table->fts->fts_status & TABLE_DICT_LOCKED)); if (!dict_locked) { - ut_ad(!mutex_own(&(dict_sys->mutex))); + ut_ad(!mutex_own(&dict_sys->mutex)); /* The InnoDB SQL parser is not re-entrant. */ mutex_enter(&dict_sys->mutex); @@ -217,15 +192,14 @@ fts_parse_sql( mutex_exit(&dict_sys->mutex); } - mem_free(str); + ut_free(str); return(graph); } /******************************************************************//** -Parse an SQL string. %s is replaced with the table's id. +Parse an SQL string. 
@return query graph */ -UNIV_INTERN que_t* fts_parse_sql_no_dict_lock( /*=======================*/ @@ -235,33 +209,19 @@ fts_parse_sql_no_dict_lock( { char* str; que_t* graph; - char* str_tmp = NULL; #ifdef UNIV_DEBUG ut_ad(mutex_own(&dict_sys->mutex)); #endif - if (fts_table != NULL) { - char* table_name; - - table_name = fts_get_table_name(fts_table); - str_tmp = ut_strreplace(sql, "%s", table_name); - mem_free(table_name); - } - - if (str_tmp != NULL) { - str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); - mem_free(str_tmp); - } else { - str = ut_str3cat(fts_sql_begin, sql, fts_sql_end); - } + str = ut_str3cat(fts_sql_begin, sql, fts_sql_end); //fprintf(stderr, "%s\n", str); graph = pars_sql(info, str); ut_a(graph); - mem_free(str); + ut_free(str); return(graph); } @@ -269,7 +229,6 @@ fts_parse_sql_no_dict_lock( /******************************************************************//** Evaluate an SQL query graph. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_eval_sql( /*=========*/ @@ -303,7 +262,6 @@ Two indexed columns named "subject" and "content": "$sel0, $sel1", info/ids: sel0 -> "subject", sel1 -> "content", @return heap-allocated WHERE string */ -UNIV_INTERN const char* fts_get_select_columns_str( /*=======================*/ @@ -334,7 +292,6 @@ fts_get_select_columns_str( /******************************************************************//** Commit a transaction. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_sql_commit( /*===========*/ @@ -353,7 +310,6 @@ fts_sql_commit( /******************************************************************//** Rollback a transaction. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_sql_rollback( /*=============*/ diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc index d4d9b4c48d1..4cb09784e03 100644 --- a/storage/innobase/fts/fts0tlex.cc +++ b/storage/innobase/fts/fts0tlex.cc @@ -184,15 +184,15 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 - #define YY_LESS_LINENO(n) +#define YY_LESS_LINENO(n) /* Return all but the first "n" matched characters back to the input stream. */ #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ *yy_cp = yyg->yy_hold_char; \ YY_RESTORE_YY_MORE_OFFSET \ yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ @@ -245,8 +245,8 @@ struct yy_buffer_state */ int yy_at_bol; - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. */ + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ /* Whether to try to fill the input buffer when we reach the * end of it. @@ -314,9 +314,9 @@ void fts0tfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ #define yy_set_interactive(is_interactive) \ { \ if ( ! YY_CURRENT_BUFFER ){ \ - fts0tensure_buffer_stack (yyscanner); \ + fts0tensure_buffer_stack (yyscanner); \ YY_CURRENT_BUFFER_LVALUE = \ - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ } \ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ } @@ -324,9 +324,9 @@ void fts0tfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ #define yy_set_bol(at_bol) \ { \ if ( ! 
YY_CURRENT_BUFFER ){\ - fts0tensure_buffer_stack (yyscanner); \ + fts0tensure_buffer_stack (yyscanner); \ YY_CURRENT_BUFFER_LVALUE = \ - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ } \ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ } @@ -475,9 +475,10 @@ this program; if not, write to the Free Software Foundation, Inc., /* Required for reentrant parser */ #define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error #define YY_NO_INPUT 1 -#line 480 "fts0tlex.cc" +#line 481 "fts0tlex.cc" #define INITIAL 0 @@ -495,37 +496,37 @@ this program; if not, write to the Free Software Foundation, Inc., /* Holds the entire state of the reentrant scanner. */ struct yyguts_t - { +{ - /* User-defined. Not touched by flex. */ - YY_EXTRA_TYPE yyextra_r; + /* User-defined. Not touched by flex. */ + YY_EXTRA_TYPE yyextra_r; - /* The rest are the same as the globals declared in the non-reentrant scanner. */ - FILE *yyin_r, *yyout_r; - size_t yy_buffer_stack_top; /**< index of top of stack. */ - size_t yy_buffer_stack_max; /**< capacity of stack. */ - YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ - char yy_hold_char; - int yy_n_chars; - int yyleng_r; - char *yy_c_buf_p; - int yy_init; - int yy_start; - int yy_did_buffer_switch_on_eof; - int yy_start_stack_ptr; - int yy_start_stack_depth; - int *yy_start_stack; - yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. 
*/ + char yy_hold_char; + int yy_n_chars; + int yyleng_r; + char *yy_c_buf_p; + int yy_init; + int yy_start; + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; + char* yy_last_accepting_cpos; - int yylineno_r; - int yy_flex_debug_r; + int yylineno_r; + int yy_flex_debug_r; - char *yytext_r; - int yy_more_flag; - int yy_more_len; + char *yytext_r; + int yy_more_flag; + int yy_more_len; - }; /* end struct yyguts_t */ +}; /* end struct yyguts_t */ static int yy_init_globals (yyscan_t yyscanner ); @@ -700,12 +701,12 @@ YY_DECL register yy_state_type yy_current_state; register char *yy_cp, *yy_bp; register int yy_act; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; -#line 44 "fts0tlex.l" +#line 45 "fts0tlex.l" -#line 707 "fts0tlex.cc" +#line 708 "fts0tlex.cc" if ( !yyg->yy_init ) { @@ -786,12 +787,12 @@ do_action: /* This label is used only to access EOF actions. 
*/ case 1: YY_RULE_SETUP -#line 46 "fts0tlex.l" +#line 47 "fts0tlex.l" /* Ignore whitespace */ ; YY_BREAK case 2: YY_RULE_SETUP -#line 48 "fts0tlex.l" +#line 49 "fts0tlex.l" { val->oper = fts0tget_text(yyscanner)[0]; @@ -800,7 +801,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 54 "fts0tlex.l" +#line 55 "fts0tlex.l" { val->token = fts_ast_string_create(reinterpret_cast(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); @@ -809,7 +810,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 60 "fts0tlex.l" +#line 61 "fts0tlex.l" { val->token = fts_ast_string_create(reinterpret_cast(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); @@ -818,21 +819,21 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 65 "fts0tlex.l" +#line 66 "fts0tlex.l" ; YY_BREAK case 6: /* rule 6 can match eol */ YY_RULE_SETUP -#line 66 "fts0tlex.l" +#line 67 "fts0tlex.l" YY_BREAK case 7: YY_RULE_SETUP -#line 68 "fts0tlex.l" +#line 69 "fts0tlex.l" ECHO; YY_BREAK -#line 834 "fts0tlex.cc" +#line 835 "fts0tlex.cc" case YY_STATE_EOF(INITIAL): yyterminate(); @@ -975,7 +976,7 @@ case YY_STATE_EOF(INITIAL): */ static int yy_get_next_buffer (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; register char *source = yyg->yytext_ptr; register int number_to_move, i; @@ -1108,11 +1109,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner) /* yy_get_previous_state - get the state just before the EOB char was reached */ - static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +static yy_state_type yy_get_previous_state (yyscan_t yyscanner) { register yy_state_type yy_current_state; register char *yy_cp; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; yy_current_state = yyg->yy_start; @@ -1141,10 +1142,10 @@ static int yy_get_next_buffer (yyscan_t yyscanner) * synopsis * next_state 
= yy_try_NUL_trans( current_state ); */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) { register int yy_is_jam; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ register char *yy_cp = yyg->yy_c_buf_p; register YY_CHAR yy_c = 1; @@ -1167,14 +1168,14 @@ static int yy_get_next_buffer (yyscan_t yyscanner) #ifndef YY_NO_INPUT #ifdef __cplusplus - static int yyinput (yyscan_t yyscanner) + static int yyinput (yyscan_t yyscanner) #else - static int input (yyscan_t yyscanner) + static int input (yyscan_t yyscanner) #endif { int c; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; *yyg->yy_c_buf_p = yyg->yy_hold_char; @@ -1245,14 +1246,14 @@ static int yy_get_next_buffer (yyscan_t yyscanner) * @param yyscanner The scanner object. * @note This function does not reset the start condition to @c INITIAL . */ - void fts0trestart (FILE * input_file , yyscan_t yyscanner) +void fts0trestart (FILE * input_file , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if ( ! YY_CURRENT_BUFFER ){ - fts0tensure_buffer_stack (yyscanner); + fts0tensure_buffer_stack (yyscanner); YY_CURRENT_BUFFER_LVALUE = - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); } fts0t_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); @@ -1263,15 +1264,15 @@ static int yy_get_next_buffer (yyscan_t yyscanner) * @param new_buffer The new input buffer. * @param yyscanner The scanner object. 
*/ - void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* TODO. We should be able to replace this entire function body * with * fts0tpop_buffer_state(); * fts0tpush_buffer_state(new_buffer); - */ + */ fts0tensure_buffer_stack (yyscanner); if ( YY_CURRENT_BUFFER == new_buffer ) return; @@ -1297,7 +1298,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner) static void fts0t_load_buffer_state (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; @@ -1310,7 +1311,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner) * @param yyscanner The scanner object. * @return the allocated buffer state. */ - YY_BUFFER_STATE fts0t_create_buffer (FILE * file, int size , yyscan_t yyscanner) +YY_BUFFER_STATE fts0t_create_buffer (FILE * file, int size , yyscan_t yyscanner) { YY_BUFFER_STATE b; @@ -1338,9 +1339,9 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner) * @param b a buffer created with fts0t_create_buffer() * @param yyscanner The scanner object. */ - void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if ( ! b ) return; @@ -1358,27 +1359,27 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner) * This function is sometimes called more than once on the same buffer, * such as during a fts0trestart() or at EOF. 
*/ - static void fts0t_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) +static void fts0t_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) { int oerrno = errno; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; fts0t_flush_buffer(b ,yyscanner); b->yy_input_file = file; b->yy_fill_buffer = 1; - /* If b is the current buffer, then fts0t_init_buffer was _probably_ - * called from fts0trestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } + /* If b is the current buffer, then fts0t_init_buffer was _probably_ + * called from fts0trestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } - b->yy_is_interactive = 0; + b->yy_is_interactive = 0; errno = oerrno; } @@ -1387,9 +1388,9 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner) * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. * @param yyscanner The scanner object. */ - void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if ( ! 
b ) return; @@ -1419,7 +1420,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner) */ void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if (new_buffer == NULL) return; @@ -1450,7 +1451,7 @@ void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) */ void fts0tpop_buffer_state (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if (!YY_CURRENT_BUFFER) return; @@ -1471,14 +1472,14 @@ void fts0tpop_buffer_state (yyscan_t yyscanner) static void fts0tensure_buffer_stack (yyscan_t yyscanner) { int num_to_alloc; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; if (!yyg->yy_buffer_stack) { /* First allocation is just for 2 elements, since we don't know if this * scanner will even need a stack. We use 2 instead of 1 to avoid an * immediate realloc on the next call. - */ + */ num_to_alloc = 1; yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0talloc (num_to_alloc * sizeof(struct yy_buffer_state*) @@ -1604,7 +1605,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) { - (void) fprintf( stderr, "%s\n", msg ); + (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); } @@ -1615,8 +1616,8 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY do \ { \ /* Undo effects of setting up yytext. 
*/ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ yytext[yyleng] = yyg->yy_hold_char; \ yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ yyg->yy_hold_char = *yyg->yy_c_buf_p; \ @@ -1632,8 +1633,8 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY */ YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyextra; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; } /** Get the current line number. @@ -1641,12 +1642,12 @@ YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner) */ int fts0tget_lineno (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (! YY_CURRENT_BUFFER) - return 0; + if (! YY_CURRENT_BUFFER) + return 0; - return yylineno; + return yylineno; } /** Get the current column number. @@ -1654,12 +1655,12 @@ int fts0tget_lineno (yyscan_t yyscanner) */ int fts0tget_column (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (! YY_CURRENT_BUFFER) - return 0; + if (! YY_CURRENT_BUFFER) + return 0; - return yycolumn; + return yycolumn; } /** Get the input stream. @@ -1667,8 +1668,8 @@ int fts0tget_column (yyscan_t yyscanner) */ FILE *fts0tget_in (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyin; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; } /** Get the output stream. @@ -1676,8 +1677,8 @@ FILE *fts0tget_in (yyscan_t yyscanner) */ FILE *fts0tget_out (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyout; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; } /** Get the length of the current token. 
@@ -1685,8 +1686,8 @@ FILE *fts0tget_out (yyscan_t yyscanner) */ int fts0tget_leng (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyleng; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; } /** Get the current token. @@ -1695,8 +1696,8 @@ int fts0tget_leng (yyscan_t yyscanner) char *fts0tget_text (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yytext; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; } /** Set the user-defined data. This data is never touched by the scanner. @@ -1705,8 +1706,8 @@ char *fts0tget_text (yyscan_t yyscanner) */ void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyextra = user_defined ; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; } /** Set the current line number. @@ -1715,13 +1716,13 @@ void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) */ void fts0tset_lineno (int line_number , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* lineno is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner); + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner); - yylineno = line_number; + yylineno = line_number; } /** Set the current column. @@ -1730,13 +1731,13 @@ void fts0tset_lineno (int line_number , yyscan_t yyscanner) */ void fts0tset_column (int column_no , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* column is only valid if an input buffer exists. */ - if (! 
YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner); + /* column is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner); - yycolumn = column_no; + yycolumn = column_no; } /** Set the input stream. This does not discard the current @@ -1747,26 +1748,26 @@ void fts0tset_column (int column_no , yyscan_t yyscanner) */ void fts0tset_in (FILE * in_str , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyin = in_str ; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = in_str ; } void fts0tset_out (FILE * out_str , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyout = out_str ; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = out_str ; } int fts0tget_debug (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yy_flex_debug; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; } void fts0tset_debug (int bdebug , yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yy_flex_debug = bdebug ; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = bdebug ; } /* Accessor methods for yylval and yylloc */ @@ -1781,22 +1782,22 @@ void fts0tset_debug (int bdebug , yyscan_t yyscanner) int fts0tlex_init(yyscan_t* ptr_yy_globals) { - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } - *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), NULL ); + *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), NULL ); - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } - /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. 
*/ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - return yy_init_globals ( *ptr_yy_globals ); + return yy_init_globals ( *ptr_yy_globals ); } /* fts0tlex_init_extra has the same functionality as fts0tlex_init, but follows the @@ -1810,70 +1811,70 @@ int fts0tlex_init(yyscan_t* ptr_yy_globals) int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) { - struct yyguts_t dummy_yyguts; + struct yyguts_t dummy_yyguts; - fts0tset_extra (yy_user_defined, &dummy_yyguts); + fts0tset_extra (yy_user_defined, &dummy_yyguts); - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } - *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } - /* By setting to 0xAA, we expose bugs in - yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - fts0tset_extra (yy_user_defined, *ptr_yy_globals); + fts0tset_extra (yy_user_defined, *ptr_yy_globals); - return yy_init_globals ( *ptr_yy_globals ); + return yy_init_globals ( *ptr_yy_globals ); } static int yy_init_globals (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from fts0tlex_destroy(), so don't allocate here. 
- */ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from fts0tlex_destroy(), so don't allocate here. + */ - yyg->yy_buffer_stack = 0; - yyg->yy_buffer_stack_top = 0; - yyg->yy_buffer_stack_max = 0; - yyg->yy_c_buf_p = (char *) 0; - yyg->yy_init = 0; - yyg->yy_start = 0; + yyg->yy_buffer_stack = 0; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = (char *) 0; + yyg->yy_init = 0; + yyg->yy_start = 0; - yyg->yy_start_stack_ptr = 0; - yyg->yy_start_stack_depth = 0; - yyg->yy_start_stack = NULL; + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; /* Defined in main.c */ #ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; + yyin = stdin; + yyout = stdout; #else - yyin = (FILE *) 0; - yyout = (FILE *) 0; + yyin = (FILE *) 0; + yyout = (FILE *) 0; #endif - /* For future reference: Set errno on error, since we are called by - * fts0tlex_init() - */ - return 0; + /* For future reference: Set errno on error, since we are called by + * fts0tlex_init() + */ + return 0; } /* fts0tlex_destroy is for both reentrant and non-reentrant scanners. */ int fts0tlex_destroy (yyscan_t yyscanner) { - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Pop the buffer stack, destroying each element. */ + /* Pop the buffer stack, destroying each element. */ while(YY_CURRENT_BUFFER){ fts0t_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); YY_CURRENT_BUFFER_LVALUE = NULL; @@ -1884,18 +1885,18 @@ int fts0tlex_destroy (yyscan_t yyscanner) fts0tfree(yyg->yy_buffer_stack ,yyscanner); yyg->yy_buffer_stack = NULL; - /* Destroy the start condition stack. */ - fts0tfree(yyg->yy_start_stack ,yyscanner ); - yyg->yy_start_stack = NULL; + /* Destroy the start condition stack. */ + fts0tfree(yyg->yy_start_stack ,yyscanner ); + yyg->yy_start_stack = NULL; - /* Reset the globals. 
This is important in a non-reentrant scanner so the next time - * fts0tlex() is called, initialization will occur. */ - yy_init_globals( yyscanner); + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * fts0tlex() is called, initialization will occur. */ + yy_init_globals( yyscanner); - /* Destroy the main struct (reentrant only). */ - fts0tfree ( yyscanner , yyscanner ); - yyscanner = NULL; - return 0; + /* Destroy the main struct (reentrant only). */ + fts0tfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; } /* @@ -1946,7 +1947,5 @@ void fts0tfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused) #define YYTABLES_NAME "yytables" -#line 68 "fts0tlex.l" - - +#line 69 "fts0tlex.l" diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l index 4f55a83afe5..242bfebda52 100644 --- a/storage/innobase/fts/fts0tlex.l +++ b/storage/innobase/fts/fts0tlex.l @@ -30,6 +30,7 @@ this program; if not, write to the Free Software Foundation, Inc., /* Required for reentrant parser */ #define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error %} diff --git a/storage/innobase/fut/fut0lst.cc b/storage/innobase/fut/fut0lst.cc index 8f96a6426d2..8b39a4de1fb 100644 --- a/storage/innobase/fut/fut0lst.cc +++ b/storage/innobase/fut/fut0lst.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,6 +23,7 @@ File-based list utilities Created 11/28/1995 Heikki Tuuri ***********************************************************************/ +#include "univ.i" #include "fut0lst.h" #ifdef UNIV_NONINL @@ -49,9 +50,13 @@ flst_add_to_empty( ut_ad(mtr && base && node); ut_ad(base != node); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + len = flst_get_len(base); ut_a(len == 0); buf_ptr_get_fsp_addr(node, &space, &node_addr); @@ -70,7 +75,6 @@ flst_add_to_empty( /********************************************************************//** Adds a node as the last node in a list. 
*/ -UNIV_INTERN void flst_add_last( /*==========*/ @@ -82,26 +86,35 @@ flst_add_last( fil_addr_t node_addr; ulint len; fil_addr_t last_addr; - flst_node_t* last_node; ut_ad(mtr && base && node); ut_ad(base != node); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + len = flst_get_len(base); last_addr = flst_get_last(base, mtr); buf_ptr_get_fsp_addr(node, &space, &node_addr); /* If the list is not empty, call flst_insert_after */ if (len != 0) { + flst_node_t* last_node; + if (last_addr.page == node_addr.page) { last_node = page_align(node) + last_addr.boffset; } else { - ulint zip_size = fil_space_get_zip_size(space); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space, &found); - last_node = fut_get_ptr(space, zip_size, last_addr, - RW_X_LATCH, mtr); + ut_ad(found); + + last_node = fut_get_ptr(space, page_size, last_addr, + RW_SX_LATCH, mtr); } flst_insert_after(base, last_node, node, mtr); @@ -113,7 +126,6 @@ flst_add_last( /********************************************************************//** Adds a node as the first node in a list. 
*/ -UNIV_INTERN void flst_add_first( /*===========*/ @@ -129,9 +141,13 @@ flst_add_first( ut_ad(mtr && base && node); ut_ad(base != node); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + len = flst_get_len(base); first_addr = flst_get_first(base, mtr); buf_ptr_get_fsp_addr(node, &space, &node_addr); @@ -141,10 +157,14 @@ flst_add_first( if (first_addr.page == node_addr.page) { first_node = page_align(node) + first_addr.boffset; } else { - ulint zip_size = fil_space_get_zip_size(space); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space, &found); - first_node = fut_get_ptr(space, zip_size, first_addr, - RW_X_LATCH, mtr); + ut_ad(found); + + first_node = fut_get_ptr(space, page_size, first_addr, + RW_SX_LATCH, mtr); } flst_insert_before(base, node, first_node, mtr); @@ -156,7 +176,6 @@ flst_add_first( /********************************************************************//** Inserts a node after another in a list. 
*/ -UNIV_INTERN void flst_insert_after( /*==============*/ @@ -176,9 +195,15 @@ flst_insert_after( ut_ad(base != node1); ut_ad(base != node2); ut_ad(node2 != node1); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node1, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node2, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); buf_ptr_get_fsp_addr(node1, &space, &node1_addr); buf_ptr_get_fsp_addr(node2, &space, &node2_addr); @@ -191,10 +216,14 @@ flst_insert_after( if (!fil_addr_is_null(node3_addr)) { /* Update prev field of node3 */ - ulint zip_size = fil_space_get_zip_size(space); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space, &found); - node3 = fut_get_ptr(space, zip_size, - node3_addr, RW_X_LATCH, mtr); + ut_ad(found); + + node3 = fut_get_ptr(space, page_size, + node3_addr, RW_SX_LATCH, mtr); flst_write_addr(node3 + FLST_PREV, node2_addr, mtr); } else { /* node1 was last in list: update last field in base */ @@ -205,13 +234,12 @@ flst_insert_after( flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr); /* Update len of base node */ - len = flst_get_len(base, mtr); + len = flst_get_len(base); mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); } /********************************************************************//** Inserts a node before another in a list. 
*/ -UNIV_INTERN void flst_insert_before( /*===============*/ @@ -231,9 +259,15 @@ flst_insert_before( ut_ad(base != node2); ut_ad(base != node3); ut_ad(node2 != node3); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node2, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node3, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); buf_ptr_get_fsp_addr(node2, &space, &node2_addr); buf_ptr_get_fsp_addr(node3, &space, &node3_addr); @@ -245,10 +279,15 @@ flst_insert_before( flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr); if (!fil_addr_is_null(node1_addr)) { - ulint zip_size = fil_space_get_zip_size(space); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space, &found); + + ut_ad(found); + /* Update next field of node1 */ - node1 = fut_get_ptr(space, zip_size, node1_addr, - RW_X_LATCH, mtr); + node1 = fut_get_ptr(space, page_size, node1_addr, + RW_SX_LATCH, mtr); flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr); } else { /* node3 was first in list: update first field in base */ @@ -259,13 +298,12 @@ flst_insert_before( flst_write_addr(node3 + FLST_PREV, node2_addr, mtr); /* Update len of base node */ - len = flst_get_len(base, mtr); + len = flst_get_len(base); mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); } /********************************************************************//** Removes a node. 
*/ -UNIV_INTERN void flst_remove( /*========*/ @@ -274,7 +312,6 @@ flst_remove( mtr_t* mtr) /*!< in: mini-transaction handle */ { ulint space; - ulint zip_size; flst_node_t* node1; fil_addr_t node1_addr; fil_addr_t node2_addr; @@ -283,11 +320,20 @@ flst_remove( ulint len; ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node2, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - zip_size = fil_space_get_zip_size(space); + + bool found; + const page_size_t& page_size = fil_space_get_page_size(space, + &found); + + ut_ad(found); node1_addr = flst_get_prev_addr(node2, mtr); node3_addr = flst_get_next_addr(node2, mtr); @@ -300,8 +346,8 @@ flst_remove( node1 = page_align(node2) + node1_addr.boffset; } else { - node1 = fut_get_ptr(space, zip_size, - node1_addr, RW_X_LATCH, mtr); + node1 = fut_get_ptr(space, page_size, + node1_addr, RW_SX_LATCH, mtr); } ut_ad(node1 != node2); @@ -319,8 +365,8 @@ flst_remove( node3 = page_align(node2) + node3_addr.boffset; } else { - node3 = fut_get_ptr(space, zip_size, - node3_addr, RW_X_LATCH, mtr); + node3 = fut_get_ptr(space, page_size, + node3_addr, RW_SX_LATCH, mtr); } ut_ad(node2 != node3); @@ -332,7 +378,7 @@ flst_remove( } /* Update len of base node */ - len = flst_get_len(base, mtr); + len = flst_get_len(base); ut_ad(len > 0); mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr); @@ -342,7 +388,6 @@ flst_remove( Cuts off the tail of the list, including the node given. The number of nodes which will be removed must be provided by the caller, as this function does not measure the length of the tail. 
*/ -UNIV_INTERN void flst_cut_end( /*=========*/ @@ -359,8 +404,12 @@ flst_cut_end( ulint len; ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node2, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); ut_ad(n_nodes > 0); buf_ptr_get_fsp_addr(node2, &space, &node2_addr); @@ -375,9 +424,14 @@ flst_cut_end( node1 = page_align(node2) + node1_addr.boffset; } else { - node1 = fut_get_ptr(space, - fil_space_get_zip_size(space), - node1_addr, RW_X_LATCH, mtr); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space, &found); + + ut_ad(found); + + node1 = fut_get_ptr(space, page_size, + node1_addr, RW_SX_LATCH, mtr); } flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr); @@ -389,7 +443,7 @@ flst_cut_end( flst_write_addr(base + FLST_LAST, node1_addr, mtr); /* Update len of base node */ - len = flst_get_len(base, mtr); + len = flst_get_len(base); ut_ad(len >= n_nodes); mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr); @@ -399,7 +453,6 @@ flst_cut_end( Cuts off the tail of the list, not including the given node. The number of nodes which will be removed must be provided by the caller, as this function does not measure the length of the tail. 
*/ -UNIV_INTERN void flst_truncate_end( /*==============*/ @@ -413,8 +466,12 @@ flst_truncate_end( ulint space; ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, node2, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); if (n_nodes == 0) { ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr))); @@ -430,7 +487,7 @@ flst_truncate_end( flst_write_addr(base + FLST_LAST, node2_addr, mtr); /* Update len of base node */ - len = flst_get_len(base, mtr); + len = flst_get_len(base); ut_ad(len >= n_nodes); mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr); @@ -438,8 +495,7 @@ flst_truncate_end( /********************************************************************//** Validates a file-based list. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool flst_validate( /*==========*/ @@ -447,7 +503,6 @@ flst_validate( mtr_t* mtr1) /*!< in: mtr */ { ulint space; - ulint zip_size; const flst_node_t* node; fil_addr_t node_addr; fil_addr_t base_addr; @@ -456,7 +511,9 @@ flst_validate( mtr_t mtr2; ut_ad(base); - ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr1, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); /* We use two mini-transaction handles: the first is used to lock the base node, and prevent other threads from modifying the @@ -467,16 +524,21 @@ flst_validate( /* Find out the space id */ buf_ptr_get_fsp_addr(base, &space, &base_addr); - zip_size = fil_space_get_zip_size(space); - len = flst_get_len(base, mtr1); + bool found; + const page_size_t& page_size = fil_space_get_page_size(space, + &found); + + ut_ad(found); + + len = flst_get_len(base); node_addr = flst_get_first(base, mtr1); for (i = 0; i < len; i++) { mtr_start(&mtr2); - node 
= fut_get_ptr(space, zip_size, - node_addr, RW_X_LATCH, &mtr2); + node = fut_get_ptr(space, page_size, + node_addr, RW_SX_LATCH, &mtr2); node_addr = flst_get_next_addr(node, &mtr2); mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer @@ -490,8 +552,8 @@ flst_validate( for (i = 0; i < len; i++) { mtr_start(&mtr2); - node = fut_get_ptr(space, zip_size, - node_addr, RW_X_LATCH, &mtr2); + node = fut_get_ptr(space, page_size, + node_addr, RW_SX_LATCH, &mtr2); node_addr = flst_get_prev_addr(node, &mtr2); mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer @@ -505,7 +567,6 @@ flst_validate( /********************************************************************//** Prints info of a file-based list. */ -UNIV_INTERN void flst_print( /*=======*/ @@ -516,15 +577,16 @@ flst_print( ulint len; ut_ad(base && mtr); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); frame = page_align((byte*) base); - len = flst_get_len(base, mtr); + len = flst_get_len(base); - fprintf(stderr, - "FILE-BASED LIST:\n" - "Base node in space %lu page %lu byte offset %lu; len %lu\n", - (ulong) page_get_space_id(frame), - (ulong) page_get_page_no(frame), - (ulong) page_offset(base), (ulong) len); + ib::info() << "FILE-BASED LIST: Base node in space " + << page_get_space_id(frame) + << "; page " << page_get_page_no(frame) + << "; byte offset " << page_offset(base) + << "; len " << len; } diff --git a/storage/innobase/gis/gis0geo.cc b/storage/innobase/gis/gis0geo.cc new file mode 100644 index 00000000000..b90b47dc08c --- /dev/null +++ b/storage/innobase/gis/gis0geo.cc @@ -0,0 +1,833 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file gis/gis0geo.cc +InnoDB R-tree related functions. + +Created 2013/03/27 Allen Lai and Jimmy Yang +*******************************************************/ + +#include "page0types.h" +#include "gis0geo.h" +#include "page0cur.h" +#include "ut0rnd.h" +#include "mach0data.h" + +#include + +/* These definitions are for comparing 2 mbrs. */ + +/* Check if a intersects b. +Return false if a intersects b, otherwise true. */ +#define INTERSECT_CMP(amin, amax, bmin, bmax) \ +(((amin) > (bmax)) || ((bmin) > (amax))) + +/* Check if b contains a. +Return false if b contains a, otherwise true. */ +#define CONTAIN_CMP(amin, amax, bmin, bmax) \ +(((bmin) > (amin)) || ((bmax) < (amax))) + +/* Check if b is within a. +Return false if b is within a, otherwise true. */ +#define WITHIN_CMP(amin, amax, bmin, bmax) \ +(((amin) > (bmin)) || ((amax) < (bmax))) + +/* Check if a disjoints b. +Return false if a disjoints b, otherwise true. */ +#define DISJOINT_CMP(amin, amax, bmin, bmax) \ +(((amin) <= (bmax)) && ((bmin) <= (amax))) + +/* Check if a equals b. +Return false if equal, otherwise true. 
*/ +#define EQUAL_CMP(amin, amax, bmin, bmax) \ +(((amin) != (bmin)) || ((amax) != (bmax))) + +/**************************************************************** +Functions for generating mbr +****************************************************************/ +/*************************************************************//** +Add one point stored in wkb to a given mbr. +@return 0 if the point in wkb is valid, otherwise -1. */ +static +int +rtree_add_point_to_mbr( +/*===================*/ + uchar** wkb, /*!< in: pointer to wkb, + where point is stored */ + uchar* end, /*!< in: end of wkb. */ + uint n_dims, /*!< in: dimensions. */ + uchar byte_order, /*!< in: byte order. */ + double* mbr) /*!< in/out: mbr, which + must be of length n_dims * 2. */ +{ + double ord; + double* mbr_end = mbr + n_dims * 2; + + while (mbr < mbr_end) { + if ((*wkb) + sizeof(double) > end) { + return(-1); + } + + ord = mach_double_read(*wkb); + (*wkb) += sizeof(double); + + if (ord < *mbr) { + *mbr = ord; + } + mbr++; + + if (ord > *mbr) { + *mbr = ord; + } + mbr++; + } + + return(0); +} + +/*************************************************************//** +Get mbr of point stored in wkb. +@return 0 if ok, otherwise -1. */ +static +int +rtree_get_point_mbr( +/*================*/ + uchar** wkb, /*!< in: pointer to wkb, + where point is stored. */ + uchar* end, /*!< in: end of wkb. */ + uint n_dims, /*!< in: dimensions. */ + uchar byte_order, /*!< in: byte order. */ + double* mbr) /*!< in/out: mbr, + must be of length n_dims * 2. */ +{ + return rtree_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr); +} + + +/*************************************************************//** +Get mbr of linestring stored in wkb. +@return 0 if the linestring is valid, otherwise -1. */ +static +int +rtree_get_linestring_mbr( +/*=====================*/ + uchar** wkb, /*!< in: pointer to wkb, + where point is stored. */ + uchar* end, /*!< in: end of wkb. */ + uint n_dims, /*!< in: dimensions. 
*/ + uchar byte_order, /*!< in: byte order. */ + double* mbr) /*!< in/out: mbr, + must be of length n_dims * 2. */ +{ + uint n_points; + + n_points = uint4korr(*wkb); + (*wkb) += 4; + + for (; n_points > 0; --n_points) { + /* Add next point to mbr */ + if (rtree_add_point_to_mbr(wkb, end, n_dims, + byte_order, mbr)) { + return(-1); + } + } + + return(0); +} + +/*************************************************************//** +Get mbr of polygon stored in wkb. +@return 0 if the polygon is valid, otherwise -1. */ +static +int +rtree_get_polygon_mbr( +/*==================*/ + uchar** wkb, /*!< in: pointer to wkb, + where point is stored. */ + uchar* end, /*!< in: end of wkb. */ + uint n_dims, /*!< in: dimensions. */ + uchar byte_order, /*!< in: byte order. */ + double* mbr) /*!< in/out: mbr, + must be of length n_dims * 2. */ +{ + uint n_linear_rings; + uint n_points; + + n_linear_rings = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_linear_rings > 0; --n_linear_rings) { + n_points = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_points > 0; --n_points) { + /* Add next point to mbr */ + if (rtree_add_point_to_mbr(wkb, end, n_dims, + byte_order, mbr)) { + return(-1); + } + } + } + + return(0); +} + +/*************************************************************//** +Get mbr of geometry stored in wkb. +@return 0 if the geometry is valid, otherwise -1. */ +static +int +rtree_get_geometry_mbr( +/*===================*/ + uchar** wkb, /*!< in: pointer to wkb, + where point is stored. */ + uchar* end, /*!< in: end of wkb. */ + uint n_dims, /*!< in: dimensions. */ + double* mbr, /*!< in/out: mbr. */ + int top) /*!< in: if it is the top, + which means it's not called + by itself. 
*/ +{ + int res; + uchar byte_order = 2; + uint wkb_type = 0; + uint n_items; + + byte_order = *(*wkb); + ++(*wkb); + + wkb_type = uint4korr((*wkb)); + (*wkb) += 4; + + switch ((enum wkbType) wkb_type) { + case wkbPoint: + res = rtree_get_point_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbLineString: + res = rtree_get_linestring_mbr(wkb, end, n_dims, + byte_order, mbr); + break; + case wkbPolygon: + res = rtree_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbMultiPoint: + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (rtree_get_point_mbr(wkb, end, n_dims, + byte_order, mbr)) { + return(-1); + } + } + res = 0; + break; + case wkbMultiLineString: + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (rtree_get_linestring_mbr(wkb, end, n_dims, + byte_order, mbr)) { + return(-1); + } + } + res = 0; + break; + case wkbMultiPolygon: + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (rtree_get_polygon_mbr(wkb, end, n_dims, + byte_order, mbr)) { + return(-1); + } + } + res = 0; + break; + case wkbGeometryCollection: + if (!top) { + return(-1); + } + + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) { + if (rtree_get_geometry_mbr(wkb, end, n_dims, + mbr, 0)) { + return(-1); + } + } + res = 0; + break; + default: + res = -1; + } + + return(res); +} + +/*************************************************************//** +Calculate Minimal Bounding Rectangle (MBR) of the spatial object +stored in "well-known binary representation" (wkb) format. +@return 0 if ok. */ +int +rtree_mbr_from_wkb( +/*===============*/ + uchar* wkb, /*!< in: wkb */ + uint size, /*!< in: size of wkb. */ + uint n_dims, /*!< in: dimensions. 
*/ + double* mbr) /*!< in/out: mbr, which must + be of length n_dim2 * 2. */ +{ + for (uint i = 0; i < n_dims; ++i) { + mbr[i * 2] = DBL_MAX; + mbr[i * 2 + 1] = -DBL_MAX; + } + + return rtree_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1); +} + + +/**************************************************************** +Functions for Rtree split +****************************************************************/ +/*************************************************************//** +Join 2 mbrs of dimensions n_dim. */ +static +void +mbr_join( +/*=====*/ + double* a, /*!< in/out: the first mbr, + where the joined result will be. */ + const double* b, /*!< in: the second mbr. */ + int n_dim) /*!< in: dimensions. */ +{ + double* end = a + n_dim * 2; + + do { + if (a[0] > b[0]) { + a[0] = b[0]; + } + + if (a[1] < b[1]) { + a[1] = b[1]; + } + + a += 2; + b += 2; + + } while (a != end); +} + +/*************************************************************//** +Counts the square of mbr which is the join of a and b. Both a and b +are of dimensions n_dim. */ +static +double +mbr_join_square( +/*============*/ + const double* a, /*!< in: the first mbr. */ + const double* b, /*!< in: the second mbr. */ + int n_dim) /*!< in: dimensions. */ +{ + const double* end = a + n_dim * 2; + double square = 1.0; + + do { + square *= std::max(a[1], b[1]) - std::min(a[0], b[0]); + + a += 2; + b += 2; + } while (a != end); + + /* Check for infinity or NaN, so we don't get NaN in calculations */ + if (my_isinf(square) || my_isnan(square)) { + return DBL_MAX; + } + + return square; +} + +/*************************************************************//** +Counts the square of mbr of dimension n_dim. */ +static +double +count_square( +/*=========*/ + const double* a, /*!< in: the mbr. */ + int n_dim) /*!< in: dimensions. 
*/ +{ + const double* end = a + n_dim * 2; + double square = 1.0; + + do { + square *= a[1] - a[0]; + a += 2; + } while (a != end); + + return square; +} + +/*************************************************************//** +Copy mbr of dimension n_dim from src to dst. */ +inline +static +void +copy_coords( +/*========*/ + double* dst, /*!< in/out: destination. */ + const double* src, /*!< in: source. */ + int n_dim) /*!< in: dimensions. */ +{ + memcpy(dst, src, DATA_MBR_LEN); +} + +/*************************************************************//** +Select two nodes to collect group upon */ +static +void +pick_seeds( +/*=======*/ + rtr_split_node_t* node, /*!< in: split nodes. */ + int n_entries, /*!< in: entries number. */ + rtr_split_node_t** seed_a, /*!< out: seed 1. */ + rtr_split_node_t** seed_b, /*!< out: seed 2. */ + int n_dim) /*!< in: dimensions. */ +{ + rtr_split_node_t* cur1; + rtr_split_node_t* lim1 = node + (n_entries - 1); + rtr_split_node_t* cur2; + rtr_split_node_t* lim2 = node + n_entries; + + double max_d = -DBL_MAX; + double d; + + *seed_a = node; + *seed_b = node + 1; + + for (cur1 = node; cur1 < lim1; ++cur1) { + for (cur2 = cur1 + 1; cur2 < lim2; ++cur2) { + d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - + cur1->square - cur2->square; + if (d > max_d) { + max_d = d; + *seed_a = cur1; + *seed_b = cur2; + } + } + } +} + +/*********************************************************//** +Generates a random iboolean value. +@return the random value */ +static +ibool +ut_rnd_gen_ibool(void) +/*=================*/ +{ + ulint x; + + x = ut_rnd_gen_ulint(); + + if (((x >> 20) + (x >> 15)) & 1) { + + return(TRUE); + } + + return(FALSE); +} + +/*************************************************************//** +Select next node and group where to add. */ +static +void +pick_next( +/*======*/ + rtr_split_node_t* node, /*!< in: split nodes. */ + int n_entries, /*!< in: entries number. */ + double* g1, /*!< in: mbr of group 1. 
*/ + double* g2, /*!< in: mbr of group 2. */ + rtr_split_node_t** choice, /*!< out: the next node.*/ + int* n_group, /*!< out: group number.*/ + int n_dim) /*!< in: dimensions. */ +{ + rtr_split_node_t* cur = node; + rtr_split_node_t* end = node + n_entries; + double max_diff = -DBL_MAX; + + for (; cur < end; ++cur) { + double diff; + double abs_diff; + + if (cur->n_node != 0) { + continue; + } + + diff = mbr_join_square(g1, cur->coords, n_dim) - + mbr_join_square(g2, cur->coords, n_dim); + + abs_diff = fabs(diff); + if (abs_diff > max_diff) { + max_diff = abs_diff; + + /* Introduce some randomness if the record + is identical */ + if (diff == 0) { + diff = static_cast( + ut_rnd_gen_ibool()); + } + + *n_group = 1 + (diff > 0); + *choice = cur; + } + } +} + +/*************************************************************//** +Mark not-in-group entries as n_group. */ +static +void +mark_all_entries( +/*=============*/ + rtr_split_node_t* node, /*!< in/out: split nodes. */ + int n_entries, /*!< in: entries number. */ + int n_group) /*!< in: group number. */ +{ + rtr_split_node_t* cur = node; + rtr_split_node_t* end = node + n_entries; + for (; cur < end; ++cur) { + if (cur->n_node != 0) { + continue; + } + cur->n_node = n_group; + } +} + +/*************************************************************//** +Split rtree node. +Return which group the first rec is in. */ +int +split_rtree_node( +/*=============*/ + rtr_split_node_t* node, /*!< in: split nodes. */ + int n_entries, /*!< in: entries number. */ + int all_size, /*!< in: total key's size. */ + int key_size, /*!< in: key's size. */ + int min_size, /*!< in: minimal group size. */ + int size1, /*!< in: size of group. */ + int size2, /*!< in: initial group sizes */ + double** d_buffer, /*!< in/out: buffer. */ + int n_dim, /*!< in: dimensions. */ + uchar* first_rec) /*!< in: the first rec. 
*/ +{ + rtr_split_node_t* cur; + rtr_split_node_t* a = NULL; + rtr_split_node_t* b = NULL; + double* g1 = reserve_coords(d_buffer, n_dim); + double* g2 = reserve_coords(d_buffer, n_dim); + rtr_split_node_t* next = NULL; + int next_node = 0; + int i; + int first_rec_group = 1; + rtr_split_node_t* end = node + n_entries; + + if (all_size < min_size * 2) { + return 1; + } + + cur = node; + for (; cur < end; ++cur) { + cur->square = count_square(cur->coords, n_dim); + cur->n_node = 0; + } + + pick_seeds(node, n_entries, &a, &b, n_dim); + a->n_node = 1; + b->n_node = 2; + + copy_coords(g1, a->coords, n_dim); + size1 += key_size; + copy_coords(g2, b->coords, n_dim); + size2 += key_size; + + for (i = n_entries - 2; i > 0; --i) { + /* Can't write into group 2 */ + if (all_size - (size2 + key_size) < min_size) { + mark_all_entries(node, n_entries, 1); + break; + } + + /* Can't write into group 1 */ + if (all_size - (size1 + key_size) < min_size) { + mark_all_entries(node, n_entries, 2); + break; + } + + pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim); + if (next_node == 1) { + size1 += key_size; + mbr_join(g1, next->coords, n_dim); + } else { + size2 += key_size; + mbr_join(g2, next->coords, n_dim); + } + + next->n_node = next_node; + + /* Find out where the first rec (of the page) will be at, + and inform the caller */ + if (first_rec && first_rec == next->key) { + first_rec_group = next_node; + } + } + + return(first_rec_group); +} + +/*************************************************************//** +Compares two keys a and b depending on nextflag +nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal +Return 0 on success, otherwise 1. */ +int +rtree_key_cmp( +/*==========*/ + page_cur_mode_t mode, /*!< in: compare method. */ + const uchar* b, /*!< in: first key. */ + int b_len, /*!< in: first key len. 
*/ + const uchar* a, /*!< in: second key. */ + int a_len) /*!< in: second key len. */ +{ + double amin, amax, bmin, bmax; + int key_len; + int keyseg_len; + + keyseg_len = 2 * sizeof(double); + for (key_len = a_len; key_len > 0; key_len -= keyseg_len) { + amin = mach_double_read(a); + bmin = mach_double_read(b); + amax = mach_double_read(a + sizeof(double)); + bmax = mach_double_read(b + sizeof(double)); + + switch (mode) { + case PAGE_CUR_INTERSECT: + if (INTERSECT_CMP(amin, amax, bmin, bmax)) { + return(1); + } + break; + case PAGE_CUR_CONTAIN: + if (CONTAIN_CMP(amin, amax, bmin, bmax)) { + return(1); + } + break; + case PAGE_CUR_WITHIN: + if (WITHIN_CMP(amin, amax, bmin, bmax)) { + return(1); + } + break; + case PAGE_CUR_MBR_EQUAL: + if (EQUAL_CMP(amin, amax, bmin, bmax)) { + return(1); + } + break; + case PAGE_CUR_DISJOINT: + int result; + + result = DISJOINT_CMP(amin, amax, bmin, bmax); + if (result == 0) { + return(0); + } + + if (key_len - keyseg_len <= 0) { + return(1); + } + + break; + default: + /* if unknown comparison operator */ + ut_ad(0); + } + + a += keyseg_len; + b += keyseg_len; + } + + return(0); +} + +/*************************************************************//** +Calculates MBR_AREA(a+b) - MBR_AREA(a) +Note: when 'a' and 'b' objects are far from each other, +the area increase can be really big, so this function +can return 'inf' as a result. +Return the area increaed. */ +double +rtree_area_increase( + const uchar* a, /*!< in: original mbr. */ + const uchar* b, /*!< in: new mbr. */ + int mbr_len, /*!< in: mbr length of a and b. */ + double* ab_area) /*!< out: increased area. 
*/ +{ + double a_area = 1.0; + double loc_ab_area = 1.0; + double amin, amax, bmin, bmax; + int key_len; + int keyseg_len; + double data_round = 1.0; + + keyseg_len = 2 * sizeof(double); + + for (key_len = mbr_len; key_len > 0; key_len -= keyseg_len) { + double area; + + amin = mach_double_read(a); + bmin = mach_double_read(b); + amax = mach_double_read(a + sizeof(double)); + bmax = mach_double_read(b + sizeof(double)); + + area = amax - amin; + if (area == 0) { + a_area *= LINE_MBR_WEIGHTS; + } else { + a_area *= area; + } + + area = (double)std::max(amax, bmax) - + (double)std::min(amin, bmin); + if (area == 0) { + loc_ab_area *= LINE_MBR_WEIGHTS; + } else { + loc_ab_area *= area; + } + + /* Value of amax or bmin can be so large that small difference + are ignored. For example: 3.2884281489988079e+284 - 100 = + 3.2884281489988079e+284. This results some area difference + are not detected */ + if (loc_ab_area == a_area) { + if (bmin < amin || bmax > amax) { + data_round *= ((double)std::max(amax, bmax) + - amax + + (amin - (double)std::min( + amin, bmin))); + } else { + data_round *= area; + } + } + + a += keyseg_len; + b += keyseg_len; + } + + *ab_area = loc_ab_area; + + if (loc_ab_area == a_area && data_round != 1.0) { + return(data_round); + } + + return(loc_ab_area - a_area); +} + +/** Calculates overlapping area +@param[in] a mbr a +@param[in] b mbr b +@param[in] mbr_len mbr length +@return overlapping area */ +double +rtree_area_overlapping( + const uchar* a, + const uchar* b, + int mbr_len) +{ + double area = 1.0; + double amin; + double amax; + double bmin; + double bmax; + int key_len; + int keyseg_len; + + keyseg_len = 2 * sizeof(double); + + for (key_len = mbr_len; key_len > 0; key_len -= keyseg_len) { + amin = mach_double_read(a); + bmin = mach_double_read(b); + amax = mach_double_read(a + sizeof(double)); + bmax = mach_double_read(b + sizeof(double)); + + amin = std::max(amin, bmin); + amax = std::min(amax, bmax); + + if (amin > amax) { + return(0); + 
} else { + area *= (amax - amin); + } + + a += keyseg_len; + b += keyseg_len; + } + + return(area); +} + +/** Get the wkb of default POINT value, which represents POINT(0 0) +if it's of dimension 2, etc. +@param[in] n_dims dimensions +@param[out] wkb wkb buffer for default POINT +@param[in] len length of wkb buffer +@return non-0 indicate the length of wkb of the default POINT, +0 if the buffer is too small */ +uint +get_wkb_of_default_point( + uint n_dims, + uchar* wkb, + uint len) +{ + // JAN: TODO: MYSQL 5.7 GIS + #define GEOM_HEADER_SIZE 16 + if (len < GEOM_HEADER_SIZE + sizeof(double) * n_dims) { + return(0); + } + + /** POINT wkb comprises SRID, wkb header(byte order and type) + and coordinates of the POINT */ + len = GEOM_HEADER_SIZE + sizeof(double) * n_dims; + /** We always use 0 as default coordinate */ + memset(wkb, 0, len); + /** We don't need to write SRID, write 0x01 for Byte Order */ + mach_write_to_n_little_endian(wkb + SRID_SIZE, 1, 0x01); + /** Write wkbType::wkbPoint for the POINT type */ + mach_write_to_n_little_endian(wkb + SRID_SIZE + 1, 4, wkbPoint); + + return(len); +} diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc new file mode 100644 index 00000000000..ee89e0a913f --- /dev/null +++ b/storage/innobase/gis/gis0rtree.cc @@ -0,0 +1,1997 @@ +/***************************************************************************** + +Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file gis/gis0rtree.cc +InnoDB R-tree interfaces + +Created 2013/03/27 Allen Lai and Jimmy Yang +***********************************************************************/ + +#include "fsp0fsp.h" +#include "page0page.h" +#include "page0cur.h" +#include "page0zip.h" +#include "gis0rtree.h" + +#ifndef UNIV_HOTBACKUP +#include "btr0cur.h" +#include "btr0sea.h" +#include "btr0pcur.h" +#include "rem0cmp.h" +#include "lock0lock.h" +#include "ibuf0ibuf.h" +#include "trx0trx.h" +#include "srv0mon.h" +#include "gis0geo.h" + +#endif /* UNIV_HOTBACKUP */ + +/*************************************************************//** +Initial split nodes info for R-tree split. 
+@return initialized split nodes array */ +static +rtr_split_node_t* +rtr_page_split_initialize_nodes( +/*============================*/ + mem_heap_t* heap, /*!< in: pointer to memory heap, or NULL */ + btr_cur_t* cursor, /*!< in: cursor at which to insert; when the + function returns, the cursor is positioned + on the predecessor of the inserted record */ + ulint** offsets,/*!< in: offsets on inserted record */ + const dtuple_t* tuple, /*!< in: tuple to insert */ + double** buf_pos)/*!< in/out: current buffer position */ +{ + rtr_split_node_t* split_node_array; + double* buf; + ulint n_recs; + rtr_split_node_t* task; + rtr_split_node_t* stop; + rtr_split_node_t* cur; + rec_t* rec; + buf_block_t* block; + page_t* page; + ulint n_uniq; + ulint len; + byte* source_cur; + + block = btr_cur_get_block(cursor); + page = buf_block_get_frame(block); + n_uniq = dict_index_get_n_unique_in_tree(cursor->index); + + n_recs = page_get_n_recs(page) + 1; + + /*We reserve 2 MBRs memory space for temp result of split + algrithm. 
And plus the new mbr that need to insert, we + need (n_recs + 3)*MBR size for storing all MBRs.*/ + buf = static_cast(mem_heap_alloc( + heap, DATA_MBR_LEN * (n_recs + 3) + + sizeof(rtr_split_node_t) * (n_recs + 1))); + + split_node_array = (rtr_split_node_t*)(buf + SPDIMS * 2 * (n_recs + 3)); + task = split_node_array; + *buf_pos = buf; + stop = task + n_recs; + + rec = page_rec_get_next(page_get_infimum_rec(page)); + *offsets = rec_get_offsets(rec, cursor->index, *offsets, + n_uniq, &heap); + + source_cur = rec_get_nth_field(rec, *offsets, 0, &len); + + for (cur = task; cur < stop - 1; ++cur) { + cur->coords = reserve_coords(buf_pos, SPDIMS); + cur->key = rec; + + memcpy(cur->coords, source_cur, DATA_MBR_LEN); + + rec = page_rec_get_next(rec); + *offsets = rec_get_offsets(rec, cursor->index, *offsets, + n_uniq, &heap); + source_cur = rec_get_nth_field(rec, *offsets, 0, &len); + } + + /* Put the insert key to node list */ + source_cur = static_cast(dfield_get_data( + dtuple_get_nth_field(tuple, 0))); + cur->coords = reserve_coords(buf_pos, SPDIMS); + rec = (byte*) mem_heap_alloc( + heap, rec_get_converted_size(cursor->index, tuple, 0)); + + rec = rec_convert_dtuple_to_rec(rec, cursor->index, tuple, 0); + cur->key = rec; + + memcpy(cur->coords, source_cur, DATA_MBR_LEN); + + return split_node_array; +} + +/**********************************************************************//** +Builds a Rtree node pointer out of a physical record and a page number. +Note: For Rtree, we just keep the mbr and page no field in non-leaf level +page. It's different with Btree, Btree still keeps PK fields so far. 
+@return own: node pointer */ +dtuple_t* +rtr_index_build_node_ptr( +/*=====================*/ + const dict_index_t* index, /*!< in: index */ + const rtr_mbr_t* mbr, /*!< in: mbr of lower page */ + const rec_t* rec, /*!< in: record for which to build node + pointer */ + ulint page_no,/*!< in: page number to put in node + pointer */ + mem_heap_t* heap, /*!< in: memory heap where pointer + created */ + ulint level) /*!< in: level of rec in tree: + 0 means leaf level */ +{ + dtuple_t* tuple; + dfield_t* field; + byte* buf; + ulint n_unique; + ulint info_bits; + + ut_ad(dict_index_is_spatial(index)); + + n_unique = DICT_INDEX_SPATIAL_NODEPTR_SIZE; + + tuple = dtuple_create(heap, n_unique + 1); + + /* For rtree internal node, we need to compare page number + fields. */ + dtuple_set_n_fields_cmp(tuple, n_unique + 1); + + dict_index_copy_types(tuple, index, n_unique); + + /* Write page no field */ + buf = static_cast(mem_heap_alloc(heap, 4)); + + mach_write_to_4(buf, page_no); + + field = dtuple_get_nth_field(tuple, n_unique); + dfield_set_data(field, buf, 4); + + dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4); + + /* Set info bits. */ + info_bits = rec_get_info_bits(rec, dict_table_is_comp(index->table)); + dtuple_set_info_bits(tuple, info_bits | REC_STATUS_NODE_PTR); + + /* Set mbr as index entry data */ + field = dtuple_get_nth_field(tuple, 0); + + buf = static_cast(mem_heap_alloc(heap, DATA_MBR_LEN)); + + rtr_write_mbr(buf, mbr); + + dfield_set_data(field, buf, DATA_MBR_LEN); + + ut_ad(dtuple_check_typed(tuple)); + + return(tuple); +} + +/**************************************************************//** +In-place update the mbr field of a spatial index row. +@return true if update is successful */ +static +bool +rtr_update_mbr_field_in_place( +/*==========================*/ + dict_index_t* index, /*!< in: spatial index. */ + rec_t* rec, /*!< in/out: rec to be modified.*/ + ulint* offsets, /*!< in/out: offsets on rec. 
*/ + rtr_mbr_t* mbr, /*!< in: the new mbr. */ + mtr_t* mtr) /*!< in: mtr */ +{ + void* new_mbr_ptr; + double new_mbr[SPDIMS * 2]; + byte* log_ptr; + page_t* page = page_align(rec); + ulint len = DATA_MBR_LEN; + ulint flags = BTR_NO_UNDO_LOG_FLAG + | BTR_NO_LOCKING_FLAG + | BTR_KEEP_SYS_FLAG; + ulint rec_info; + + rtr_write_mbr(reinterpret_cast(&new_mbr), mbr); + new_mbr_ptr = static_cast(new_mbr); + /* Otherwise, set the mbr to the new_mbr. */ + rec_set_nth_field(rec, offsets, 0, new_mbr_ptr, len); + + rec_info = rec_get_info_bits(rec, rec_offs_comp(offsets)); + + /* Write redo log. */ + /* For now, we use LOG_REC_UPDATE_IN_PLACE to log this enlarge. + In the future, we may need to add a new log type for this. */ + log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page) + ? MLOG_COMP_REC_UPDATE_IN_PLACE + : MLOG_REC_UPDATE_IN_PLACE, + 1 + DATA_ROLL_PTR_LEN + 14 + 2 + + MLOG_BUF_MARGIN); + + if (!log_ptr) { + /* Logging in mtr is switched off during + crash recovery */ + return(false); + } + + /* Flags */ + mach_write_to_1(log_ptr, flags); + log_ptr++; + /* TRX_ID Position */ + log_ptr += mach_write_compressed(log_ptr, 0); + /* ROLL_PTR */ + trx_write_roll_ptr(log_ptr, 0); + log_ptr += DATA_ROLL_PTR_LEN; + /* TRX_ID */ + log_ptr += mach_u64_write_compressed(log_ptr, 0); + + /* Offset */ + mach_write_to_2(log_ptr, page_offset(rec)); + log_ptr += 2; + /* Info bits */ + mach_write_to_1(log_ptr, rec_info); + log_ptr++; + /* N fields */ + log_ptr += mach_write_compressed(log_ptr, 1); + /* Field no, len */ + log_ptr += mach_write_compressed(log_ptr, 0); + log_ptr += mach_write_compressed(log_ptr, len); + /* Data */ + memcpy(log_ptr, new_mbr_ptr, len); + log_ptr += len; + + mlog_close(mtr, log_ptr); + + return(true); +} + +/**************************************************************//** +Update the mbr field of a spatial index row. 
+@return true if update is successful */ +bool +rtr_update_mbr_field( +/*=================*/ + btr_cur_t* cursor, /*!< in/out: cursor pointed to rec.*/ + ulint* offsets, /*!< in/out: offsets on rec. */ + btr_cur_t* cursor2, /*!< in/out: cursor pointed to rec + that should be deleted. + this cursor is for btr_compress to + delete the merged page's father rec.*/ + page_t* child_page, /*!< in: child page. */ + rtr_mbr_t* mbr, /*!< in: the new mbr. */ + rec_t* new_rec, /*!< in: rec to use */ + mtr_t* mtr) /*!< in: mtr */ +{ + dict_index_t* index = cursor->index; + mem_heap_t* heap; + page_t* page; + rec_t* rec; + ulint flags = BTR_NO_UNDO_LOG_FLAG + | BTR_NO_LOCKING_FLAG + | BTR_KEEP_SYS_FLAG; + dberr_t err; + big_rec_t* dummy_big_rec; + buf_block_t* block; + rec_t* child_rec; + ulint up_match = 0; + ulint low_match = 0; + ulint child; + ulint level; + ulint rec_info; + page_zip_des_t* page_zip; + bool ins_suc = true; + ulint cur2_pos = 0; + ulint del_page_no = 0; + ulint* offsets2; + + rec = btr_cur_get_rec(cursor); + page = page_align(rec); + + rec_info = rec_get_info_bits(rec, rec_offs_comp(offsets)); + + heap = mem_heap_create(100); + block = btr_cur_get_block(cursor); + ut_ad(page == buf_block_get_frame(block)); + page_zip = buf_block_get_page_zip(block); + + child = btr_node_ptr_get_child_page_no(rec, offsets); + level = btr_page_get_level(buf_block_get_frame(block), mtr); + + if (new_rec) { + child_rec = new_rec; + } else { + child_rec = page_rec_get_next(page_get_infimum_rec(child_page)); + } + + dtuple_t* node_ptr = rtr_index_build_node_ptr( + index, mbr, child_rec, child, heap, level); + + /* We need to remember the child page no of cursor2, since page could be + reorganized or insert a new rec before it. 
*/ + if (cursor2) { + rec_t* del_rec = btr_cur_get_rec(cursor2); + offsets2 = rec_get_offsets(btr_cur_get_rec(cursor2), + index, NULL, + ULINT_UNDEFINED, &heap); + del_page_no = btr_node_ptr_get_child_page_no(del_rec, offsets2); + cur2_pos = page_rec_get_n_recs_before(btr_cur_get_rec(cursor2)); + } + + if (rec_info & REC_INFO_MIN_REC_FLAG) { + /* When the rec is minimal rec in this level, we do + in-place update for avoiding it move to other place. */ + + if (page_zip) { + /* Check if there's enough space for in-place + update the zip page. */ + if (!btr_cur_update_alloc_zip( + page_zip, + btr_cur_get_page_cur(cursor), + index, offsets, + rec_offs_size(offsets), + false, mtr)) { + + /* If there's not enought space for + inplace update zip page, we do delete + insert. */ + ins_suc = false; + + /* Since btr_cur_update_alloc_zip could + reorganize the page, we need to repositon + cursor2. */ + if (cursor2) { + cursor2->page_cur.rec = + page_rec_get_nth(page, + cur2_pos); + } + + goto update_mbr; + } + + /* Record could be repositioned */ + rec = btr_cur_get_rec(cursor); + +#ifdef UNIV_DEBUG + /* Make sure it is still the first record */ + rec_info = rec_get_info_bits( + rec, rec_offs_comp(offsets)); + ut_ad(rec_info & REC_INFO_MIN_REC_FLAG); +#endif /* UNIV_DEBUG */ + } + + if (!rtr_update_mbr_field_in_place(index, rec, + offsets, mbr, mtr)) { + return(false); + } + + if (page_zip) { + page_zip_write_rec(page_zip, rec, index, offsets, 0); + } + + if (cursor2) { + ulint* offsets2; + + if (page_zip) { + cursor2->page_cur.rec + = page_rec_get_nth(page, cur2_pos); + } + offsets2 = rec_get_offsets(btr_cur_get_rec(cursor2), + index, NULL, + ULINT_UNDEFINED, &heap); + ut_ad(del_page_no == btr_node_ptr_get_child_page_no( + cursor2->page_cur.rec, + offsets2)); + + page_cur_delete_rec(btr_cur_get_page_cur(cursor2), + index, offsets2, mtr); + } + } else if (page_get_n_recs(page) == 1) { + /* When there's only one rec in the page, we do insert/delete to + avoid page merge. 
*/ + + page_cur_t page_cur; + rec_t* insert_rec; + ulint* insert_offsets = NULL; + ulint old_pos; + rec_t* old_rec; + + ut_ad(cursor2 == NULL); + + /* Insert the new mbr rec. */ + old_pos = page_rec_get_n_recs_before(rec); + + err = btr_cur_optimistic_insert( + flags, + cursor, &insert_offsets, &heap, + node_ptr, &insert_rec, &dummy_big_rec, 0, NULL, mtr); + + ut_ad(err == DB_SUCCESS); + + btr_cur_position(index, insert_rec, block, cursor); + + /* Delete the old mbr rec. */ + old_rec = page_rec_get_nth(page, old_pos); + ut_ad(old_rec != insert_rec); + + page_cur_position(old_rec, block, &page_cur); + offsets2 = rec_get_offsets(old_rec, + index, NULL, + ULINT_UNDEFINED, &heap); + page_cur_delete_rec(&page_cur, index, offsets2, mtr); + + } else { +update_mbr: + /* When there're not only 1 rec in the page, we do delete/insert + to avoid page split. */ + rec_t* insert_rec; + ulint* insert_offsets = NULL; + rec_t* next_rec; + + /* Delete the rec which cursor point to. */ + next_rec = page_rec_get_next(rec); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), + index, offsets, mtr); + if (!ins_suc) { + ut_ad(rec_info & REC_INFO_MIN_REC_FLAG); + + btr_set_min_rec_mark(next_rec, mtr); + } + + /* If there's more than 1 rec left in the page, delete + the rec which cursor2 point to. 
Otherwise, delete it later.*/ + if (cursor2 && page_get_n_recs(page) > 1) { + ulint cur2_rec_info; + rec_t* cur2_rec; + + cur2_rec = cursor2->page_cur.rec; + offsets2 = rec_get_offsets(cur2_rec, index, NULL, + ULINT_UNDEFINED, &heap); + + cur2_rec_info = rec_get_info_bits(cur2_rec, + rec_offs_comp(offsets2)); + if (cur2_rec_info & REC_INFO_MIN_REC_FLAG) { + /* If we delete the leftmost node + pointer on a non-leaf level, we must + mark the new leftmost node pointer as + the predefined minimum record */ + rec_t* next_rec = page_rec_get_next(cur2_rec); + btr_set_min_rec_mark(next_rec, mtr); + } + + ut_ad(del_page_no + == btr_node_ptr_get_child_page_no(cur2_rec, + offsets2)); + page_cur_delete_rec(btr_cur_get_page_cur(cursor2), + index, offsets2, mtr); + cursor2 = NULL; + } + + /* Insert the new rec. */ + page_cur_search_with_match(block, index, node_ptr, + PAGE_CUR_LE , &up_match, &low_match, + btr_cur_get_page_cur(cursor), NULL); + + err = btr_cur_optimistic_insert(flags, cursor, &insert_offsets, + &heap, node_ptr, &insert_rec, + &dummy_big_rec, 0, NULL, mtr); + + if (!ins_suc && err == DB_SUCCESS) { + ins_suc = true; + } + + /* If optimistic insert fail, try reorganize the page + and insert again. */ + if (err != DB_SUCCESS && ins_suc) { + btr_page_reorganize(btr_cur_get_page_cur(cursor), + index, mtr); + + err = btr_cur_optimistic_insert(flags, + cursor, + &insert_offsets, + &heap, + node_ptr, + &insert_rec, + &dummy_big_rec, + 0, NULL, mtr); + + /* Will do pessimistic insert */ + if (err != DB_SUCCESS) { + ins_suc = false; + } + } + + /* Insert succeed, position cursor the inserted rec.*/ + if (ins_suc) { + btr_cur_position(index, insert_rec, block, cursor); + offsets = rec_get_offsets(insert_rec, + index, offsets, + ULINT_UNDEFINED, &heap); + } + + /* Delete the rec which cursor2 point to. 
*/ + if (cursor2) { + ulint cur2_pno; + rec_t* cur2_rec; + + cursor2->page_cur.rec = page_rec_get_nth(page, + cur2_pos); + + cur2_rec = btr_cur_get_rec(cursor2); + + offsets2 = rec_get_offsets(cur2_rec, index, NULL, + ULINT_UNDEFINED, &heap); + + /* If the cursor2 position is on a wrong rec, we + need to reposition it. */ + cur2_pno = btr_node_ptr_get_child_page_no(cur2_rec, offsets2); + if ((del_page_no != cur2_pno) + || (cur2_rec == insert_rec)) { + cur2_rec = page_rec_get_next( + page_get_infimum_rec(page)); + + while (!page_rec_is_supremum(cur2_rec)) { + offsets2 = rec_get_offsets(cur2_rec, index, + NULL, + ULINT_UNDEFINED, + &heap); + cur2_pno = btr_node_ptr_get_child_page_no( + cur2_rec, offsets2); + if (cur2_pno == del_page_no) { + if (insert_rec != cur2_rec) { + cursor2->page_cur.rec = + cur2_rec; + break; + } + } + cur2_rec = page_rec_get_next(cur2_rec); + } + + ut_ad(!page_rec_is_supremum(cur2_rec)); + } + + rec_info = rec_get_info_bits(cur2_rec, + rec_offs_comp(offsets2)); + if (rec_info & REC_INFO_MIN_REC_FLAG) { + /* If we delete the leftmost node + pointer on a non-leaf level, we must + mark the new leftmost node pointer as + the predefined minimum record */ + rec_t* next_rec = page_rec_get_next(cur2_rec); + btr_set_min_rec_mark(next_rec, mtr); + } + + ut_ad(cur2_pno == del_page_no && cur2_rec != insert_rec); + + page_cur_delete_rec(btr_cur_get_page_cur(cursor2), + index, offsets2, mtr); + } + + if (!ins_suc) { + mem_heap_t* new_heap = NULL; + + err = btr_cur_pessimistic_insert( + flags, + cursor, &insert_offsets, &new_heap, + node_ptr, &insert_rec, &dummy_big_rec, + 0, NULL, mtr); + + ut_ad(err == DB_SUCCESS); + + if (new_heap) { + mem_heap_free(new_heap); + } + + } + + if (cursor2) { + btr_cur_compress_if_useful(cursor, FALSE, mtr); + } + } + +#ifdef UNIV_DEBUG + ulint left_page_no = btr_page_get_prev(page, mtr); + + if (left_page_no == FIL_NULL) { + + ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( + page_rec_get_next(page_get_infimum_rec(page)), + 
page_is_comp(page))); + } +#endif /* UNIV_DEBUG */ + + mem_heap_free(heap); + + return(true); +} + +/**************************************************************//** +Update parent page's MBR and Predicate lock information during a split */ +static MY_ATTRIBUTE((nonnull)) +void +rtr_adjust_upper_level( +/*===================*/ + btr_cur_t* sea_cur, /*!< in: search cursor */ + ulint flags, /*!< in: undo logging and + locking flags */ + buf_block_t* block, /*!< in/out: page to be split */ + buf_block_t* new_block, /*!< in/out: the new half page */ + rtr_mbr_t* mbr, /*!< in: MBR on the old page */ + rtr_mbr_t* new_mbr, /*!< in: MBR on the new page */ + ulint direction, /*!< in: FSP_UP or FSP_DOWN */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_t* page; + page_t* new_page; + ulint page_no; + ulint new_page_no; + page_zip_des_t* page_zip; + page_zip_des_t* new_page_zip; + dict_index_t* index = sea_cur->index; + btr_cur_t cursor; + ulint* offsets; + mem_heap_t* heap; + ulint level; + dtuple_t* node_ptr_upper; + ulint prev_page_no; + ulint next_page_no; + ulint space; + page_cur_t* page_cursor; + rtr_mbr_t parent_mbr; + lock_prdt_t prdt; + lock_prdt_t new_prdt; + lock_prdt_t parent_prdt; + dberr_t err; + big_rec_t* dummy_big_rec; + rec_t* rec; + + /* Create a memory heap where the data tuple is stored */ + heap = mem_heap_create(1024); + memset(&cursor, 0, sizeof(cursor)); + + cursor.thr = sea_cur->thr; + + /* Get the level of the split pages */ + level = btr_page_get_level(buf_block_get_frame(block), mtr); + ut_ad(level + == btr_page_get_level(buf_block_get_frame(new_block), mtr)); + + page = buf_block_get_frame(block); + page_no = block->page.id.page_no(); + page_zip = buf_block_get_page_zip(block); + + new_page = buf_block_get_frame(new_block); + new_page_no = new_block->page.id.page_no(); + new_page_zip = buf_block_get_page_zip(new_block); + + /* Set new mbr for the old page on the upper level. 
*/ + /* Look up the index for the node pointer to page */ + offsets = rtr_page_get_father_block( + NULL, heap, index, block, mtr, sea_cur, &cursor); + + page_cursor = btr_cur_get_page_cur(&cursor); + + rtr_get_mbr_from_rec(page_cursor->rec, offsets, &parent_mbr); + + rtr_update_mbr_field(&cursor, offsets, NULL, page, mbr, NULL, mtr); + + /* Already updated parent MBR, reset in our path */ + if (sea_cur->rtr_info) { + node_visit_t* node_visit = rtr_get_parent_node( + sea_cur, level + 1, true); + if (node_visit) { + node_visit->mbr_inc = 0; + } + } + + /* Insert the node for the new page. */ + node_ptr_upper = rtr_index_build_node_ptr( + index, new_mbr, + page_rec_get_next(page_get_infimum_rec(new_page)), + new_page_no, heap, level); + + ulint up_match = 0; + ulint low_match = 0; + + buf_block_t* father_block = btr_cur_get_block(&cursor); + + page_cur_search_with_match( + father_block, index, node_ptr_upper, + PAGE_CUR_LE , &up_match, &low_match, + btr_cur_get_page_cur(&cursor), NULL); + + err = btr_cur_optimistic_insert( + flags + | BTR_NO_LOCKING_FLAG + | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG, + &cursor, &offsets, &heap, + node_ptr_upper, &rec, &dummy_big_rec, 0, NULL, mtr); + + if (err == DB_FAIL) { + cursor.rtr_info = sea_cur->rtr_info; + cursor.tree_height = sea_cur->tree_height; + + err = btr_cur_pessimistic_insert(flags + | BTR_NO_LOCKING_FLAG + | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG, + &cursor, &offsets, &heap, + node_ptr_upper, &rec, + &dummy_big_rec, 0, NULL, mtr); + cursor.rtr_info = NULL; + ut_a(err == DB_SUCCESS); + } + + prdt.data = static_cast(mbr); + prdt.op = 0; + new_prdt.data = static_cast(new_mbr); + new_prdt.op = 0; + parent_prdt.data = static_cast(&parent_mbr); + parent_prdt.op = 0; + + lock_prdt_update_parent(block, new_block, &prdt, &new_prdt, + &parent_prdt, dict_index_get_space(index), + page_cursor->block->page.id.page_no()); + + mem_heap_free(heap); + + /* Get the previous and next pages of page */ + prev_page_no = 
btr_page_get_prev(page, mtr); + next_page_no = btr_page_get_next(page, mtr); + space = block->page.id.space(); + const page_size_t& page_size = dict_table_page_size(index->table); + + /* Update page links of the level */ + if (prev_page_no != FIL_NULL) { + page_id_t prev_page_id(space, prev_page_no); + + buf_block_t* prev_block = btr_block_get( + prev_page_id, page_size, RW_X_LATCH, index, mtr); +#ifdef UNIV_BTR_DEBUG + ut_a(page_is_comp(prev_block->frame) == page_is_comp(page)); + ut_a(btr_page_get_next(prev_block->frame, mtr) + == block->page.id.page_no()); +#endif /* UNIV_BTR_DEBUG */ + + btr_page_set_next(buf_block_get_frame(prev_block), + buf_block_get_page_zip(prev_block), + page_no, mtr); + } + + if (next_page_no != FIL_NULL) { + page_id_t next_page_id(space, next_page_no); + + buf_block_t* next_block = btr_block_get( + next_page_id, page_size, RW_X_LATCH, index, mtr); +#ifdef UNIV_BTR_DEBUG + ut_a(page_is_comp(next_block->frame) == page_is_comp(page)); + ut_a(btr_page_get_prev(next_block->frame, mtr) + == page_get_page_no(page)); +#endif /* UNIV_BTR_DEBUG */ + + btr_page_set_prev(buf_block_get_frame(next_block), + buf_block_get_page_zip(next_block), + new_page_no, mtr); + } + + btr_page_set_prev(page, page_zip, prev_page_no, mtr); + btr_page_set_next(page, page_zip, new_page_no, mtr); + + btr_page_set_prev(new_page, new_page_zip, page_no, mtr); + btr_page_set_next(new_page, new_page_zip, next_page_no, mtr); +} + +/*************************************************************//** +Moves record list to another page for rtree splitting. + +IMPORTANT: The caller will have to update IBUF_BITMAP_FREE +if new_block is a compressed leaf page in a secondary index. +This has to be done either within the same mini-transaction, +or by invoking ibuf_reset_free_bits() before mtr_commit(). 
+ +@return TRUE on success; FALSE on compression failure */ +ibool +rtr_split_page_move_rec_list( +/*=========================*/ + rtr_split_node_t* node_array, /*!< in: split node array. */ + int first_rec_group,/*!< in: group number of the + first rec. */ + buf_block_t* new_block, /*!< in/out: index page + where to move */ + buf_block_t* block, /*!< in/out: page containing + split_rec */ + rec_t* first_rec, /*!< in: first record not to + move */ + dict_index_t* index, /*!< in: record descriptor */ + mem_heap_t* heap, /*!< in: pointer to memory + heap, or NULL */ + mtr_t* mtr) /*!< in: mtr */ +{ + rtr_split_node_t* cur_split_node; + rtr_split_node_t* end_split_node; + page_cur_t page_cursor; + page_cur_t new_page_cursor; + page_t* page; + page_t* new_page; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + page_zip_des_t* new_page_zip + = buf_block_get_page_zip(new_block); + rec_t* rec; + rec_t* ret; + ulint moved = 0; + ulint max_to_move = 0; + rtr_rec_move_t* rec_move = NULL; + + rec_offs_init(offsets_); + + page_cur_set_before_first(block, &page_cursor); + page_cur_set_before_first(new_block, &new_page_cursor); + + page = buf_block_get_frame(block); + new_page = buf_block_get_frame(new_block); + ret = page_rec_get_prev(page_get_supremum_rec(new_page)); + + end_split_node = node_array + page_get_n_recs(page); + + mtr_log_t log_mode = MTR_LOG_NONE; + + if (new_page_zip) { + log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); + } + + max_to_move = page_get_n_recs( + buf_block_get_frame(block)); + rec_move = static_cast(mem_heap_alloc( + heap, + sizeof (*rec_move) * max_to_move)); + + /* Insert the recs in group 2 to new page. 
*/ + for (cur_split_node = node_array; + cur_split_node < end_split_node; ++cur_split_node) { + if (cur_split_node->n_node != first_rec_group) { + lock_rec_store_on_page_infimum( + block, cur_split_node->key); + + offsets = rec_get_offsets(cur_split_node->key, + index, offsets, + ULINT_UNDEFINED, &heap); + + ut_ad (cur_split_node->key != first_rec + || !page_is_leaf(page)); + + rec = page_cur_insert_rec_low( + page_cur_get_rec(&new_page_cursor), + index, + cur_split_node->key, + offsets, + mtr); + + ut_a(rec); + + lock_rec_restore_from_page_infimum( + new_block, rec, block); + + page_cur_move_to_next(&new_page_cursor); + + rec_move[moved].new_rec = rec; + rec_move[moved].old_rec = cur_split_node->key; + rec_move[moved].moved = false; + moved++; + + if (moved > max_to_move) { + ut_ad(0); + break; + } + } + } + + /* Update PAGE_MAX_TRX_ID on the uncompressed page. + Modifications will be redo logged and copied to the compressed + page in page_zip_compress() or page_zip_reorganize() below. + Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it not required + for MVCC. */ + if (dict_index_is_sec_or_ibuf(index) + && page_is_leaf(page) + && !dict_table_is_temporary(index->table)) { + page_update_max_trx_id(new_block, NULL, + page_get_max_trx_id(page), + mtr); + } + + if (new_page_zip) { + mtr_set_log_mode(mtr, log_mode); + + if (!page_zip_compress(new_page_zip, new_page, index, + page_zip_level, NULL, mtr)) { + ulint ret_pos; + + /* Before trying to reorganize the page, + store the number of preceding records on the page. */ + ret_pos = page_rec_get_n_recs_before(ret); + /* Before copying, "ret" was the predecessor + of the predefined supremum record. If it was + the predefined infimum record, then it would + still be the infimum, and we would have + ret_pos == 0. 
*/ + + if (UNIV_UNLIKELY + (!page_zip_reorganize(new_block, index, mtr))) { + + if (UNIV_UNLIKELY + (!page_zip_decompress(new_page_zip, + new_page, FALSE))) { + ut_error; + } +#ifdef UNIV_GIS_DEBUG + ut_ad(page_validate(new_page, index)); +#endif + + return(false); + } + + /* The page was reorganized: Seek to ret_pos. */ + ret = page_rec_get_nth(new_page, ret_pos); + } + } + + /* Update the lock table */ + lock_rtr_move_rec_list(new_block, block, rec_move, moved); + + /* Delete recs in second group from the old page. */ + for (cur_split_node = node_array; + cur_split_node < end_split_node; ++cur_split_node) { + if (cur_split_node->n_node != first_rec_group) { + page_cur_position(cur_split_node->key, + block, &page_cursor); + offsets = rec_get_offsets( + page_cur_get_rec(&page_cursor), index, + offsets, ULINT_UNDEFINED, + &heap); + page_cur_delete_rec(&page_cursor, + index, offsets, mtr); + } + } + + return(true); +} + +/*************************************************************//** +Splits an R-tree index page to halves and inserts the tuple. It is assumed +that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is +released within this function! NOTE that the operation of this +function must always succeed, we cannot reverse it: therefore enough +free disk space (2 pages) must be guaranteed to be available before +this function is called. 
+@return inserted record */ +rec_t* +rtr_page_split_and_insert( +/*======================*/ + ulint flags, /*!< in: undo logging and locking flags */ + btr_cur_t* cursor, /*!< in/out: cursor at which to insert; when the + function returns, the cursor is positioned + on the predecessor of the inserted record */ + ulint** offsets,/*!< out: offsets on inserted record */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + const dtuple_t* tuple, /*!< in: tuple to insert */ + ulint n_ext, /*!< in: number of externally stored columns */ + mtr_t* mtr) /*!< in: mtr */ +{ + buf_block_t* block; + page_t* page; + page_t* new_page; + ulint page_no; + byte direction; + ulint hint_page_no; + buf_block_t* new_block; + page_zip_des_t* page_zip; + page_zip_des_t* new_page_zip; + buf_block_t* insert_block; + page_cur_t* page_cursor; + rec_t* rec = 0; + ulint n_recs; + ulint total_data; + ulint insert_size; + rtr_split_node_t* rtr_split_node_array; + rtr_split_node_t* cur_split_node; + rtr_split_node_t* end_split_node; + double* buf_pos; + ulint page_level; + node_seq_t current_ssn; + node_seq_t next_ssn; + buf_block_t* root_block; + rtr_mbr_t mbr; + rtr_mbr_t new_mbr; + lock_prdt_t prdt; + lock_prdt_t new_prdt; + rec_t* first_rec = NULL; + int first_rec_group = 1; + ulint n_iterations = 0; + + if (!*heap) { + *heap = mem_heap_create(1024); + } + +func_start: + mem_heap_empty(*heap); + *offsets = NULL; + + ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + ut_ad(!dict_index_is_online_ddl(cursor->index) + || (flags & BTR_CREATE_FLAG) + || dict_index_is_clust(cursor->index)); + ut_ad(rw_lock_own_flagged(dict_index_get_lock(cursor->index), + RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX)); + + block = btr_cur_get_block(cursor); + page = buf_block_get_frame(block); + page_zip = buf_block_get_page_zip(block); + page_level = btr_page_get_level(page, mtr); + current_ssn = page_get_ssn_id(page); + + 
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(page_get_n_recs(page) >= 1); + + page_no = block->page.id.page_no(); + + if (btr_page_get_prev(page, mtr) == FIL_NULL && !page_is_leaf(page)) { + first_rec = page_rec_get_next( + page_get_infimum_rec(buf_block_get_frame(block))); + } + + /* Initial split nodes array. */ + rtr_split_node_array = rtr_page_split_initialize_nodes( + *heap, cursor, offsets, tuple, &buf_pos); + + /* Divide all mbrs to two groups. */ + n_recs = page_get_n_recs(page) + 1; + + end_split_node = rtr_split_node_array + n_recs; + +#ifdef UNIV_GIS_DEBUG + fprintf(stderr, "Before split a page:\n"); + for (cur_split_node = rtr_split_node_array; + cur_split_node < end_split_node; ++cur_split_node) { + for (int i = 0; i < SPDIMS * 2; i++) { + fprintf(stderr, "%.2lf ", + *(cur_split_node->coords + i)); + } + fprintf(stderr, "\n"); + } +#endif + + insert_size = rec_get_converted_size(cursor->index, tuple, n_ext); + total_data = page_get_data_size(page) + insert_size; + first_rec_group = split_rtree_node(rtr_split_node_array, + static_cast(n_recs), + static_cast(total_data), + static_cast(insert_size), + 0, 2, 2, &buf_pos, SPDIMS, + static_cast(first_rec)); + + /* Allocate a new page to the index */ + direction = FSP_UP; + hint_page_no = page_no + 1; + new_block = btr_page_alloc(cursor->index, hint_page_no, direction, + page_level, mtr, mtr); + new_page_zip = buf_block_get_page_zip(new_block); + btr_page_create(new_block, new_page_zip, cursor->index, + page_level, mtr); + + new_page = buf_block_get_frame(new_block); + ut_ad(page_get_ssn_id(new_page) == 0); + + /* Set new ssn to the new page and page. */ + page_set_ssn_id(new_block, new_page_zip, current_ssn, mtr); + next_ssn = rtr_get_new_ssn_id(cursor->index); + + page_set_ssn_id(block, page_zip, next_ssn, mtr); + + /* Keep recs in first group to the old page, move recs in second + groups to the new page. 
*/ + if (0 +#ifdef UNIV_ZIP_COPY + || page_zip +#endif + || !rtr_split_page_move_rec_list(rtr_split_node_array, + first_rec_group, + new_block, block, first_rec, + cursor->index, *heap, mtr)) { + ulint n = 0; + rec_t* rec; + ulint moved = 0; + ulint max_to_move = 0; + rtr_rec_move_t* rec_move = NULL; + ulint pos; + + /* For some reason, compressing new_page failed, + even though it should contain fewer records than + the original page. Copy the page byte for byte + and then delete the records from both pages + as appropriate. Deleting will always succeed. */ + ut_a(new_page_zip); + + page_zip_copy_recs(new_page_zip, new_page, + page_zip, page, cursor->index, mtr); + + page_cursor = btr_cur_get_page_cur(cursor); + + /* Move locks on recs. */ + max_to_move = page_get_n_recs(page); + rec_move = static_cast(mem_heap_alloc( + *heap, + sizeof (*rec_move) * max_to_move)); + + /* Init the rec_move array for moving lock on recs. */ + for (cur_split_node = rtr_split_node_array; + cur_split_node < end_split_node - 1; ++cur_split_node) { + if (cur_split_node->n_node != first_rec_group) { + pos = page_rec_get_n_recs_before( + cur_split_node->key); + rec = page_rec_get_nth(new_page, pos); + ut_a(rec); + + rec_move[moved].new_rec = rec; + rec_move[moved].old_rec = cur_split_node->key; + rec_move[moved].moved = false; + moved++; + + if (moved > max_to_move) { + ut_ad(0); + break; + } + } + } + + /* Update the lock table */ + lock_rtr_move_rec_list(new_block, block, rec_move, moved); + + /* Delete recs in first group from the new page. 
*/ + for (cur_split_node = rtr_split_node_array; + cur_split_node < end_split_node - 1; ++cur_split_node) { + if (cur_split_node->n_node == first_rec_group) { + ulint pos; + + pos = page_rec_get_n_recs_before( + cur_split_node->key); + ut_a(pos > 0); + rec_t* new_rec = page_rec_get_nth(new_page, + pos - n); + + ut_a(new_rec && page_rec_is_user_rec(new_rec)); + page_cur_position(new_rec, new_block, + page_cursor); + + *offsets = rec_get_offsets( + page_cur_get_rec(page_cursor), + cursor->index, + *offsets, ULINT_UNDEFINED, + heap); + + page_cur_delete_rec(page_cursor, + cursor->index, *offsets, mtr); + n++; + } + } + + /* Delete recs in second group from the old page. */ + for (cur_split_node = rtr_split_node_array; + cur_split_node < end_split_node - 1; ++cur_split_node) { + if (cur_split_node->n_node != first_rec_group) { + page_cur_position(cur_split_node->key, + block, page_cursor); + *offsets = rec_get_offsets( + page_cur_get_rec(page_cursor), + cursor->index, + *offsets, ULINT_UNDEFINED, + heap); + page_cur_delete_rec(page_cursor, + cursor->index, *offsets, mtr); + } + } + +#ifdef UNIV_GIS_DEBUG + ut_ad(page_validate(new_page, cursor->index)); + ut_ad(page_validate(page, cursor->index)); +#endif + } + + /* Insert the new rec to the proper page. */ + cur_split_node = end_split_node - 1; + if (cur_split_node->n_node != first_rec_group) { + insert_block = new_block; + } else { + insert_block = block; + } + + /* Reposition the cursor for insert and try insertion */ + page_cursor = btr_cur_get_page_cur(cursor); + + page_cur_search(insert_block, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); + + /* It's possible that the new record is too big to be inserted into + the page, and it'll need the second round split in this case. 
+ We test this scenario here*/ + DBUG_EXECUTE_IF("rtr_page_need_second_split", + if (n_iterations == 0) { + rec = NULL; + goto after_insert; } + ); + + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, + offsets, heap, n_ext, mtr); + + /* If insert did not fit, try page reorganization. + For compressed pages, page_cur_tuple_insert() will have + attempted this already. */ + if (rec == NULL) { + if (!page_cur_get_page_zip(page_cursor) + && btr_page_reorganize(page_cursor, cursor->index, mtr)) { + rec = page_cur_tuple_insert(page_cursor, tuple, + cursor->index, offsets, + heap, n_ext, mtr); + + } + /* If insert fail, we will try to split the insert_block + again. */ + } + +#ifdef UNIV_DEBUG +after_insert: +#endif + /* Calculate the mbr on the upper half-page, and the mbr on + original page. */ + rtr_page_cal_mbr(cursor->index, block, &mbr, *heap); + rtr_page_cal_mbr(cursor->index, new_block, &new_mbr, *heap); + prdt.data = &mbr; + new_prdt.data = &new_mbr; + + /* Check any predicate locks need to be moved/copied to the + new page */ + lock_prdt_update_split(block, new_block, &prdt, &new_prdt, + dict_index_get_space(cursor->index), page_no); + + /* Adjust the upper level. */ + rtr_adjust_upper_level(cursor, flags, block, new_block, + &mbr, &new_mbr, direction, mtr); + + /* Save the new ssn to the root page, since we need to reinit + the first ssn value from it after restart server. */ + + root_block = btr_root_block_get(cursor->index, RW_SX_LATCH, mtr); + + page_zip = buf_block_get_page_zip(root_block); + page_set_ssn_id(root_block, page_zip, next_ssn, mtr); + + /* Insert fit on the page: update the free bits for the + left and right pages in the same mtr */ + + if (page_is_leaf(page)) { + ibuf_update_free_bits_for_two_pages_low( + block, new_block, mtr); + } + + + /* If the new res insert fail, we need to do another split + again. 
*/ + if (!rec) { + /* We play safe and reset the free bits for new_page */ + if (!dict_index_is_clust(cursor->index) + && !dict_table_is_temporary(cursor->index->table)) { + ibuf_reset_free_bits(new_block); + ibuf_reset_free_bits(block); + } + + /* We need to clean the parent path here and search father + node later, otherwise, it's possible that find a wrong + parent. */ + rtr_clean_rtr_info(cursor->rtr_info, true); + cursor->rtr_info = NULL; + n_iterations++; + + rec_t* i_rec = page_rec_get_next(page_get_infimum_rec( + buf_block_get_frame(block))); + btr_cur_position(cursor->index, i_rec, block, cursor); + + goto func_start; + } + +#ifdef UNIV_GIS_DEBUG + ut_ad(page_validate(buf_block_get_frame(block), cursor->index)); + ut_ad(page_validate(buf_block_get_frame(new_block), cursor->index)); + + ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); +#endif + MONITOR_INC(MONITOR_INDEX_SPLIT); + + return(rec); +} + +/****************************************************************//** +Following the right link to find the proper block for insert. +@return the proper block.*/ +dberr_t +rtr_ins_enlarge_mbr( +/*================*/ + btr_cur_t* btr_cur, /*!< in: btr cursor */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr) /*!< in: mtr */ +{ + dberr_t err = DB_SUCCESS; + rtr_mbr_t new_mbr; + buf_block_t* block; + mem_heap_t* heap; + dict_index_t* index = btr_cur->index; + page_cur_t* page_cursor; + ulint* offsets; + node_visit_t* node_visit; + btr_cur_t cursor; + page_t* page; + + ut_ad(dict_index_is_spatial(index)); + + /* If no rtr_info or rtree is one level tree, return. */ + if (!btr_cur->rtr_info || btr_cur->tree_height == 1) { + return(err); + } + + /* Check path info is not empty. */ + ut_ad(!btr_cur->rtr_info->parent_path->empty()); + + /* Create a memory heap. 
*/ + heap = mem_heap_create(1024); + + /* Leaf level page is stored in cursor */ + page_cursor = btr_cur_get_page_cur(btr_cur); + block = page_cur_get_block(page_cursor); + + for (ulint i = 1; i < btr_cur->tree_height; i++) { + node_visit = rtr_get_parent_node(btr_cur, i, true); + ut_ad(node_visit != NULL); + + /* If there's no mbr enlarge, return.*/ + if (node_visit->mbr_inc == 0) { + block = btr_pcur_get_block(node_visit->cursor); + continue; + } + + /* Calculate the mbr of the child page. */ + rtr_page_cal_mbr(index, block, &new_mbr, heap); + + /* Get father block. */ + memset(&cursor, 0, sizeof(cursor)); + offsets = rtr_page_get_father_block( + NULL, heap, index, block, mtr, btr_cur, &cursor); + + page = buf_block_get_frame(block); + + /* Update the mbr field of the rec. */ + if (!rtr_update_mbr_field(&cursor, offsets, NULL, page, + &new_mbr, NULL, mtr)) { + err = DB_ERROR; + break; + } + + page_cursor = btr_cur_get_page_cur(&cursor); + block = page_cur_get_block(page_cursor); + } + + mem_heap_free(heap); + + return(err); +} + +/*************************************************************//** +Copy recs from a page to new_block of rtree. +Differs from page_copy_rec_list_end, because this function does not +touch the lock table and max trx id on page or compress the page. + +IMPORTANT: The caller will have to update IBUF_BITMAP_FREE +if new_block is a compressed leaf page in a secondary index. +This has to be done either within the same mini-transaction, +or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/ +void +rtr_page_copy_rec_list_end_no_locks( +/*================================*/ + buf_block_t* new_block, /*!< in: index page to copy to */ + buf_block_t* block, /*!< in: index page of rec */ + rec_t* rec, /*!< in: record on page */ + dict_index_t* index, /*!< in: record descriptor */ + mem_heap_t* heap, /*!< in/out: heap memory */ + rtr_rec_move_t* rec_move, /*!< in: recording records moved */ + ulint max_move, /*!< in: num of rec to move */ + ulint* num_moved, /*!< out: num of rec to move */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_t* new_page = buf_block_get_frame(new_block); + page_cur_t page_cur; + page_cur_t cur1; + rec_t* cur_rec; + ulint offsets_1[REC_OFFS_NORMAL_SIZE]; + ulint* offsets1 = offsets_1; + ulint offsets_2[REC_OFFS_NORMAL_SIZE]; + ulint* offsets2 = offsets_2; + ulint moved = 0; + bool is_leaf = page_is_leaf(new_page); + + rec_offs_init(offsets_1); + rec_offs_init(offsets_2); + + page_cur_position(rec, block, &cur1); + + if (page_cur_is_before_first(&cur1)) { + page_cur_move_to_next(&cur1); + } + + btr_assert_not_corrupted(new_block, index); + ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); + ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) + (page_is_comp(new_page) ? 
PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); + + cur_rec = page_rec_get_next( + page_get_infimum_rec(buf_block_get_frame(new_block))); + page_cur_position(cur_rec, new_block, &page_cur); + + /* Copy records from the original page to the new page */ + while (!page_cur_is_after_last(&cur1)) { + rec_t* cur1_rec = page_cur_get_rec(&cur1); + rec_t* ins_rec; + + if (page_rec_is_infimum(cur_rec)) { + cur_rec = page_rec_get_next(cur_rec); + } + + offsets1 = rec_get_offsets(cur1_rec, index, offsets1, + ULINT_UNDEFINED, &heap); + while (!page_rec_is_supremum(cur_rec)) { + ulint cur_matched_fields = 0; + int cmp; + + offsets2 = rec_get_offsets(cur_rec, index, offsets2, + ULINT_UNDEFINED, &heap); + cmp = cmp_rec_rec_with_match(cur1_rec, cur_rec, + offsets1, offsets2, + index, FALSE, + &cur_matched_fields); + if (cmp < 0) { + page_cur_move_to_prev(&page_cur); + break; + } else if (cmp > 0) { + /* Skip small recs. */ + page_cur_move_to_next(&page_cur); + cur_rec = page_cur_get_rec(&page_cur); + } else if (is_leaf) { + if (rec_get_deleted_flag(cur1_rec, + dict_table_is_comp(index->table))) { + goto next; + } else { + /* We have two identical leaf records, + skip copying the undeleted one, and + unmark deleted on the current page */ + btr_rec_set_deleted_flag( + cur_rec, NULL, FALSE); + goto next; + } + } + } + + /* If position is on suprenum rec, need to move to + previous rec. 
*/ + if (page_rec_is_supremum(cur_rec)) { + page_cur_move_to_prev(&page_cur); + } + + cur_rec = page_cur_get_rec(&page_cur); + + offsets1 = rec_get_offsets(cur1_rec, index, offsets1, + ULINT_UNDEFINED, &heap); + + ins_rec = page_cur_insert_rec_low(cur_rec, index, + cur1_rec, offsets1, mtr); + if (UNIV_UNLIKELY(!ins_rec)) { + fprintf(stderr, "page number %ld and %ld\n", + (long)new_block->page.id.page_no(), + (long)block->page.id.page_no()); + + ib::fatal() << "rec offset " << page_offset(rec) + << ", cur1 offset " + << page_offset(page_cur_get_rec(&cur1)) + << ", cur_rec offset " + << page_offset(cur_rec); + } + + rec_move[moved].new_rec = ins_rec; + rec_move[moved].old_rec = cur1_rec; + rec_move[moved].moved = false; + moved++; +next: + if (moved > max_move) { + ut_ad(0); + break; + } + + page_cur_move_to_next(&cur1); + } + + *num_moved = moved; +} + +/*************************************************************//** +Copy recs till a specified rec from a page to new_block of rtree. */ +void +rtr_page_copy_rec_list_start_no_locks( +/*==================================*/ + buf_block_t* new_block, /*!< in: index page to copy to */ + buf_block_t* block, /*!< in: index page of rec */ + rec_t* rec, /*!< in: record on page */ + dict_index_t* index, /*!< in: record descriptor */ + mem_heap_t* heap, /*!< in/out: heap memory */ + rtr_rec_move_t* rec_move, /*!< in: recording records moved */ + ulint max_move, /*!< in: num of rec to move */ + ulint* num_moved, /*!< out: num of rec to move */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_cur_t cur1; + rec_t* cur_rec; + ulint offsets_1[REC_OFFS_NORMAL_SIZE]; + ulint* offsets1 = offsets_1; + ulint offsets_2[REC_OFFS_NORMAL_SIZE]; + ulint* offsets2 = offsets_2; + page_cur_t page_cur; + ulint moved = 0; + bool is_leaf = page_is_leaf(buf_block_get_frame(block)); + + rec_offs_init(offsets_1); + rec_offs_init(offsets_2); + + page_cur_set_before_first(block, &cur1); + page_cur_move_to_next(&cur1); + + cur_rec = page_rec_get_next( + 
page_get_infimum_rec(buf_block_get_frame(new_block))); + page_cur_position(cur_rec, new_block, &page_cur); + + while (page_cur_get_rec(&cur1) != rec) { + rec_t* cur1_rec = page_cur_get_rec(&cur1); + rec_t* ins_rec; + + if (page_rec_is_infimum(cur_rec)) { + cur_rec = page_rec_get_next(cur_rec); + } + + offsets1 = rec_get_offsets(cur1_rec, index, offsets1, + ULINT_UNDEFINED, &heap); + + while (!page_rec_is_supremum(cur_rec)) { + ulint cur_matched_fields = 0; + int cmp; + + offsets2 = rec_get_offsets(cur_rec, index, offsets2, + ULINT_UNDEFINED, &heap); + cmp = cmp_rec_rec_with_match(cur1_rec, cur_rec, + offsets1, offsets2, + index, FALSE, + &cur_matched_fields); + if (cmp < 0) { + page_cur_move_to_prev(&page_cur); + cur_rec = page_cur_get_rec(&page_cur); + break; + } else if (cmp > 0) { + /* Skip small recs. */ + page_cur_move_to_next(&page_cur); + cur_rec = page_cur_get_rec(&page_cur); + } else if (is_leaf) { + if (rec_get_deleted_flag( + cur1_rec, + dict_table_is_comp(index->table))) { + goto next; + } else { + /* We have two identical leaf records, + skip copying the undeleted one, and + unmark deleted on the current page */ + btr_rec_set_deleted_flag( + cur_rec, NULL, FALSE); + goto next; + } + } + } + + /* If position is on suprenum rec, need to move to + previous rec. 
*/ + if (page_rec_is_supremum(cur_rec)) { + page_cur_move_to_prev(&page_cur); + } + + cur_rec = page_cur_get_rec(&page_cur); + + offsets1 = rec_get_offsets(cur1_rec, index, offsets1, + ULINT_UNDEFINED, &heap); + + ins_rec = page_cur_insert_rec_low(cur_rec, index, + cur1_rec, offsets1, mtr); + if (UNIV_UNLIKELY(!ins_rec)) { + fprintf(stderr, "page number %ld and %ld\n", + (long)new_block->page.id.page_no(), + (long)block->page.id.page_no()); + + ib::fatal() << "rec offset " << page_offset(rec) + << ", cur1 offset " + << page_offset(page_cur_get_rec(&cur1)) + << ", cur_rec offset " + << page_offset(cur_rec); + } + + rec_move[moved].new_rec = ins_rec; + rec_move[moved].old_rec = cur1_rec; + rec_move[moved].moved = false; + moved++; +next: + if (moved > max_move) { + ut_ad(0); + break; + } + + page_cur_move_to_next(&cur1); + } + + *num_moved = moved; +} + +/****************************************************************//** +Check two MBRs are identical or need to be merged */ +bool +rtr_merge_mbr_changed( +/*==================*/ + btr_cur_t* cursor, /*!< in/out: cursor */ + btr_cur_t* cursor2, /*!< in: the other cursor */ + ulint* offsets, /*!< in: rec offsets */ + ulint* offsets2, /*!< in: rec offsets */ + rtr_mbr_t* new_mbr, /*!< out: MBR to update */ + buf_block_t* merge_block, /*!< in: page to merge */ + buf_block_t* block, /*!< in: page be merged */ + dict_index_t* index) /*!< in: index */ +{ + double* mbr; + double mbr1[SPDIMS * 2]; + double mbr2[SPDIMS * 2]; + rec_t* rec; + ulint len; + bool changed = false; + + ut_ad(dict_index_is_spatial(cursor->index)); + + rec = btr_cur_get_rec(cursor); + + rtr_read_mbr(rec_get_nth_field(rec, offsets, 0, &len), + reinterpret_cast(mbr1)); + + rec = btr_cur_get_rec(cursor2); + + rtr_read_mbr(rec_get_nth_field(rec, offsets2, 0, &len), + reinterpret_cast(mbr2)); + + mbr = reinterpret_cast(new_mbr); + + for (int i = 0; i < SPDIMS * 2; i += 2) { + changed = (changed || mbr1[i] != mbr2[i]); + *mbr = mbr1[i] < mbr2[i] ? 
mbr1[i] : mbr2[i]; + mbr++; + changed = (changed || mbr1[i + 1] != mbr2 [i + 1]); + *mbr = mbr1[i + 1] > mbr2[i + 1] ? mbr1[i + 1] : mbr2[i + 1]; + mbr++; + } + + return(changed); +} + +/****************************************************************//** +Merge 2 mbrs and update the the mbr that cursor is on. */ +dberr_t +rtr_merge_and_update_mbr( +/*=====================*/ + btr_cur_t* cursor, /*!< in/out: cursor */ + btr_cur_t* cursor2, /*!< in: the other cursor */ + ulint* offsets, /*!< in: rec offsets */ + ulint* offsets2, /*!< in: rec offsets */ + page_t* child_page, /*!< in: the page. */ + buf_block_t* merge_block, /*!< in: page to merge */ + buf_block_t* block, /*!< in: page be merged */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr) /*!< in: mtr */ +{ + dberr_t err = DB_SUCCESS; + rtr_mbr_t new_mbr; + bool changed = false; + + ut_ad(dict_index_is_spatial(cursor->index)); + + changed = rtr_merge_mbr_changed(cursor, cursor2, offsets, offsets2, + &new_mbr, merge_block, + block, index); + + /* Update the mbr field of the rec. And will delete the record + pointed by cursor2 */ + if (changed) { + if (!rtr_update_mbr_field(cursor, offsets, cursor2, child_page, + &new_mbr, NULL, mtr)) { + err = DB_ERROR; + } + } else { + rtr_node_ptr_delete(cursor2->index, cursor2, block, mtr); + } + + return(err); +} + +/*************************************************************//** +Deletes on the upper level the node pointer to a page. 
*/ +void +rtr_node_ptr_delete( +/*================*/ + dict_index_t* index, /*!< in: index tree */ + btr_cur_t* cursor, /*!< in: search cursor, contains information + about parent nodes in search */ + buf_block_t* block, /*!< in: page whose node pointer is deleted */ + mtr_t* mtr) /*!< in: mtr */ +{ + ibool compressed; + dberr_t err; + + compressed = btr_cur_pessimistic_delete(&err, TRUE, cursor, + BTR_CREATE_FLAG, false, mtr); + ut_a(err == DB_SUCCESS); + + if (!compressed) { + btr_cur_compress_if_useful(cursor, FALSE, mtr); + } +} + +/**************************************************************//** +Check whether a Rtree page is child of a parent page +@return true if there is child/parent relationship */ +bool +rtr_check_same_block( +/*================*/ + dict_index_t* index, /*!< in: index tree */ + btr_cur_t* cursor, /*!< in/out: position at the parent entry + pointing to the child if successful */ + buf_block_t* parentb,/*!< in: parent page to check */ + buf_block_t* childb, /*!< in: child Page */ + mem_heap_t* heap) /*!< in: memory heap */ + +{ + ulint page_no = childb->page.id.page_no(); + ulint* offsets; + rec_t* rec = page_rec_get_next(page_get_infimum_rec( + buf_block_get_frame(parentb))); + + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets( + rec, index, NULL, ULINT_UNDEFINED, &heap); + + if (btr_node_ptr_get_child_page_no(rec, offsets) == page_no) { + btr_cur_position(index, rec, parentb, cursor); + return(true); + } + + rec = page_rec_get_next(rec); + } + + return(false); +} + +/****************************************************************//** +Calculate the area increased for a new record +@return area increased */ +double +rtr_rec_cal_increase( +/*=================*/ + const dtuple_t* dtuple, /*!< in: data tuple to insert, which + cause area increase */ + const rec_t* rec, /*!< in: physical record which differs from + dtuple in some of the common fields, or which + has an equal number or more fields than + dtuple */ + const 
ulint* offsets,/*!< in: array returned by rec_get_offsets() */ + double* area) /*!< out: increased area */ +{ + const dfield_t* dtuple_field; + ulint dtuple_f_len; + ulint rec_f_len; + const byte* rec_b_ptr; + double ret = 0; + + ut_ad(!page_rec_is_supremum(rec)); + ut_ad(!page_rec_is_infimum(rec)); + + dtuple_field = dtuple_get_nth_field(dtuple, 0); + dtuple_f_len = dfield_get_len(dtuple_field); + + rec_b_ptr = rec_get_nth_field(rec, offsets, 0, &rec_f_len); + ret = rtree_area_increase( + rec_b_ptr, + static_cast(dfield_get_data(dtuple_field)), + static_cast(dtuple_f_len), area); + + return(ret); +} + +/** Estimates the number of rows in a given area. +@param[in] index index +@param[in] tuple range tuple containing mbr, may also be empty tuple +@param[in] mode search mode +@return estimated number of rows */ +int64_t +rtr_estimate_n_rows_in_range( + dict_index_t* index, + const dtuple_t* tuple, + page_cur_mode_t mode) +{ + /* Check tuple & mode */ + if (tuple->n_fields == 0) { + return(HA_POS_ERROR); + } + + switch (mode) { + case PAGE_CUR_DISJOINT: + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + case PAGE_CUR_WITHIN: + case PAGE_CUR_MBR_EQUAL: + break; + default: + return(HA_POS_ERROR); + } + + DBUG_EXECUTE_IF("rtr_pcur_move_to_next_return", + return(2); + ); + + /* Read mbr from tuple. */ + const dfield_t* dtuple_field; + ulint dtuple_f_len MY_ATTRIBUTE((unused)); + rtr_mbr_t range_mbr; + double range_area; + byte* range_mbr_ptr; + + dtuple_field = dtuple_get_nth_field(tuple, 0); + dtuple_f_len = dfield_get_len(dtuple_field); + range_mbr_ptr = reinterpret_cast(dfield_get_data(dtuple_field)); + + ut_ad(dtuple_f_len >= DATA_MBR_LEN); + rtr_read_mbr(range_mbr_ptr, &range_mbr); + range_area = (range_mbr.xmax - range_mbr.xmin) + * (range_mbr.ymax - range_mbr.ymin); + + /* Get index root page. 
*/ + page_size_t page_size(dict_table_page_size(index->table)); + page_id_t page_id(dict_index_get_space(index), + dict_index_get_page(index)); + mtr_t mtr; + buf_block_t* block; + page_t* page; + ulint n_recs; + + mtr_start(&mtr); + mtr.set_named_space(dict_index_get_space(index)); + mtr_s_lock(dict_index_get_lock(index), &mtr); + + block = btr_block_get(page_id, page_size, RW_S_LATCH, index, &mtr); + page = buf_block_get_frame(block); + n_recs = page_header_get_field(page, PAGE_N_RECS); + + if (n_recs == 0) { + mtr_commit(&mtr); + return(HA_POS_ERROR); + } + + rec_t* rec; + byte* field; + ulint len; + ulint* offsets = NULL; + mem_heap_t* heap; + + heap = mem_heap_create(512); + rec = page_rec_get_next(page_get_infimum_rec(page)); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + + /* Scan records in root page and calculate area. */ + double area = 0; + while (!page_rec_is_supremum(rec)) { + rtr_mbr_t mbr; + double rec_area; + + field = rec_get_nth_field(rec, offsets, 0, &len); + ut_ad(len == DATA_MBR_LEN); + + rtr_read_mbr(field, &mbr); + + rec_area = (mbr.xmax - mbr.xmin) * (mbr.ymax - mbr.ymin); + + if (rec_area == 0) { + switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + area += 1; + break; + + case PAGE_CUR_DISJOINT: + break; + + case PAGE_CUR_WITHIN: + case PAGE_CUR_MBR_EQUAL: + if (rtree_key_cmp( + PAGE_CUR_WITHIN, range_mbr_ptr, + DATA_MBR_LEN, field, DATA_MBR_LEN) + == 0) { + area += 1; + } + + break; + + default: + ut_error; + } + } else { + switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + area += rtree_area_overlapping(range_mbr_ptr, + field, DATA_MBR_LEN) / rec_area; + break; + + case PAGE_CUR_DISJOINT: + area += 1; + area -= rtree_area_overlapping(range_mbr_ptr, + field, DATA_MBR_LEN) / rec_area; + break; + + case PAGE_CUR_WITHIN: + case PAGE_CUR_MBR_EQUAL: + if (rtree_key_cmp( + PAGE_CUR_WITHIN, range_mbr_ptr, + DATA_MBR_LEN, field, DATA_MBR_LEN) + == 0) { + area += range_area / 
rec_area; + } + + break; + default: + ut_error; + } + } + + rec = page_rec_get_next(rec); + } + + mtr_commit(&mtr); + mem_heap_free(heap); + + if (my_isinf(area) || my_isnan(area)) { + return(HA_POS_ERROR); + } + + return(static_cast(dict_table_get_n_rows(index->table) + * area / n_recs)); +} diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc new file mode 100644 index 00000000000..2c0c5b453a3 --- /dev/null +++ b/storage/innobase/gis/gis0sea.cc @@ -0,0 +1,2025 @@ +/***************************************************************************** + +Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file gis/gis0sea.cc +InnoDB R-tree search interfaces + +Created 2014/01/16 Jimmy Yang +***********************************************************************/ + +#include "fsp0fsp.h" +#include "page0page.h" +#include "page0cur.h" +#include "page0zip.h" +#include "gis0rtree.h" + +#ifndef UNIV_HOTBACKUP +#include "btr0cur.h" +#include "btr0sea.h" +#include "btr0pcur.h" +#include "rem0cmp.h" +#include "lock0lock.h" +#include "ibuf0ibuf.h" +#include "trx0trx.h" +#include "srv0mon.h" +#include "gis0geo.h" + +#endif /* UNIV_HOTBACKUP */ + +/*************************************************************//** +Pop out used parent path entry, until we find the parent with matching +page number */ +static +void +rtr_adjust_parent_path( +/*===================*/ + rtr_info_t* rtr_info, /* R-Tree info struct */ + ulint page_no) /* page number to look for */ +{ + while (!rtr_info->parent_path->empty()) { + if (rtr_info->parent_path->back().child_no == page_no) { + break; + } else { + if (rtr_info->parent_path->back().cursor) { + btr_pcur_close( + rtr_info->parent_path->back().cursor); + ut_free(rtr_info->parent_path->back().cursor); + } + + rtr_info->parent_path->pop_back(); + } + } +} + +/*************************************************************//** +Find the next matching record. This function is used by search +or record locating during index delete/update. 
+@return true if there is suitable record found, otherwise false */ +static +bool +rtr_pcur_getnext_from_path( +/*=======================*/ + const dtuple_t* tuple, /*!< in: data tuple */ + page_cur_mode_t mode, /*!< in: cursor search mode */ + btr_cur_t* btr_cur,/*!< in: persistent cursor; NOTE that the + function may release the page latch */ + ulint target_level, + /*!< in: target level */ + ulint latch_mode, + /*!< in: latch_mode */ + bool index_locked, + /*!< in: index tree locked */ + mtr_t* mtr) /*!< in: mtr */ +{ + dict_index_t* index = btr_cur->index; + bool found = false; + ulint space = dict_index_get_space(index); + page_cur_t* page_cursor; + ulint level = 0; + node_visit_t next_rec; + rtr_info_t* rtr_info = btr_cur->rtr_info; + node_seq_t page_ssn; + ulint my_latch_mode; + ulint skip_parent = false; + bool new_split = false; + bool need_parent; + bool for_delete = false; + bool for_undo_ins = false; + + /* exhausted all the pages to be searched */ + if (rtr_info->path->empty()) { + return(false); + } + + ut_ad(dtuple_get_n_fields_cmp(tuple)); + + my_latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + for_delete = latch_mode & BTR_RTREE_DELETE_MARK; + for_undo_ins = latch_mode & BTR_RTREE_UNDO_INS; + + /* There should be no insert coming to this function. Only + mode with BTR_MODIFY_* should be delete */ + ut_ad(mode != PAGE_CUR_RTREE_INSERT); + ut_ad(my_latch_mode == BTR_SEARCH_LEAF + || my_latch_mode == BTR_MODIFY_LEAF + || my_latch_mode == BTR_MODIFY_TREE + || my_latch_mode == BTR_CONT_MODIFY_TREE); + + /* Whether need to track parent information. 
Only need so + when we do tree altering operations (such as index page merge) */ + need_parent = ((my_latch_mode == BTR_MODIFY_TREE + || my_latch_mode == BTR_CONT_MODIFY_TREE) + && mode == PAGE_CUR_RTREE_LOCATE); + + if (!index_locked) { + ut_ad(latch_mode & BTR_SEARCH_LEAF + || latch_mode & BTR_MODIFY_LEAF); + mtr_s_lock(dict_index_get_lock(index), mtr); + } else { + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_SX_LOCK) + || mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK) + || mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK)); + } + + const page_size_t& page_size = dict_table_page_size(index->table); + + /* Pop each node/page to be searched from "path" structure + and do a search on it. Please note, any pages that are in + the "path" structure are protected by "page" lock, so tey + cannot be shrunk away */ + do { + buf_block_t* block; + node_seq_t path_ssn; + const page_t* page; + ulint rw_latch = RW_X_LATCH; + ulint tree_idx; + + mutex_enter(&rtr_info->rtr_path_mutex); + next_rec = rtr_info->path->back(); + rtr_info->path->pop_back(); + level = next_rec.level; + path_ssn = next_rec.seq_no; + tree_idx = btr_cur->tree_height - level - 1; + + /* Maintain the parent path info as well, if needed */ + if (need_parent && !skip_parent && !new_split) { + ulint old_level; + ulint new_level; + + ut_ad(!rtr_info->parent_path->empty()); + + /* Cleanup unused parent info */ + if (rtr_info->parent_path->back().cursor) { + btr_pcur_close( + rtr_info->parent_path->back().cursor); + ut_free(rtr_info->parent_path->back().cursor); + } + + old_level = rtr_info->parent_path->back().level; + + rtr_info->parent_path->pop_back(); + + ut_ad(!rtr_info->parent_path->empty()); + + /* check whether there is a level change. 
If so, + the current parent path needs to pop enough + nodes to adjust to the new search page */ + new_level = rtr_info->parent_path->back().level; + + if (old_level < new_level) { + rtr_adjust_parent_path( + rtr_info, next_rec.page_no); + } + + ut_ad(!rtr_info->parent_path->empty()); + + ut_ad(next_rec.page_no + == rtr_info->parent_path->back().child_no); + } + + mutex_exit(&rtr_info->rtr_path_mutex); + + skip_parent = false; + new_split = false; + + /* Once we have pages in "path", these pages are + predicate page locked, so they can't be shrunk away. + They also have SSN (split sequence number) to detect + splits, so we can directly latch single page while + getting them. They can be unlatched if not qualified. + One reason for pre-latch is that we might need to position + some parent position (requires latch) during search */ + if (level == 0) { + /* S latched for SEARCH_LEAF, and X latched + for MODIFY_LEAF */ + if (my_latch_mode <= BTR_MODIFY_LEAF) { + rw_latch = my_latch_mode; + } + + if (my_latch_mode == BTR_CONT_MODIFY_TREE + || my_latch_mode == BTR_MODIFY_TREE) { + rw_latch = RW_NO_LATCH; + } + + } else if (level == target_level) { + rw_latch = RW_X_LATCH; + } + + /* Release previous locked blocks */ + if (my_latch_mode != BTR_SEARCH_LEAF) { + for (ulint idx = 0; idx < btr_cur->tree_height; + idx++) { + if (rtr_info->tree_blocks[idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[idx], + rtr_info->tree_blocks[idx]); + rtr_info->tree_blocks[idx] = NULL; + } + } + for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3; + idx++) { + if (rtr_info->tree_blocks[idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[idx], + rtr_info->tree_blocks[idx]); + rtr_info->tree_blocks[idx] = NULL; + } + } + } + + /* set up savepoint to record any locks to be taken */ + rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); + +#ifdef UNIV_RTR_DEBUG + ut_ad(!(rw_lock_own(&btr_cur->page_cur.block->lock, RW_LOCK_X) + 
|| + rw_lock_own(&btr_cur->page_cur.block->lock, RW_LOCK_S)) + || my_latch_mode == BTR_MODIFY_TREE + || my_latch_mode == BTR_CONT_MODIFY_TREE + || !page_is_leaf(buf_block_get_frame( + btr_cur->page_cur.block))); +#endif /* UNIV_RTR_DEBUG */ + + page_id_t page_id(space, next_rec.page_no); + dberr_t err = DB_SUCCESS; + + block = buf_page_get_gen( + page_id, page_size, + rw_latch, NULL, BUF_GET, __FILE__, __LINE__, mtr, &err); + + if (block == NULL) { + continue; + } else if (rw_latch != RW_NO_LATCH) { + ut_ad(!dict_index_is_ibuf(index)); + buf_block_dbg_add_level(block, SYNC_TREE_NODE); + } + + rtr_info->tree_blocks[tree_idx] = block; + + page = buf_block_get_frame(block); + page_ssn = page_get_ssn_id(page); + + /* If there are splits, push the splitted page. + Note that we have SX lock on index->lock, there + should not be any split/shrink happening here */ + if (page_ssn > path_ssn) { + ulint next_page_no = btr_page_get_next(page, mtr); + rtr_non_leaf_stack_push( + rtr_info->path, next_page_no, path_ssn, + level, 0, NULL, 0); + + if (!srv_read_only_mode + && mode != PAGE_CUR_RTREE_INSERT + && mode != PAGE_CUR_RTREE_LOCATE) { + ut_ad(rtr_info->thr); + lock_place_prdt_page_lock( + space, next_page_no, index, + rtr_info->thr); + } + new_split = true; +#if UNIV_GIS_DEBUG + fprintf(stderr, + "GIS_DIAG: Splitted page found: %d, %ld\n", + static_cast(need_parent), next_page_no); +#endif + } + + page_cursor = btr_cur_get_page_cur(btr_cur); + page_cursor->rec = NULL; + + if (mode == PAGE_CUR_RTREE_LOCATE) { + if (level == target_level && level == 0) { + ulint low_match; + + found = false; + + low_match = page_cur_search( + block, index, tuple, + PAGE_CUR_LE, + btr_cur_get_page_cur(btr_cur)); + + if (low_match == dtuple_get_n_fields_cmp( + tuple)) { + rec_t* rec = btr_cur_get_rec(btr_cur); + + if (!rec_get_deleted_flag(rec, + dict_table_is_comp(index->table)) + || (!for_delete && !for_undo_ins)) { + found = true; + btr_cur->low_match = low_match; + } else { + /* mark we 
found deleted row */ + btr_cur->rtr_info->fd_del + = true; + } + } + } else { + page_cur_mode_t page_mode = mode; + + if (level == target_level + && target_level != 0) { + page_mode = PAGE_CUR_RTREE_GET_FATHER; + } + found = rtr_cur_search_with_match( + block, index, tuple, page_mode, + page_cursor, btr_cur->rtr_info); + + /* Save the position of parent if needed */ + if (found && need_parent) { + btr_pcur_t* r_cursor = + rtr_get_parent_cursor( + btr_cur, level, false); + + rec_t* rec = page_cur_get_rec( + page_cursor); + page_cur_position( + rec, block, + btr_pcur_get_page_cur(r_cursor)); + r_cursor->pos_state = + BTR_PCUR_IS_POSITIONED; + r_cursor->latch_mode = my_latch_mode; + btr_pcur_store_position(r_cursor, mtr); +#ifdef UNIV_DEBUG + ulint num_stored = + rtr_store_parent_path( + block, btr_cur, + rw_latch, level, mtr); + ut_ad(num_stored > 0); +#else + rtr_store_parent_path( + block, btr_cur, rw_latch, + level, mtr); +#endif /* UNIV_DEBUG */ + } + } + } else { + found = rtr_cur_search_with_match( + block, index, tuple, mode, page_cursor, + btr_cur->rtr_info); + } + + /* Attach predicate lock if needed, no matter whether + there are matched records */ + if (mode != PAGE_CUR_RTREE_INSERT + && mode != PAGE_CUR_RTREE_LOCATE + && mode >= PAGE_CUR_CONTAIN + && btr_cur->rtr_info->need_prdt_lock + && found) { + lock_prdt_t prdt; + + trx_t* trx = thr_get_trx( + btr_cur->rtr_info->thr); + lock_mutex_enter(); + lock_init_prdt_from_mbr( + &prdt, &btr_cur->rtr_info->mbr, + mode, trx->lock.lock_heap); + lock_mutex_exit(); + + if (rw_latch == RW_NO_LATCH) { + rw_lock_s_lock(&(block->lock)); + } + + lock_prdt_lock(block, &prdt, index, LOCK_S, + LOCK_PREDICATE, btr_cur->rtr_info->thr, + mtr); + + if (rw_latch == RW_NO_LATCH) { + rw_lock_s_unlock(&(block->lock)); + } + } + + if (found) { + if (level == target_level) { + page_cur_t* r_cur;; + + if (my_latch_mode == BTR_MODIFY_TREE + && level == 0) { + ut_ad(rw_latch == RW_NO_LATCH); + page_id_t my_page_id( + space, 
block->page.id.page_no()); + + btr_cur_latch_leaves( + block, my_page_id, + page_size, BTR_MODIFY_TREE, + btr_cur, mtr); + } + + r_cur = btr_cur_get_page_cur(btr_cur); + + page_cur_position( + page_cur_get_rec(page_cursor), + page_cur_get_block(page_cursor), + r_cur); + + btr_cur->low_match = level != 0 ? + DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1 + : btr_cur->low_match; + break; + } + + /* Keep the parent path node, which points to + last node just located */ + skip_parent = true; + } else { + /* Release latch on the current page */ + ut_ad(rtr_info->tree_blocks[tree_idx]); + + mtr_release_block_at_savepoint( + mtr, rtr_info->tree_savepoints[tree_idx], + rtr_info->tree_blocks[tree_idx]); + rtr_info->tree_blocks[tree_idx] = NULL; + } + + } while (!rtr_info->path->empty()); + + const rec_t* rec = btr_cur_get_rec(btr_cur); + + if (page_rec_is_infimum(rec) || page_rec_is_supremum(rec)) { + mtr_commit(mtr); + mtr_start(mtr); + } else if (!index_locked) { + mtr_memo_release(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK); + } + + return(found); +} + +/*************************************************************//** +Find the next matching record. This function will first exhaust +the copied record listed in the rtr_info->matches vector before +moving to the next page +@return true if there is suitable record found, otherwise false */ +bool +rtr_pcur_move_to_next( +/*==================*/ + const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in + tuple must be set so that it cannot get + compared to the node ptr page number field! 
*/ + page_cur_mode_t mode, /*!< in: cursor search mode */ + btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the + function may release the page latch */ + ulint level, /*!< in: target level */ + mtr_t* mtr) /*!< in: mtr */ +{ + rtr_info_t* rtr_info = cursor->btr_cur.rtr_info; + + ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + + mutex_enter(&rtr_info->matches->rtr_match_mutex); + /* First retrieve the next record on the current page */ + if (!rtr_info->matches->matched_recs->empty()) { + rtr_rec_t rec; + rec = rtr_info->matches->matched_recs->back(); + rtr_info->matches->matched_recs->pop_back(); + mutex_exit(&rtr_info->matches->rtr_match_mutex); + + cursor->btr_cur.page_cur.rec = rec.r_rec; + cursor->btr_cur.page_cur.block = &rtr_info->matches->block; + + DEBUG_SYNC_C("rtr_pcur_move_to_next_return"); + return(true); + } + + mutex_exit(&rtr_info->matches->rtr_match_mutex); + + /* Fetch the next page */ + return(rtr_pcur_getnext_from_path(tuple, mode, &cursor->btr_cur, + level, cursor->latch_mode, + false, mtr)); +} + +/*************************************************************//** +Check if the cursor holds record pointing to the specified child page +@return true if it is (pointing to the child page) false otherwise */ +static +bool +rtr_compare_cursor_rec( +/*===================*/ + dict_index_t* index, /*!< in: index */ + btr_cur_t* cursor, /*!< in: Cursor to check */ + ulint page_no, /*!< in: desired child page number */ + mem_heap_t** heap) /*!< in: memory heap */ +{ + const rec_t* rec; + ulint* offsets; + + rec = btr_cur_get_rec(cursor); + + offsets = rec_get_offsets( + rec, index, NULL, ULINT_UNDEFINED, heap); + + return(btr_node_ptr_get_child_page_no(rec, offsets) == page_no); +} + +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. It should be +closed with btr_pcur_close. 
Mainly called by row_search_index_entry() */ +void +rtr_pcur_open_low( +/*==============*/ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: level in the rtree */ + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_RTREE_LOCATE, ... */ + ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mtr */ +{ + btr_cur_t* btr_cursor; + ulint n_fields; + ulint low_match; + rec_t* rec; + bool tree_latched = false; + bool for_delete = false; + bool for_undo_ins = false; + + ut_ad(level == 0); + + ut_ad(latch_mode & BTR_MODIFY_LEAF || latch_mode & BTR_MODIFY_TREE); + ut_ad(mode == PAGE_CUR_RTREE_LOCATE); + + /* Initialize the cursor */ + + btr_pcur_init(cursor); + + for_delete = latch_mode & BTR_RTREE_DELETE_MARK; + for_undo_ins = latch_mode & BTR_RTREE_UNDO_INS; + + cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode = mode; + + /* Search with the tree cursor */ + + btr_cursor = btr_pcur_get_btr_cur(cursor); + + btr_cursor->rtr_info = rtr_create_rtr_info(false, false, + btr_cursor, index); + + /* Purge will SX lock the tree instead of take Page Locks */ + if (btr_cursor->thr) { + btr_cursor->rtr_info->need_page_lock = true; + btr_cursor->rtr_info->thr = btr_cursor->thr; + } + + btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode, + btr_cursor, 0, file, line, mtr); + cursor->pos_state = BTR_PCUR_IS_POSITIONED; + + cursor->trx_if_known = NULL; + + low_match = btr_pcur_get_low_match(cursor); + + rec = btr_pcur_get_rec(cursor); + + n_fields = dtuple_get_n_fields(tuple); + + if (latch_mode & BTR_ALREADY_S_LATCHED) { + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK)); + tree_latched = true; + } + + if (latch_mode & BTR_MODIFY_TREE) { + ut_ad(mtr_memo_contains(mtr, 
dict_index_get_lock(index), + MTR_MEMO_X_LOCK) + || mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_SX_LOCK)); + tree_latched = true; + } + + if (page_rec_is_infimum(rec) || low_match != n_fields + || (rec_get_deleted_flag(rec, dict_table_is_comp(index->table)) + && (for_delete || for_undo_ins))) { + + if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table)) + && for_delete) { + btr_cursor->rtr_info->fd_del = true; + btr_cursor->low_match = 0; + } + /* Did not find matched row in first dive. Release + latched block if any before search more pages */ + if (latch_mode & BTR_MODIFY_LEAF) { + ulint tree_idx = btr_cursor->tree_height - 1; + rtr_info_t* rtr_info = btr_cursor->rtr_info; + + ut_ad(level == 0); + + if (rtr_info->tree_blocks[tree_idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[tree_idx], + rtr_info->tree_blocks[tree_idx]); + rtr_info->tree_blocks[tree_idx] = NULL; + } + } + + bool ret = rtr_pcur_getnext_from_path( + tuple, mode, btr_cursor, level, latch_mode, + tree_latched, mtr); + + if (ret) { + low_match = btr_pcur_get_low_match(cursor); + ut_ad(low_match == n_fields); + } + } +} + +/* Get the rtree page father. 
+@param[in] index rtree index +@param[in] block child page in the index +@param[in] mtr mtr +@param[in] sea_cur search cursor, contains information + about parent nodes in search +@param[in] cursor cursor on node pointer record, + its page x-latched */ +void +rtr_page_get_father( + dict_index_t* index, + buf_block_t* block, + mtr_t* mtr, + btr_cur_t* sea_cur, + btr_cur_t* cursor) +{ + mem_heap_t* heap = mem_heap_create(100); +#ifdef UNIV_DEBUG + ulint* offsets; + + offsets = rtr_page_get_father_block( + NULL, heap, index, block, mtr, sea_cur, cursor); + + ulint page_no = btr_node_ptr_get_child_page_no(cursor->page_cur.rec, + offsets); + + ut_ad(page_no == block->page.id.page_no()); +#else + rtr_page_get_father_block( + NULL, heap, index, block, mtr, sea_cur, cursor); +#endif + + mem_heap_free(heap); +} + +/************************************************************//** +Returns the father block to a page. It is assumed that mtr holds +an X or SX latch on the tree. +@return rec_get_offsets() of the node pointer record */ +ulint* +rtr_page_get_father_block( +/*======================*/ + ulint* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + dict_index_t* index, /*!< in: b-tree index */ + buf_block_t* block, /*!< in: child page in the index */ + mtr_t* mtr, /*!< in: mtr */ + btr_cur_t* sea_cur,/*!< in: search cursor, contains information + about parent nodes in search */ + btr_cur_t* cursor) /*!< out: cursor on node pointer record, + its page x-latched */ +{ + + rec_t* rec = page_rec_get_next( + page_get_infimum_rec(buf_block_get_frame(block))); + btr_cur_position(index, rec, block, cursor); + + return(rtr_page_get_father_node_ptr(offsets, heap, sea_cur, + cursor, mtr)); +} + +/************************************************************//** +Returns the upper level node pointer to a R-Tree page. It is assumed +that mtr holds an x-latch on the tree. 
+@return rec_get_offsets() of the node pointer record */ +ulint* +rtr_page_get_father_node_ptr_func( +/*==============================*/ + ulint* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + btr_cur_t* sea_cur,/*!< in: search cursor */ + btr_cur_t* cursor, /*!< in: cursor pointing to user record, + out: cursor on node pointer record, + its page x-latched */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mtr */ +{ + dtuple_t* tuple; + rec_t* user_rec; + rec_t* node_ptr; + ulint level; + ulint page_no; + dict_index_t* index; + rtr_mbr_t mbr; + + page_no = btr_cur_get_block(cursor)->page.id.page_no(); + index = btr_cur_get_index(cursor); + + ut_ad(srv_read_only_mode + || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index), + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + + ut_ad(dict_index_get_page(index) != page_no); + + level = btr_page_get_level(btr_cur_get_page(cursor), mtr); + + user_rec = btr_cur_get_rec(cursor); + ut_a(page_rec_is_user_rec(user_rec)); + + offsets = rec_get_offsets(user_rec, index, offsets, + ULINT_UNDEFINED, &heap); + rtr_get_mbr_from_rec(user_rec, offsets, &mbr); + + tuple = rtr_index_build_node_ptr( + index, &mbr, user_rec, page_no, heap, level); + + if (sea_cur && !sea_cur->rtr_info) { + sea_cur = NULL; + } + + rtr_get_father_node(index, level + 1, tuple, sea_cur, cursor, + page_no, mtr); + + node_ptr = btr_cur_get_rec(cursor); + ut_ad(!page_rec_is_comp(node_ptr) + || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); + offsets = rec_get_offsets(node_ptr, index, offsets, + ULINT_UNDEFINED, &heap); + + ulint child_page = btr_node_ptr_get_child_page_no(node_ptr, offsets); + + if (child_page != page_no) { + const rec_t* print_rec; + + ib::fatal error; + + error << "Corruption of index " << index->name + << " of table " << index->table->name + << " parent page " << page_no + << " child page " << child_page; + + print_rec = 
page_rec_get_next( + page_get_infimum_rec(page_align(user_rec))); + offsets = rec_get_offsets(print_rec, index, + offsets, ULINT_UNDEFINED, &heap); + error << "; child "; + rec_print(error.m_oss, print_rec, + rec_get_info_bits(print_rec, rec_offs_comp(offsets)), + offsets); + offsets = rec_get_offsets(node_ptr, index, offsets, + ULINT_UNDEFINED, &heap); + error << "; parent "; + rec_print(error.m_oss, print_rec, + rec_get_info_bits(print_rec, rec_offs_comp(offsets)), + offsets); + + error << ". You should dump + drop + reimport the table to" + " fix the corruption. If the crash happens at" + " database startup, see " REFMAN + "forcing-innodb-recovery.html about forcing" + " recovery. Then dump + drop + reimport."; + } + + return(offsets); +} + +/********************************************************************//** +Returns the upper level node pointer to a R-Tree page. It is assumed +that mtr holds an x-latch on the tree. */ +void +rtr_get_father_node( +/*================*/ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: the tree level of search */ + const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in + tuple must be set so that it cannot get + compared to the node ptr page number field! */ + btr_cur_t* sea_cur,/*!< in: search cursor */ + btr_cur_t* btr_cur,/*!< in/out: tree cursor; the cursor page is + s- or x-latched, but see also above! */ + ulint page_no,/*!< Current page no */ + mtr_t* mtr) /*!< in: mtr */ +{ + mem_heap_t* heap = NULL; + bool ret = false; + const rec_t* rec; + ulint n_fields; + bool new_rtr = false; + + /* Try to optimally locate the parent node. 
Level should always + less than sea_cur->tree_height unless the root is splitting */ + if (sea_cur && sea_cur->tree_height > level) { + + ut_ad(mtr_memo_contains_flagged(mtr, + dict_index_get_lock(index), + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + ret = rtr_cur_restore_position( + BTR_CONT_MODIFY_TREE, sea_cur, level, mtr); + + /* Once we block shrink tree nodes while there are + active search on it, this optimal locating should always + succeeds */ + ut_ad(ret); + + if (ret) { + btr_pcur_t* r_cursor = rtr_get_parent_cursor( + sea_cur, level, false); + + rec = btr_pcur_get_rec(r_cursor); + + ut_ad(r_cursor->rel_pos == BTR_PCUR_ON); + page_cur_position(rec, + btr_pcur_get_block(r_cursor), + btr_cur_get_page_cur(btr_cur)); + btr_cur->rtr_info = sea_cur->rtr_info; + btr_cur->tree_height = sea_cur->tree_height; + ut_ad(rtr_compare_cursor_rec( + index, btr_cur, page_no, &heap)); + goto func_exit; + } + } + + /* We arrive here in one of two scenario + 1) check table and btr_valide + 2) index root page being raised */ + ut_ad(!sea_cur || sea_cur->tree_height == level); + + if (btr_cur->rtr_info) { + rtr_clean_rtr_info(btr_cur->rtr_info, true); + } else { + new_rtr = true; + } + + btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); + + if (sea_cur && sea_cur->tree_height == level) { + /* root split, and search the new root */ + btr_cur_search_to_nth_level( + index, level, tuple, PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, 0, + __FILE__, __LINE__, mtr); + + } else { + /* btr_validate */ + ut_ad(level >= 1); + ut_ad(!sea_cur); + + btr_cur_search_to_nth_level( + index, level, tuple, PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, 0, + __FILE__, __LINE__, mtr); + + rec = btr_cur_get_rec(btr_cur); + n_fields = dtuple_get_n_fields_cmp(tuple); + + if (page_rec_is_infimum(rec) + || (btr_cur->low_match != n_fields)) { + ret = rtr_pcur_getnext_from_path( + tuple, PAGE_CUR_RTREE_LOCATE, btr_cur, + level, BTR_CONT_MODIFY_TREE, + true, mtr); + + 
ut_ad(ret && btr_cur->low_match == n_fields); + } + } + + ret = rtr_compare_cursor_rec( + index, btr_cur, page_no, &heap); + + ut_ad(ret); + +func_exit: + if (heap) { + mem_heap_free(heap); + } + + if (new_rtr && btr_cur->rtr_info) { + rtr_clean_rtr_info(btr_cur->rtr_info, true); + btr_cur->rtr_info = NULL; + } +} + +/*******************************************************************//** +Create a RTree search info structure */ +rtr_info_t* +rtr_create_rtr_info( +/******************/ + bool need_prdt, /*!< in: Whether predicate lock + is needed */ + bool init_matches, /*!< in: Whether to initiate the + "matches" structure for collecting + matched leaf records */ + btr_cur_t* cursor, /*!< in: tree search cursor */ + dict_index_t* index) /*!< in: index struct */ +{ + rtr_info_t* rtr_info; + + index = index ? index : cursor->index; + ut_ad(index); + + rtr_info = static_cast(ut_zalloc_nokey(sizeof(*rtr_info))); + + rtr_info->allocated = true; + rtr_info->cursor = cursor; + rtr_info->index = index; + + if (init_matches) { + rtr_info->heap = mem_heap_create(sizeof(*(rtr_info->matches))); + rtr_info->matches = static_cast( + mem_heap_zalloc( + rtr_info->heap, + sizeof(*rtr_info->matches))); + + rtr_info->matches->matched_recs + = UT_NEW_NOKEY(rtr_rec_vector()); + + rtr_info->matches->bufp = page_align(rtr_info->matches->rec_buf + + UNIV_PAGE_SIZE_MAX + 1); + mutex_create(LATCH_ID_RTR_MATCH_MUTEX, + &rtr_info->matches->rtr_match_mutex); + rw_lock_create(PFS_NOT_INSTRUMENTED, + &(rtr_info->matches->block.lock), + SYNC_LEVEL_VARYING); + } + + rtr_info->path = UT_NEW_NOKEY(rtr_node_path_t()); + rtr_info->parent_path = UT_NEW_NOKEY(rtr_node_path_t()); + rtr_info->need_prdt_lock = need_prdt; + mutex_create(LATCH_ID_RTR_PATH_MUTEX, + &rtr_info->rtr_path_mutex); + + mutex_enter(&index->rtr_track->rtr_active_mutex); + index->rtr_track->rtr_active->push_back(rtr_info); + mutex_exit(&index->rtr_track->rtr_active_mutex); + return(rtr_info); +} + 
+/*******************************************************************//** +Update a btr_cur_t with rtr_info */ +void +rtr_info_update_btr( +/******************/ + btr_cur_t* cursor, /*!< in/out: tree cursor */ + rtr_info_t* rtr_info) /*!< in: rtr_info to set to the + cursor */ +{ + ut_ad(rtr_info); + + cursor->rtr_info = rtr_info; +} + +/*******************************************************************//** +Initialize a R-Tree Search structure */ +void +rtr_init_rtr_info( +/****************/ + rtr_info_t* rtr_info, /*!< in: rtr_info to set to the + cursor */ + bool need_prdt, /*!< in: Whether predicate lock is + needed */ + btr_cur_t* cursor, /*!< in: tree search cursor */ + dict_index_t* index, /*!< in: index structure */ + bool reinit) /*!< in: Whether this is a reinit */ +{ + ut_ad(rtr_info); + + if (!reinit) { + /* Reset all members. */ + rtr_info->path = NULL; + rtr_info->parent_path = NULL; + rtr_info->matches = NULL; + + mutex_create(LATCH_ID_RTR_PATH_MUTEX, + &rtr_info->rtr_path_mutex); + + memset(rtr_info->tree_blocks, 0x0, + sizeof(rtr_info->tree_blocks)); + memset(rtr_info->tree_savepoints, 0x0, + sizeof(rtr_info->tree_savepoints)); + rtr_info->mbr.xmin = 0.0; + rtr_info->mbr.xmax = 0.0; + rtr_info->mbr.ymin = 0.0; + rtr_info->mbr.ymax = 0.0; + rtr_info->thr = NULL; + rtr_info->heap = NULL; + rtr_info->cursor = NULL; + rtr_info->index = NULL; + rtr_info->need_prdt_lock = false; + rtr_info->need_page_lock = false; + rtr_info->allocated = false; + rtr_info->mbr_adj = false; + rtr_info->fd_del = false; + rtr_info->search_tuple = NULL; + rtr_info->search_mode = PAGE_CUR_UNSUPP; + } + + ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty()); + + rtr_info->path = UT_NEW_NOKEY(rtr_node_path_t()); + rtr_info->parent_path = UT_NEW_NOKEY(rtr_node_path_t()); + rtr_info->need_prdt_lock = need_prdt; + rtr_info->cursor = cursor; + rtr_info->index = index; + + mutex_enter(&index->rtr_track->rtr_active_mutex); + 
index->rtr_track->rtr_active->push_back(rtr_info); + mutex_exit(&index->rtr_track->rtr_active_mutex); +} + +/**************************************************************//** +Clean up R-Tree search structure */ +void +rtr_clean_rtr_info( +/*===============*/ + rtr_info_t* rtr_info, /*!< in: RTree search info */ + bool free_all) /*!< in: need to free rtr_info itself */ +{ + dict_index_t* index; + bool initialized = false; + + if (!rtr_info) { + return; + } + + index = rtr_info->index; + + if (index) { + mutex_enter(&index->rtr_track->rtr_active_mutex); + } + + while (rtr_info->parent_path && !rtr_info->parent_path->empty()) { + btr_pcur_t* cur = rtr_info->parent_path->back().cursor; + rtr_info->parent_path->pop_back(); + + if (cur) { + btr_pcur_close(cur); + ut_free(cur); + } + } + + UT_DELETE(rtr_info->parent_path); + rtr_info->parent_path = NULL; + + if (rtr_info->path != NULL) { + UT_DELETE(rtr_info->path); + rtr_info->path = NULL; + initialized = true; + } + + if (rtr_info->matches) { + rtr_info->matches->used = false; + rtr_info->matches->locked = false; + rtr_info->matches->valid = false; + rtr_info->matches->matched_recs->clear(); + } + + if (index) { + index->rtr_track->rtr_active->remove(rtr_info); + mutex_exit(&index->rtr_track->rtr_active_mutex); + } + + if (free_all) { + if (rtr_info->matches) { + if (rtr_info->matches->matched_recs != NULL) { + UT_DELETE(rtr_info->matches->matched_recs); + } + + rw_lock_free(&(rtr_info->matches->block.lock)); + + mutex_destroy(&rtr_info->matches->rtr_match_mutex); + } + + if (rtr_info->heap) { + mem_heap_free(rtr_info->heap); + } + + if (initialized) { + mutex_destroy(&rtr_info->rtr_path_mutex); + } + + if (rtr_info->allocated) { + ut_free(rtr_info); + } + } +} + +/**************************************************************//** +Rebuilt the "path" to exclude the removing page no */ +static +void +rtr_rebuild_path( +/*=============*/ + rtr_info_t* rtr_info, /*!< in: RTree search info */ + ulint page_no) /*!< in: 
need to free rtr_info itself */ +{ + rtr_node_path_t* new_path + = UT_NEW_NOKEY(rtr_node_path_t()); + + rtr_node_path_t::iterator rit; +#ifdef UNIV_DEBUG + ulint before_size = rtr_info->path->size(); +#endif /* UNIV_DEBUG */ + + for (rit = rtr_info->path->begin(); + rit != rtr_info->path->end(); ++rit) { + node_visit_t next_rec = *rit; + + if (next_rec.page_no == page_no) { + continue; + } + + new_path->push_back(next_rec); +#ifdef UNIV_DEBUG + node_visit_t rec = new_path->back(); + ut_ad(rec.level < rtr_info->cursor->tree_height + && rec.page_no > 0); +#endif /* UNIV_DEBUG */ + } + + UT_DELETE(rtr_info->path); + + ut_ad(new_path->size() == before_size - 1); + + rtr_info->path = new_path; + + if (!rtr_info->parent_path->empty()) { + rtr_node_path_t* new_parent_path = UT_NEW_NOKEY( + rtr_node_path_t()); + + for (rit = rtr_info->parent_path->begin(); + rit != rtr_info->parent_path->end(); ++rit) { + node_visit_t next_rec = *rit; + + if (next_rec.child_no == page_no) { + btr_pcur_t* cur = next_rec.cursor; + + if (cur) { + btr_pcur_close(cur); + ut_free(cur); + } + + continue; + } + + new_parent_path->push_back(next_rec); + } + UT_DELETE(rtr_info->parent_path); + rtr_info->parent_path = new_parent_path; + } + +} + +/**************************************************************//** +Check whether a discarding page is in anyone's search path */ +void +rtr_check_discard_page( +/*===================*/ + dict_index_t* index, /*!< in: index */ + btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on + the root page */ + buf_block_t* block) /*!< in: block of page to be discarded */ +{ + ulint pageno = block->page.id.page_no(); + rtr_info_t* rtr_info; + rtr_info_active::iterator it; + + mutex_enter(&index->rtr_track->rtr_active_mutex); + + for (it = index->rtr_track->rtr_active->begin(); + it != index->rtr_track->rtr_active->end(); ++it) { + rtr_info = *it; + rtr_node_path_t::iterator rit; + bool found = false; + + if (cursor && rtr_info == cursor->rtr_info) { + 
continue; + } + + mutex_enter(&rtr_info->rtr_path_mutex); + for (rit = rtr_info->path->begin(); + rit != rtr_info->path->end(); ++rit) { + node_visit_t node = *rit; + + if (node.page_no == pageno) { + found = true; + break; + } + } + + if (found) { + rtr_rebuild_path(rtr_info, pageno); + } + mutex_exit(&rtr_info->rtr_path_mutex); + + if (rtr_info->matches) { + mutex_enter(&rtr_info->matches->rtr_match_mutex); + + if ((&rtr_info->matches->block)->page.id.page_no() + == pageno) { + if (!rtr_info->matches->matched_recs->empty()) { + rtr_info->matches->matched_recs->clear(); + } + ut_ad(rtr_info->matches->matched_recs->empty()); + rtr_info->matches->valid = false; + } + + mutex_exit(&rtr_info->matches->rtr_match_mutex); + } + } + + mutex_exit(&index->rtr_track->rtr_active_mutex); + + lock_mutex_enter(); + lock_prdt_page_free_from_discard(block, lock_sys->prdt_hash); + lock_prdt_page_free_from_discard(block, lock_sys->prdt_page_hash); + lock_mutex_exit(); +} + +/**************************************************************//** +Restores the stored position of a persistent cursor bufferfixing the page */ +bool +rtr_cur_restore_position_func( +/*==========================*/ + ulint latch_mode, /*!< in: BTR_CONT_MODIFY_TREE, ... 
*/ + btr_cur_t* btr_cur, /*!< in: detached persistent cursor */ + ulint level, /*!< in: index level */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr) /*!< in: mtr */ +{ + dict_index_t* index; + mem_heap_t* heap; + btr_pcur_t* r_cursor = rtr_get_parent_cursor(btr_cur, level, false); + dtuple_t* tuple; + bool ret = false; + + ut_ad(mtr); + ut_ad(r_cursor); + ut_ad(mtr->is_active()); + + index = btr_cur_get_index(btr_cur); + + if (r_cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE + || r_cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { + return(false); + } + + DBUG_EXECUTE_IF( + "rtr_pessimistic_position", + r_cursor->modify_clock = 100; + ); + + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + + if (!buf_pool_is_obsolete(r_cursor->withdraw_clock) + && buf_page_optimistic_get(RW_X_LATCH, + r_cursor->block_when_stored, + r_cursor->modify_clock, file, line, mtr)) { + ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED); + + ut_ad(r_cursor->rel_pos == BTR_PCUR_ON); +#ifdef UNIV_DEBUG + do { + const rec_t* rec; + const ulint* offsets1; + const ulint* offsets2; + ulint comp; + + rec = btr_pcur_get_rec(r_cursor); + + heap = mem_heap_create(256); + offsets1 = rec_get_offsets( + r_cursor->old_rec, index, NULL, + r_cursor->old_n_fields, &heap); + offsets2 = rec_get_offsets( + rec, index, NULL, + r_cursor->old_n_fields, &heap); + + comp = rec_offs_comp(offsets1); + + if (rec_get_info_bits(r_cursor->old_rec, comp) + & REC_INFO_MIN_REC_FLAG) { + ut_ad(rec_get_info_bits(rec, comp) + & REC_INFO_MIN_REC_FLAG); + } else { + + ut_ad(!cmp_rec_rec(r_cursor->old_rec, + rec, offsets1, offsets2, + index)); + } + + mem_heap_free(heap); + } while (0); +#endif /* UNIV_DEBUG */ + + return(true); + } + + /* Page has changed, for R-Tree, the page cannot be shrunk away, + so we search the page and its right siblings */ + buf_block_t* block; + node_seq_t page_ssn; + const page_t* page; + page_cur_t* page_cursor; + node_visit_t* node = 
rtr_get_parent_node(btr_cur, level, false); + ulint space = dict_index_get_space(index); + node_seq_t path_ssn = node->seq_no; + page_size_t page_size = dict_table_page_size(index->table); + + ulint page_no = node->page_no; + + heap = mem_heap_create(256); + + tuple = dict_index_build_data_tuple(index, r_cursor->old_rec, + r_cursor->old_n_fields, heap); + + page_cursor = btr_pcur_get_page_cur(r_cursor); + ut_ad(r_cursor == node->cursor); + +search_again: + page_id_t page_id(space, page_no); + dberr_t err = DB_SUCCESS; + + block = buf_page_get_gen( + page_id, page_size, RW_X_LATCH, NULL, + BUF_GET, __FILE__, __LINE__, mtr, &err); + + ut_ad(block); + + /* Get the page SSN */ + page = buf_block_get_frame(block); + page_ssn = page_get_ssn_id(page); + + ulint low_match = page_cur_search( + block, index, tuple, PAGE_CUR_LE, page_cursor); + + if (low_match == r_cursor->old_n_fields) { + const rec_t* rec; + const ulint* offsets1; + const ulint* offsets2; + ulint comp; + + rec = btr_pcur_get_rec(r_cursor); + + offsets1 = rec_get_offsets( + r_cursor->old_rec, index, NULL, + r_cursor->old_n_fields, &heap); + offsets2 = rec_get_offsets( + rec, index, NULL, + r_cursor->old_n_fields, &heap); + + comp = rec_offs_comp(offsets1); + + if ((rec_get_info_bits(r_cursor->old_rec, comp) + & REC_INFO_MIN_REC_FLAG) + && (rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG)) { + r_cursor->pos_state = BTR_PCUR_IS_POSITIONED; + ret = true; + } else if (!cmp_rec_rec(r_cursor->old_rec, rec, offsets1, offsets2, + index)) { + r_cursor->pos_state = BTR_PCUR_IS_POSITIONED; + ret = true; + } + } + + /* Check the page SSN to see if it has been splitted, if so, search + the right page */ + if (!ret && page_ssn > path_ssn) { + page_no = btr_page_get_next(page, mtr); + goto search_again; + } + + mem_heap_free(heap); + + return(ret); +} + +/****************************************************************//** +Copy the leaf level R-tree record, and push it to matched_rec in rtr_info */ +static +void 
+rtr_leaf_push_match_rec( +/*====================*/ + const rec_t* rec, /*!< in: record to copy */ + rtr_info_t* rtr_info, /*!< in/out: search stack */ + ulint* offsets, /*!< in: offsets */ + bool is_comp) /*!< in: is compact format */ +{ + byte* buf; + matched_rec_t* match_rec = rtr_info->matches; + rec_t* copy; + ulint data_len; + rtr_rec_t rtr_rec; + + buf = match_rec->block.frame + match_rec->used; + + copy = rec_copy(buf, rec, offsets); + + if (is_comp) { + rec_set_next_offs_new(copy, PAGE_NEW_SUPREMUM); + } else { + rec_set_next_offs_old(copy, PAGE_OLD_SUPREMUM); + } + + rtr_rec.r_rec = copy; + rtr_rec.locked = false; + + match_rec->matched_recs->push_back(rtr_rec); + match_rec->valid = true; + + data_len = rec_offs_data_size(offsets) + rec_offs_extra_size(offsets); + match_rec->used += data_len; + + ut_ad(match_rec->used < UNIV_PAGE_SIZE); +} + +/**************************************************************//** +Store the parent path cursor +@return number of cursor stored */ +ulint +rtr_store_parent_path( +/*==================*/ + const buf_block_t* block, /*!< in: block of the page */ + btr_cur_t* btr_cur,/*!< in/out: persistent cursor */ + ulint latch_mode, + /*!< in: latch_mode */ + ulint level, /*!< in: index level */ + mtr_t* mtr) /*!< in: mtr */ +{ + ulint num = btr_cur->rtr_info->parent_path->size(); + ulint num_stored = 0; + + while (num >= 1) { + node_visit_t* node = &(*btr_cur->rtr_info->parent_path)[ + num - 1]; + btr_pcur_t* r_cursor = node->cursor; + buf_block_t* cur_block; + + if (node->level > level) { + break; + } + + r_cursor->pos_state = BTR_PCUR_IS_POSITIONED; + r_cursor->latch_mode = latch_mode; + + cur_block = btr_pcur_get_block(r_cursor); + + if (cur_block == block) { + btr_pcur_store_position(r_cursor, mtr); + num_stored++; + } else { + break; + } + + num--; + } + + return(num_stored); +} +/**************************************************************//** +push a nonleaf index node to the search path for insertion */ +static +void 
+rtr_non_leaf_insert_stack_push( +/*===========================*/ + dict_index_t* index, /*!< in: index descriptor */ + rtr_node_path_t* path, /*!< in/out: search path */ + ulint level, /*!< in: index page level */ + ulint child_no,/*!< in: child page no */ + const buf_block_t* block, /*!< in: block of the page */ + const rec_t* rec, /*!< in: positioned record */ + double mbr_inc)/*!< in: MBR needs to be enlarged */ +{ + node_seq_t new_seq; + btr_pcur_t* my_cursor; + ulint page_no = block->page.id.page_no(); + + my_cursor = static_cast( + ut_malloc_nokey(sizeof(*my_cursor))); + + btr_pcur_init(my_cursor); + + page_cur_position(rec, block, btr_pcur_get_page_cur(my_cursor)); + + (btr_pcur_get_btr_cur(my_cursor))->index = index; + + new_seq = rtr_get_current_ssn_id(index); + rtr_non_leaf_stack_push(path, page_no, new_seq, level, child_no, + my_cursor, mbr_inc); +} + +/** Copy a buf_block_t strcuture, except "block->lock" and "block->mutex". +@param[in,out] matches copy to match->block +@param[in] block block to copy */ +static +void +rtr_copy_buf( + matched_rec_t* matches, + const buf_block_t* block) +{ + /* Copy all members of "block" to "matches->block" except "mutex" + and "lock". We skip "mutex" and "lock" because they are not used + from the dummy buf_block_t we create here and because memcpy()ing + them generates (valid) compiler warnings that the vtable pointer + will be copied. It is also undefined what will happen with the + newly memcpy()ed mutex if the source mutex was acquired by + (another) thread while it was copied. 
*/ + memcpy(&matches->block.page, &block->page, sizeof(buf_page_t)); + matches->block.frame = block->frame; +#ifndef UNIV_HOTBACKUP + matches->block.unzip_LRU = block->unzip_LRU; + + ut_d(matches->block.in_unzip_LRU_list = block->in_unzip_LRU_list); + ut_d(matches->block.in_withdraw_list = block->in_withdraw_list); + + /* Skip buf_block_t::mutex */ + /* Skip buf_block_t::lock */ + matches->block.lock_hash_val = block->lock_hash_val; + matches->block.modify_clock = block->modify_clock; + matches->block.n_hash_helps = block->n_hash_helps; + matches->block.n_fields = block->n_fields; + matches->block.left_side = block->left_side; +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + matches->block.n_pointers = block->n_pointers; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + matches->block.curr_n_fields = block->curr_n_fields; + matches->block.curr_left_side = block->curr_left_side; + matches->block.index = block->index; + matches->block.made_dirty_with_no_latch + = block->made_dirty_with_no_latch; + + ut_d(matches->block.debug_latch = block->debug_latch); + +#endif /* !UNIV_HOTBACKUP */ +} + +/****************************************************************//** +Generate a shadow copy of the page block header to save the +matched records */ +static +void +rtr_init_match( +/*===========*/ + matched_rec_t* matches,/*!< in/out: match to initialize */ + const buf_block_t* block, /*!< in: buffer block */ + const page_t* page) /*!< in: buffer page */ +{ + ut_ad(matches->matched_recs->empty()); + matches->locked = false; + rtr_copy_buf(matches, block); + matches->block.frame = matches->bufp; + matches->valid = false; + /* We have to copy PAGE_W*_SUPREMUM_END bytes so that we can + use infimum/supremum of this page as normal btr page for search. */ + memcpy(matches->block.frame, page, page_is_comp(page) + ? PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END); + matches->used = page_is_comp(page) + ? 
PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END; +#ifdef RTR_SEARCH_DIAGNOSTIC + ulint pageno = page_get_page_no(page); + fprintf(stderr, "INNODB_RTR: Searching leaf page %d\n", + static_cast(pageno)); +#endif /* RTR_SEARCH_DIAGNOSTIC */ +} + +/****************************************************************//** +Get the bounding box content from an index record */ +void +rtr_get_mbr_from_rec( +/*=================*/ + const rec_t* rec, /*!< in: data tuple */ + const ulint* offsets,/*!< in: offsets array */ + rtr_mbr_t* mbr) /*!< out MBR */ +{ + ulint rec_f_len; + const byte* data; + + data = rec_get_nth_field(rec, offsets, 0, &rec_f_len); + + rtr_read_mbr(data, mbr); +} + +/****************************************************************//** +Get the bounding box content from a MBR data record */ +void +rtr_get_mbr_from_tuple( +/*===================*/ + const dtuple_t* dtuple, /*!< in: data tuple */ + rtr_mbr* mbr) /*!< out: mbr to fill */ +{ + const dfield_t* dtuple_field; + ulint dtuple_f_len; + byte* data; + + dtuple_field = dtuple_get_nth_field(dtuple, 0); + dtuple_f_len = dfield_get_len(dtuple_field); + ut_a(dtuple_f_len >= 4 * sizeof(double)); + + data = static_cast(dfield_get_data(dtuple_field)); + + rtr_read_mbr(data, mbr); +} + +/****************************************************************//** +Searches the right position in rtree for a page cursor. */ +bool +rtr_cur_search_with_match( +/*======================*/ + const buf_block_t* block, /*!< in: buffer block */ + dict_index_t* index, /*!< in: index descriptor */ + const dtuple_t* tuple, /*!< in: data tuple */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_RTREE_INSERT, + PAGE_CUR_RTREE_LOCATE etc. 
*/ + page_cur_t* cursor, /*!< in/out: page cursor */ + rtr_info_t* rtr_info)/*!< in/out: search stack */ +{ + bool found = false; + const page_t* page; + const rec_t* rec; + const rec_t* last_rec; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + mem_heap_t* heap = NULL; + int cmp = 1; + bool is_leaf; + double least_inc = DBL_MAX; + const rec_t* best_rec; + const rec_t* last_match_rec = NULL; + ulint level; + bool match_init = false; + ulint space = block->page.id.space(); + page_cur_mode_t orig_mode = mode; + const rec_t* first_rec = NULL; + + rec_offs_init(offsets_); + + ut_ad(RTREE_SEARCH_MODE(mode)); + + ut_ad(dict_index_is_spatial(index)); + + page = buf_block_get_frame(block); + + is_leaf = page_is_leaf(page); + level = btr_page_get_level(page, mtr); + + if (mode == PAGE_CUR_RTREE_LOCATE) { + ut_ad(level != 0); + mode = PAGE_CUR_WITHIN; + } + + rec = page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0)); + + last_rec = rec; + best_rec = rec; + + if (page_rec_is_infimum(rec)) { + rec = page_rec_get_next_const(rec); + } + + /* Check insert tuple size is larger than first rec, and try to + avoid it if possible */ + if (mode == PAGE_CUR_RTREE_INSERT && !page_rec_is_supremum(rec)) { + + ulint new_rec_size = rec_get_converted_size(index, tuple, 0); + + offsets = rec_get_offsets(rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), + &heap); + + if (rec_offs_size(offsets) < new_rec_size) { + first_rec = rec; + } + + /* If this is the left-most page of this index level + and the table is a compressed table, try to avoid + first page as much as possible, as there will be problem + when update MIN_REC rec in compress table */ + if (buf_block_get_page_zip(block) + && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL + && page_get_n_recs(page) >= 2) { + + rec = page_rec_get_next_const(rec); + } + } + + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets(rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), + &heap); + if (!is_leaf) { 
+ switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + case PAGE_CUR_MBR_EQUAL: + /* At non-leaf level, we will need to check + both CONTAIN and INTERSECT for either of + the search mode */ + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, PAGE_CUR_CONTAIN); + + if (cmp != 0) { + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, + PAGE_CUR_INTERSECT); + } + break; + case PAGE_CUR_DISJOINT: + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, mode); + + if (cmp != 0) { + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, + PAGE_CUR_INTERSECT); + } + break; + case PAGE_CUR_RTREE_INSERT: + double increase; + double area; + + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, PAGE_CUR_WITHIN); + + if (cmp != 0) { + increase = rtr_rec_cal_increase( + tuple, rec, offsets, &area); + /* Once it goes beyond DBL_MAX, + it would not make sense to record + such value, just make it + DBL_MAX / 2 */ + if (increase >= DBL_MAX) { + increase = DBL_MAX / 2; + } + + if (increase < least_inc) { + least_inc = increase; + best_rec = rec; + } else if (best_rec + && best_rec == first_rec) { + /* if first_rec is set, + we will try to avoid it */ + least_inc = increase; + best_rec = rec; + } + } + break; + case PAGE_CUR_RTREE_GET_FATHER: + cmp = cmp_dtuple_rec_with_gis_internal( + tuple, rec, offsets); + break; + default: + /* WITHIN etc. 
*/ + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, mode); + } + } else { + /* At leaf level, INSERT should translate to LE */ + ut_ad(mode != PAGE_CUR_RTREE_INSERT); + + cmp = cmp_dtuple_rec_with_gis( + tuple, rec, offsets, mode); + } + + if (cmp == 0) { + found = true; + + /* If located, the matching node/rec will be pushed + to rtr_info->path for non-leaf nodes, or + rtr_info->matches for leaf nodes */ + if (rtr_info && mode != PAGE_CUR_RTREE_INSERT) { + if (!is_leaf) { + ulint page_no; + node_seq_t new_seq; + bool is_loc; + + is_loc = (orig_mode + == PAGE_CUR_RTREE_LOCATE + || orig_mode + == PAGE_CUR_RTREE_GET_FATHER); + + offsets = rec_get_offsets( + rec, index, offsets, + ULINT_UNDEFINED, &heap); + + page_no = btr_node_ptr_get_child_page_no( + rec, offsets); + + ut_ad(level >= 1); + + /* Get current SSN, before we insert + it into the path stack */ + new_seq = rtr_get_current_ssn_id(index); + + rtr_non_leaf_stack_push( + rtr_info->path, + page_no, + new_seq, level - 1, 0, + NULL, 0); + + if (is_loc) { + rtr_non_leaf_insert_stack_push( + index, + rtr_info->parent_path, + level, page_no, block, + rec, 0); + } + + if (!srv_read_only_mode + && (rtr_info->need_page_lock + || !is_loc)) { + + /* Lock the page, preventing it + from being shrunk */ + lock_place_prdt_page_lock( + space, page_no, index, + rtr_info->thr); + } + } else { + ut_ad(orig_mode + != PAGE_CUR_RTREE_LOCATE); + + if (!match_init) { + rtr_init_match( + rtr_info->matches, + block, page); + match_init = true; + } + + /* Collect matched records on page */ + offsets = rec_get_offsets( + rec, index, offsets, + ULINT_UNDEFINED, &heap); + rtr_leaf_push_match_rec( + rec, rtr_info, offsets, + page_is_comp(page)); + } + + last_match_rec = rec; + } else { + /* This is the insertion case, it will break + once it finds the first MBR that can accomodate + the inserting rec */ + break; + } + } + + last_rec = rec; + + rec = page_rec_get_next_const(rec); + } + + /* All records on page are searched */ + if 
(page_rec_is_supremum(rec)) { + if (!is_leaf) { + if (!found) { + /* No match case, if it is for insertion, + then we select the record that result in + least increased area */ + if (mode == PAGE_CUR_RTREE_INSERT) { + ulint child_no; + ut_ad(least_inc < DBL_MAX); + offsets = rec_get_offsets( + best_rec, index, + offsets, ULINT_UNDEFINED, + &heap); + child_no = + btr_node_ptr_get_child_page_no( + best_rec, offsets); + + rtr_non_leaf_insert_stack_push( + index, rtr_info->parent_path, + level, child_no, block, + best_rec, least_inc); + + page_cur_position(best_rec, block, + cursor); + rtr_info->mbr_adj = true; + } else { + /* Position at the last rec of the + page, if it is not the leaf page */ + page_cur_position(last_rec, block, + cursor); + } + } else { + /* There are matching records, position + in the last matching records */ + if (rtr_info) { + rec = last_match_rec; + page_cur_position( + rec, block, cursor); + } + } + } else if (rtr_info) { + /* Leaf level, no match, position at the + last (supremum) rec */ + if (!last_match_rec) { + page_cur_position(rec, block, cursor); + goto func_exit; + } + + /* There are matched records */ + matched_rec_t* match_rec = rtr_info->matches; + + rtr_rec_t test_rec; + + test_rec = match_rec->matched_recs->back(); +#ifdef UNIV_DEBUG + ulint offsets_2[REC_OFFS_NORMAL_SIZE]; + ulint* offsets2 = offsets_2; + rec_offs_init(offsets_2); + + ut_ad(found); + + /* Verify the record to be positioned is the same + as the last record in matched_rec vector */ + offsets2 = rec_get_offsets(test_rec.r_rec, index, + offsets2, ULINT_UNDEFINED, + &heap); + + offsets = rec_get_offsets(last_match_rec, index, + offsets, ULINT_UNDEFINED, + &heap); + + ut_ad(cmp_rec_rec(test_rec.r_rec, last_match_rec, + offsets2, offsets, index) == 0); +#endif /* UNIV_DEBUG */ + /* Pop the last match record and position on it */ + match_rec->matched_recs->pop_back(); + page_cur_position(test_rec.r_rec, &match_rec->block, + cursor); + } + } else { + + if (mode == 
PAGE_CUR_RTREE_INSERT) { + ulint child_no; + ut_ad(!last_match_rec && rec); + + offsets = rec_get_offsets( + rec, index, offsets, ULINT_UNDEFINED, &heap); + + child_no = btr_node_ptr_get_child_page_no(rec, offsets); + + rtr_non_leaf_insert_stack_push( + index, rtr_info->parent_path, level, child_no, + block, rec, 0); + + } else if (rtr_info && found && !is_leaf) { + rec = last_match_rec; + } + + page_cur_position(rec, block, cursor); + } + +#ifdef UNIV_DEBUG + /* Verify that we are positioned at the same child page as pushed in + the path stack */ + if (!is_leaf && (!page_rec_is_supremum(rec) || found) + && mode != PAGE_CUR_RTREE_INSERT) { + ulint page_no; + + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, &heap); + page_no = btr_node_ptr_get_child_page_no(rec, offsets); + + if (rtr_info && found) { + rtr_node_path_t* path = rtr_info->path; + node_visit_t last_visit = path->back(); + + ut_ad(last_visit.page_no == page_no); + } + } +#endif /* UNIV_DEBUG */ + +func_exit: + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return(found); +} diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc index ae1eb55982a..f57a6d383d9 100644 --- a/storage/innobase/ha/ha0ha.cc +++ b/storage/innobase/ha/ha0ha.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,27 +38,19 @@ Created 8/22/1994 Heikki Tuuri /*************************************************************//** Creates a hash table with at least n array cells. The actual number of cells is chosen to be a prime number slightly bigger than n. 
-@return own: created table */ -UNIV_INTERN +@return own: created table */ hash_table_t* -ha_create_func( -/*===========*/ - ulint n, /*!< in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level, /*!< in: level of the mutexes or rw_locks - in the latching order: this is used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_sync_obj, /*!< in: number of mutexes or rw_locks - to protect the hash table: must be a - power of 2, or 0 */ - ulint type) /*!< in: type of datastructure for which - the memory heap is going to be used e.g.: - MEM_HEAP_FOR_BTR_SEARCH or +ib_create( +/*======*/ + ulint n, /*!< in: number of array cells */ + latch_id_t id, /*!< in: latch ID */ + ulint n_sync_obj, + /*!< in: number of mutexes to protect the + hash table: must be a power of 2, or 0 */ + ulint type) /*!< in: type of datastructure for which MEM_HEAP_FOR_PAGE_HASH */ { hash_table_t* table; - ulint i; ut_a(type == MEM_HEAP_FOR_BTR_SEARCH || type == MEM_HEAP_FOR_PAGE_HASH); @@ -71,7 +63,10 @@ ha_create_func( if (n_sync_obj == 0) { table->heap = mem_heap_create_typed( - ut_min(4096, MEM_MAX_ALLOC_IN_BUF), type); + ut_min(static_cast(4096), + MEM_MAX_ALLOC_IN_BUF / 2 + - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)), + type); ut_a(table->heap); return(table); @@ -80,61 +75,101 @@ ha_create_func( if (type == MEM_HEAP_FOR_PAGE_HASH) { /* We create a hash table protected by rw_locks for buf_pool->page_hash. 
*/ - hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK, - n_sync_obj, sync_level); + hash_create_sync_obj( + table, HASH_TABLE_SYNC_RW_LOCK, id, n_sync_obj); } else { - hash_create_sync_obj(table, HASH_TABLE_SYNC_MUTEX, - n_sync_obj, sync_level); + hash_create_sync_obj( + table, HASH_TABLE_SYNC_MUTEX, id, n_sync_obj); } table->heaps = static_cast( - mem_alloc(n_sync_obj * sizeof(void*))); + ut_malloc_nokey(n_sync_obj * sizeof(void*))); - for (i = 0; i < n_sync_obj; i++) { - table->heaps[i] = mem_heap_create_typed(4096, type); + for (ulint i = 0; i < n_sync_obj; i++) { + table->heaps[i] = mem_heap_create_typed( + ut_min(static_cast(4096), + MEM_MAX_ALLOC_IN_BUF / 2 + - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)), + type); ut_a(table->heaps[i]); } return(table); } +/** Recreate a hash table with at least n array cells. The actual number +of cells is chosen to be a prime number slightly bigger than n. +The new cells are all cleared. The heaps are recreated. +The sync objects are reused. 
+@param[in,out] table hash table to be resuzed (to be freed later) +@param[in] n number of array cells +@return resized new table */ +hash_table_t* +ib_recreate( + hash_table_t* table, + ulint n) +{ + /* This function is for only page_hash for now */ + ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); + ut_ad(table->n_sync_obj > 0); + + hash_table_t* new_table = hash_create(n); + + new_table->type = table->type; + new_table->n_sync_obj = table->n_sync_obj; + new_table->sync_obj = table->sync_obj; + + for (ulint i = 0; i < table->n_sync_obj; i++) { + mem_heap_free(table->heaps[i]); + } + ut_free(table->heaps); + + new_table->heaps = static_cast( + ut_malloc_nokey(new_table->n_sync_obj * sizeof(void*))); + + for (ulint i = 0; i < new_table->n_sync_obj; i++) { + new_table->heaps[i] = mem_heap_create_typed( + ut_min(static_cast(4096), + MEM_MAX_ALLOC_IN_BUF / 2 + - MEM_BLOCK_HEADER_SIZE - MEM_SPACE_NEEDED(0)), + MEM_HEAP_FOR_PAGE_HASH); + ut_a(new_table->heaps[i]); + } + + return(new_table); +} + /*************************************************************//** Empties a hash table and frees the memory heaps. */ -UNIV_INTERN void ha_clear( /*=====*/ hash_table_t* table) /*!< in, own: hash table */ { - ulint i; - ulint n; - - ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!table->adaptive - || rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!table->adaptive || btr_search_own_all(RW_LOCK_X)); - /* Free the memory heaps. 
*/ - n = table->n_sync_obj; - - for (i = 0; i < n; i++) { + for (ulint i = 0; i < table->n_sync_obj; i++) { mem_heap_free(table->heaps[i]); } - if (table->heaps) { - mem_free(table->heaps); - } + ut_free(table->heaps); switch (table->type) { case HASH_TABLE_SYNC_MUTEX: - mem_free(table->sync_obj.mutexes); + for (ulint i = 0; i < table->n_sync_obj; ++i) { + mutex_destroy(&table->sync_obj.mutexes[i]); + } + ut_free(table->sync_obj.mutexes); table->sync_obj.mutexes = NULL; break; case HASH_TABLE_SYNC_RW_LOCK: - mem_free(table->sync_obj.rw_locks); + for (ulint i = 0; i < table->n_sync_obj; ++i) { + rw_lock_free(&table->sync_obj.rw_locks[i]); + } + + ut_free(table->sync_obj.rw_locks); table->sync_obj.rw_locks = NULL; break; @@ -148,9 +183,9 @@ ha_clear( /* Clear the hash table. */ - n = hash_get_n_cells(table); + ulint n = hash_get_n_cells(table); - for (i = 0; i < n; i++) { + for (ulint i = 0; i < n; i++) { hash_get_nth_cell(table, i)->node = NULL; } } @@ -160,8 +195,7 @@ Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node is inserted. If btr_search_enabled is set to FALSE, we will only allow updating existing nodes, but no new node is allowed to be added. 
-@return TRUE if succeed, FALSE if no more memory could be allocated */ -UNIV_INTERN +@return TRUE if succeed, FALSE if no more memory could be allocated */ ibool ha_insert_for_fold_func( /*====================*/ @@ -262,9 +296,27 @@ ha_insert_for_fold_func( return(TRUE); } +#ifdef UNIV_DEBUG +/** Verify if latch corresponding to the hash table is x-latched +@param[in] table hash table */ +static +void +ha_btr_search_latch_x_locked(const hash_table_t* table) +{ + ulint i; + for (i = 0; i < btr_ahi_parts; ++i) { + if (btr_search_sys->hash_tables[i] == table) { + break; + } + } + + ut_ad(i < btr_ahi_parts); + ut_ad(rw_lock_own(btr_search_latches[i], RW_LOCK_X)); +} +#endif /* UNIV_DEBUG */ + /***********************************************************//** Deletes a hash node. */ -UNIV_INTERN void ha_delete_hash_node( /*================*/ @@ -273,9 +325,7 @@ ha_delete_hash_node( { ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_d(ha_btr_search_latch_x_locked(table)); ut_ad(btr_search_enabled); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (table->adaptive) { @@ -292,7 +342,6 @@ ha_delete_hash_node( Looks for an element when we know the pointer to the data, and updates the pointer to data, if found. 
@return TRUE if found */ -UNIV_INTERN ibool ha_search_and_update_if_found_func( /*===============================*/ @@ -312,9 +361,8 @@ ha_search_and_update_if_found_func( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG ut_a(new_block->frame == page_align(new_data)); #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_d(ha_btr_search_latch_x_locked(table)); if (!btr_search_enabled) { return(FALSE); @@ -343,7 +391,6 @@ ha_search_and_update_if_found_func( /*****************************************************************//** Removes from the chain determined by fold all nodes whose data pointer points to the page given. */ -UNIV_INTERN void ha_remove_all_nodes_to_page( /*========================*/ @@ -386,14 +433,13 @@ ha_remove_all_nodes_to_page( node = ha_chain_get_next(node); } -#endif +#endif /* UNIV_DEBUG */ } #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool ha_validate( /*========*/ @@ -421,12 +467,9 @@ ha_validate( node = node->next) { if (hash_calc_hash(node->fold, table) != i) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Error: hash table node" - " fold value %lu does not\n" - "InnoDB: match the cell number %lu.\n", - (ulong) node->fold, (ulong) i); + ib::error() << "Hash table node fold value " + << node->fold << " does not match the" + " cell number " << i << "."; ok = FALSE; } @@ -439,7 +482,6 @@ ha_validate( /*************************************************************//** Prints info of a hash table. 
*/ -UNIV_INTERN void ha_print_info( /*==========*/ diff --git a/storage/innobase/ha/ha0storage.cc b/storage/innobase/ha/ha0storage.cc index 6820591f316..a36fd573a4f 100644 --- a/storage/innobase/ha/ha0storage.cc +++ b/storage/innobase/ha/ha0storage.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,7 +25,7 @@ its own storage, avoiding duplicates. Created September 22, 2007 Vasil Dimov *******************************************************/ -#include "univ.i" +#include "ha_prototypes.h" #include "ha0storage.h" #include "hash0hash.h" #include "mem0mem.h" @@ -83,7 +83,6 @@ data_len bytes need to be allocated) and the size of storage is going to become more than "memlim" then "data" is not added and NULL is returned. To disable this behavior "memlim" can be set to 0, which stands for "no limit". */ -UNIV_INTERN const void* ha_storage_put_memlim( /*==================*/ @@ -169,14 +168,13 @@ test_ha_storage() p = ha_storage_put(storage, buf, sizeof(buf)); if (p != stored[i]) { - - fprintf(stderr, "ha_storage_put() returned %p " - "instead of %p, i=%d\n", p, stored[i], i); + ib::warn() << "ha_storage_put() returned " << p + << " instead of " << stored[i] << ", i=" << i; return; } } - fprintf(stderr, "all ok\n"); + ib::info() << "all ok"; ha_storage_free(storage); } diff --git a/storage/innobase/ha/hash0hash.cc b/storage/innobase/ha/hash0hash.cc index 174b6bcb57e..234fd7ac032 100644 --- a/storage/innobase/ha/hash0hash.cc +++ b/storage/innobase/ha/hash0hash.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. 
All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,24 +24,18 @@ Created 5/20/1997 Heikki Tuuri *******************************************************/ #include "hash0hash.h" + #ifdef UNIV_NONINL #include "hash0hash.ic" -#endif +#endif /* UNIV_NOINL */ #include "mem0mem.h" +#include "sync0sync.h" #ifndef UNIV_HOTBACKUP -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t hash_table_mutex_key; -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t hash_table_rw_lock_key; -# endif /* UNIV_PFS_RWLOCK */ /************************************************************//** Reserves the mutex for a fold value in a hash table. */ -UNIV_INTERN void hash_mutex_enter( /*=============*/ @@ -54,7 +48,6 @@ hash_mutex_enter( /************************************************************//** Releases the mutex for a fold value in a hash table. */ -UNIV_INTERN void hash_mutex_exit( /*============*/ @@ -67,16 +60,14 @@ hash_mutex_exit( /************************************************************//** Reserves all the mutexes of a hash table, in an ascending order. */ -UNIV_INTERN void hash_mutex_enter_all( /*=================*/ hash_table_t* table) /*!< in: hash table */ { - ulint i; - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - for (i = 0; i < table->n_sync_obj; i++) { + + for (ulint i = 0; i < table->n_sync_obj; i++) { mutex_enter(table->sync_obj.mutexes + i); } @@ -84,16 +75,14 @@ hash_mutex_enter_all( /************************************************************//** Releases all the mutexes of a hash table. 
*/ -UNIV_INTERN void hash_mutex_exit_all( /*================*/ hash_table_t* table) /*!< in: hash table */ { - ulint i; - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - for (i = 0; i < table->n_sync_obj; i++) { + + for (ulint i = 0; i < table->n_sync_obj; i++) { mutex_exit(table->sync_obj.mutexes + i); } @@ -101,7 +90,6 @@ hash_mutex_exit_all( /************************************************************//** Releases all but the passed in mutex of a hash table. */ -UNIV_INTERN void hash_mutex_exit_all_but( /*====================*/ @@ -114,7 +102,7 @@ hash_mutex_exit_all_but( for (i = 0; i < table->n_sync_obj; i++) { ib_mutex_t* mutex = table->sync_obj.mutexes + i; - if (UNIV_LIKELY(keep_mutex != mutex)) { + if (keep_mutex != mutex) { mutex_exit(mutex); } } @@ -124,7 +112,6 @@ hash_mutex_exit_all_but( /************************************************************//** s-lock a lock for a fold value in a hash table. */ -UNIV_INTERN void hash_lock_s( /*========*/ @@ -137,17 +124,14 @@ hash_lock_s( ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); ut_ad(lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); + ut_ad(!rw_lock_own(lock, RW_LOCK_X)); rw_lock_s_lock(lock); } /************************************************************//** x-lock a lock for a fold value in a hash table. */ -UNIV_INTERN void hash_lock_x( /*========*/ @@ -160,17 +144,14 @@ hash_lock_x( ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); ut_ad(lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); + ut_ad(!rw_lock_own(lock, RW_LOCK_X)); rw_lock_x_lock(lock); } /************************************************************//** unlock an s-lock for a fold value in a hash table. 
*/ -UNIV_INTERN void hash_unlock_s( /*==========*/ @@ -184,16 +165,13 @@ hash_unlock_s( ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); ut_ad(lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(lock, RW_LOCK_S)); rw_lock_s_unlock(lock); } /************************************************************//** unlock x-lock for a fold value in a hash table. */ -UNIV_INTERN void hash_unlock_x( /*==========*/ @@ -205,31 +183,26 @@ hash_unlock_x( ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); ut_ad(lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(lock, RW_LOCK_X)); rw_lock_x_unlock(lock); } /************************************************************//** Reserves all the locks of a hash table, in an ascending order. */ -UNIV_INTERN void hash_lock_x_all( /*============*/ hash_table_t* table) /*!< in: hash table */ { - ulint i; - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { + + for (ulint i = 0; i < table->n_sync_obj; i++) { rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); + ut_ad(!rw_lock_own(lock, RW_LOCK_X)); rw_lock_x_lock(lock); } @@ -237,21 +210,18 @@ hash_lock_x_all( /************************************************************//** Releases all the locks of a hash table, in an ascending order. 
*/ -UNIV_INTERN void hash_unlock_x_all( /*==============*/ hash_table_t* table) /*!< in: hash table */ { - ulint i; - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { + + for (ulint i = 0; i < table->n_sync_obj; i++) { rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(rw_lock_own(lock, RW_LOCK_X)); rw_lock_x_unlock(lock); } @@ -259,24 +229,21 @@ hash_unlock_x_all( /************************************************************//** Releases all but passed in lock of a hash table, */ -UNIV_INTERN void hash_unlock_x_all_but( /*==================*/ hash_table_t* table, /*!< in: hash table */ rw_lock_t* keep_lock) /*!< in: lock to keep */ { - ulint i; - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { + + for (ulint i = 0; i < table->n_sync_obj; i++) { rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - if (UNIV_LIKELY(keep_lock != lock)) { + ut_ad(rw_lock_own(lock, RW_LOCK_X)); + + if (keep_lock != lock) { rw_lock_x_unlock(lock); } } @@ -287,8 +254,7 @@ hash_unlock_x_all_but( /*************************************************************//** Creates a hash table with >= n array cells. The actual number of cells is chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN +@return own: created table */ hash_table_t* hash_create( /*========*/ @@ -300,10 +266,11 @@ hash_create( prime = ut_find_prime(n); - table = static_cast(mem_alloc(sizeof(hash_table_t))); + table = static_cast( + ut_malloc_nokey(sizeof(hash_table_t))); array = static_cast( - ut_malloc(sizeof(hash_cell_t) * prime)); + ut_malloc_nokey(sizeof(hash_cell_t) * prime)); /* The default type of hash_table is HASH_TABLE_SYNC_NONE i.e.: the caller is responsible for access control to the table. 
*/ @@ -329,17 +296,15 @@ hash_create( /*************************************************************//** Frees a hash table. */ -UNIV_INTERN void hash_table_free( /*============*/ hash_table_t* table) /*!< in, own: hash table */ { - ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_free(table->array); - mem_free(table); + ut_free(table); } #ifndef UNIV_HOTBACKUP @@ -347,52 +312,49 @@ hash_table_free( Creates a sync object array to protect a hash table. ::sync_obj can be mutexes or rw_locks depening on the type of hash table. */ -UNIV_INTERN void -hash_create_sync_obj_func( -/*======================*/ +hash_create_sync_obj( +/*=================*/ hash_table_t* table, /*!< in: hash table */ enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX or HASH_TABLE_SYNC_RW_LOCK */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level,/*!< in: latching order level - of the mutexes: used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ + latch_id_t id, /*!< in: latch ID */ ulint n_sync_obj)/*!< in: number of sync objects, must be a power of 2 */ { - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_a(n_sync_obj > 0); ut_a(ut_is_2pow(n_sync_obj)); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); table->type = type; - switch (type) { + switch (table->type) { case HASH_TABLE_SYNC_MUTEX: table->sync_obj.mutexes = static_cast( - mem_alloc(n_sync_obj * sizeof(ib_mutex_t))); + ut_malloc_nokey(n_sync_obj * sizeof(ib_mutex_t))); - for (i = 0; i < n_sync_obj; i++) { - mutex_create(hash_table_mutex_key, - table->sync_obj.mutexes + i, sync_level); + for (ulint i = 0; i < n_sync_obj; i++) { + mutex_create(id, table->sync_obj.mutexes + i); } break; - case HASH_TABLE_SYNC_RW_LOCK: + case HASH_TABLE_SYNC_RW_LOCK: { + + latch_level_t level = sync_latch_get_level(id); + + ut_a(level != SYNC_UNKNOWN); + table->sync_obj.rw_locks = static_cast( - mem_alloc(n_sync_obj * sizeof(rw_lock_t))); + ut_malloc_nokey(n_sync_obj * sizeof(rw_lock_t))); - for (i = 0; i < 
n_sync_obj; i++) { - rw_lock_create(hash_table_rw_lock_key, - table->sync_obj.rw_locks + i, sync_level); + for (ulint i = 0; i < n_sync_obj; i++) { + rw_lock_create(hash_table_locks_key, + table->sync_obj.rw_locks + i, level); } break; + } case HASH_TABLE_SYNC_NONE: ut_error; diff --git a/storage/innobase/ha_innodb.def b/storage/innobase/ha_innodb.def deleted file mode 100644 index e0faa62deb1..00000000000 --- a/storage/innobase/ha_innodb.def +++ /dev/null @@ -1,4 +0,0 @@ -EXPORTS - _mysql_plugin_interface_version_ - _mysql_sizeof_struct_st_plugin_ - _mysql_plugin_declarations_ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6f364b9dcbd..f9c9285036d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -33,107 +33,121 @@ this program; if not, write to the Free Software Foundation, Inc., *****************************************************************************/ -#define lower_case_file_system lower_case_file_system_server -#define mysql_unpacked_real_data_home mysql_unpacked_real_data_home_server -#include // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT, - // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH - -#include // PROCESS_ACL -#include // DEBUG_SYNC -#include // HA_OPTION_* -#include -#include -#include -#include - -#undef lower_case_file_system -#undef mysql_unpacked_real_data_home -MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; -MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[]; - -#ifdef _WIN32 -#include -#endif - -#include - /** @file ha_innodb.cc */ -/* Include necessary InnoDB headers */ #include "univ.i" -#include "buf0dump.h" -#include "buf0lru.h" -#include "buf0flu.h" -#include "buf0dblwr.h" -#include "btr0sea.h" -#include "btr0defragment.h" -#include "os0file.h" -#include "os0thread.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "trx0roll.h" -#include "trx0trx.h" -#include "trx0sys.h" -#include "rem0types.h" -#include 
"row0ins.h" -#include "row0mysql.h" -#include "row0sel.h" -#include "row0upd.h" -#include "log0log.h" -#include "lock0lock.h" -#include "dict0crea.h" -#include "btr0cur.h" -#include "btr0btr.h" -#include "fsp0fsp.h" -#include "sync0sync.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "trx0xa.h" -#include "row0merge.h" -#include "dict0boot.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" +/* Include necessary SQL headers */ #include "ha_prototypes.h" -#include "ut0mem.h" -#include "ut0timer.h" -#include "ibuf0ibuf.h" -#include "dict0dict.h" -#include "srv0mon.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +// MySQL 5.7 Header */ +// #include +#include +#include +#include +#include +#include + +// MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; +// MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[]; + +#include +#include + +/* Include necessary InnoDB headers */ #include "api0api.h" #include "api0misc.h" -#include "pars0pars.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "btr0bulk.h" +#include "btr0sea.h" +#include "buf0dblwr.h" +#include "buf0dump.h" +#include "buf0flu.h" +#include "buf0lru.h" +#include "dict0boot.h" +#include "btr0defragment.h" +#include "dict0crea.h" +#include "dict0dict.h" +#include "dict0stats.h" +#include "dict0stats_bg.h" +#include "fil0fil.h" +#include "fsp0fsp.h" +#include "fsp0space.h" +#include "fsp0sysspace.h" #include "fts0fts.h" +#include "fts0plugin.h" +#include "fts0priv.h" #include "fts0types.h" +#include "ibuf0ibuf.h" +#include "lock0lock.h" +#include "log0log.h" +#include "mem0mem.h" +#include "mtr0mtr.h" +#include "os0file.h" +#include "os0thread.h" +#include "page0zip.h" +#include "pars0pars.h" +#include "rem0types.h" #include "row0import.h" +#include "row0ins.h" +#include "row0merge.h" +#include "row0mysql.h" #include "row0quiesce.h" +#include "row0sel.h" +#include "row0trunc.h" +#include "row0upd.h" +#include 
"fil0crypt.h" +#include "ut0timer.h" +#include "srv0mon.h" +#include "srv0srv.h" +#include "srv0start.h" #ifdef UNIV_DEBUG #include "trx0purge.h" #endif /* UNIV_DEBUG */ -#include "fts0priv.h" -#include "page0zip.h" +#include "trx0roll.h" +#include "trx0sys.h" +#include "trx0trx.h" #include "fil0pagecompress.h" +#include "trx0xa.h" +#include "ut0mem.h" +#include "row0ext.h" #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) +extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); + #ifdef MYSQL_DYNAMIC_PLUGIN -#define tc_size 2000 +#define tc_size 400 #define tdc_size 400 #endif #include "ha_innodb.h" #include "i_s.h" +#include "sync0sync.h" #include #include +/* for ha_innopart, Native InnoDB Partitioning. */ +/* JAN: TODO: MySQL 5.7 Native InnoDB Partitioning */ +#ifdef HAVE_HA_INNOPART_H +#include "ha_innopart.h" +#endif + #include #include -# ifndef MYSQL_PLUGIN_IMPORT -# define MYSQL_PLUGIN_IMPORT /* nothing */ -# endif /* MYSQL_PLUGIN_IMPORT */ - #ifdef WITH_WSREP #include "dict0priv.h" #include "../storage/innobase/include/ut0byte.h" @@ -180,16 +194,12 @@ static const long AUTOINC_OLD_STYLE_LOCKING = 0; static const long AUTOINC_NEW_STYLE_LOCKING = 1; static const long AUTOINC_NO_LOCKING = 2; -static long innobase_mirrored_log_groups; static long innobase_log_buffer_size; -static long innobase_additional_mem_pool_size; -static long innobase_file_io_threads; -static long innobase_open_files; +static long innobase_open_files=0; static long innobase_autoinc_lock_mode; static ulong innobase_commit_concurrency = 0; static ulong innobase_read_io_threads; static ulong innobase_write_io_threads; -static long innobase_buffer_pool_instances = 1; static long long innobase_buffer_pool_size, innobase_log_file_size; @@ -197,15 +207,12 @@ static long long innobase_buffer_pool_size, innobase_log_file_size; Connected to buf_LRU_old_ratio. 
*/ static uint innobase_old_blocks_pct; -/** Maximum on-disk size of change buffer in terms of percentage -of the buffer pool. */ -static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE; - /* The default values for the following char* start-up parameters are determined in innobase_init below: */ static char* innobase_data_home_dir = NULL; static char* innobase_data_file_path = NULL; +static char* innobase_temp_data_file_path = NULL; static char* innobase_file_format_name = NULL; static char* innobase_change_buffering = NULL; static char* innobase_enable_monitor_counter = NULL; @@ -216,7 +223,12 @@ static char* innobase_reset_all_monitor_counter = NULL; /* The highest file format being used in the database. The value can be set by user, however, it will be adjusted to the newer file format if a table of such format is created/opened. */ -static char* innobase_file_format_max = NULL; +char* innobase_file_format_max = NULL; + +/** Default value of innodb_file_format */ +static const char* innodb_file_format_default = "Barracuda"; +/** Default value of innodb_file_format_max */ +static const char* innodb_file_format_max_default = "Antelope"; static char* innobase_file_flush_method = NULL; @@ -229,10 +241,6 @@ values */ static ulong innobase_fast_shutdown = 1; static my_bool innobase_file_format_check = TRUE; -#ifdef UNIV_LOG_ARCHIVE -static my_bool innobase_log_archive = FALSE; -static char* innobase_log_arch_dir = NULL; -#endif /* UNIV_LOG_ARCHIVE */ static my_bool innobase_use_atomic_writes = FALSE; static my_bool innobase_use_fallocate = TRUE; static my_bool innobase_use_doublewrite = TRUE; @@ -240,12 +248,10 @@ static my_bool innobase_use_checksums = TRUE; static my_bool innobase_locks_unsafe_for_binlog = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; -static my_bool innobase_stats_on_metadata = TRUE; +my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_large_prefix = 
FALSE; static my_bool innodb_optimize_fulltext_only = FALSE; -static char* internal_innobase_data_file_path = NULL; - static char* innodb_version_str = (char*) INNODB_VERSION_STR; extern uint srv_n_fil_crypt_threads; @@ -261,6 +267,62 @@ extern uint srv_background_scrub_data_check_interval; extern my_bool srv_scrub_force_testing; #endif +/** Note we cannot use rec_format_enum because we do not allow +COMPRESSED row format for innodb_default_row_format option. */ +enum default_row_format_enum { + DEFAULT_ROW_FORMAT_REDUNDANT = 0, + DEFAULT_ROW_FORMAT_COMPACT = 1, + DEFAULT_ROW_FORMAT_DYNAMIC = 2, +}; + +static +void set_my_errno(int err) +{ + errno = err; +} + +/** Return the InnoDB ROW_FORMAT enum value +@param[in] row_format row_format from "innodb_default_row_format" +@return InnoDB ROW_FORMAT value from rec_format_t enum. */ +static +rec_format_t +get_row_format( + ulong row_format) +{ + switch(row_format) { + case DEFAULT_ROW_FORMAT_REDUNDANT: + return(REC_FORMAT_REDUNDANT); + case DEFAULT_ROW_FORMAT_COMPACT: + return(REC_FORMAT_COMPACT); + case DEFAULT_ROW_FORMAT_DYNAMIC: + return(REC_FORMAT_DYNAMIC); + default: + ut_ad(0); + return(REC_FORMAT_DYNAMIC); + } +} + +static ulong innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC; + +#ifdef UNIV_DEBUG +/** Values for --innodb-debug-compress names. */ +static const char* innodb_debug_compress_names[] = { + "none", + "zlib", + "lz4", + "lz4hc", + NullS +}; + +/** Enumeration of --innodb-debug-compress */ +static TYPELIB innodb_debug_compress_typelib = { + array_elements(innodb_debug_compress_names) - 1, + "innodb_debug_compress_typelib", + innodb_debug_compress_names, + NULL +}; +#endif /* UNIV_DEBUG */ + /** Possible values for system variable "innodb_stats_method". 
The values are defined the same as its corresponding MyISAM system variable "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ @@ -280,14 +342,14 @@ static TYPELIB innodb_stats_method_typelib = { NULL }; -/** Possible values for system variable "innodb_checksum_algorithm". */ +/** Possible values of the parameter innodb_checksum_algorithm */ static const char* innodb_checksum_algorithm_names[] = { - "CRC32", - "STRICT_CRC32", - "INNODB", - "STRICT_INNODB", - "NONE", - "STRICT_NONE", + "crc32", + "strict_crc32", + "innodb", + "strict_innodb", + "none", + "strict_none", NullS }; @@ -300,6 +362,23 @@ static TYPELIB innodb_checksum_algorithm_typelib = { NULL }; +/** Possible values for system variable "innodb_default_row_format". */ +static const char* innodb_default_row_format_names[] = { + "redundant", + "compact", + "dynamic", + NullS +}; + +/** Used to define an enumerate type of the system variable +innodb_default_row_format. */ +static TYPELIB innodb_default_row_format_typelib = { + array_elements(innodb_default_row_format_names) - 1, + "innodb_default_row_format_typelib", + innodb_default_row_format_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in case of normal DML ops it is not sensible to call srv_active_wake_master_thread after each @@ -320,6 +399,20 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = { "all" /* IBUF_USE_ALL */ }; + +/* This tablespace name is reserved by InnoDB in order to explicitly +create a file_per_table tablespace for the table. */ +const char reserved_file_per_table_space_name[] = "innodb_file_per_table"; + +/* This tablespace name is reserved by InnoDB for the system tablespace +which uses space_id 0 and stores extra types of system pages like UNDO +and doublewrite. */ +const char reserved_system_space_name[] = "innodb_system"; + +/* This tablespace name is reserved by InnoDB for the predefined temporary +tablespace. 
*/ +const char reserved_temporary_space_name[] = "innodb_temporary"; + /* Call back function array defined by MySQL and used to retrieve FTS results. */ const struct _ft_vft ft_vft_result = {NULL, @@ -334,6 +427,10 @@ const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version, innobase_fts_count_matches}; #ifdef HAVE_PSI_INTERFACE +# define PSI_KEY(n) {&n##_key, #n, 0} +/* All RWLOCK used in Innodb are SX-locks */ +# define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX} + /* Keys to register pthread mutexes/cond in the current file with performance schema */ static mysql_pfs_key_t innobase_share_mutex_key; @@ -342,12 +439,12 @@ static mysql_pfs_key_t commit_cond_key; static mysql_pfs_key_t pending_checkpoint_mutex_key; static PSI_mutex_info all_pthread_mutexes[] = { - {&commit_cond_mutex_key, "commit_cond_mutex", 0}, - {&innobase_share_mutex_key, "innobase_share_mutex", 0} + PSI_KEY(commit_cond_mutex), + PSI_KEY(innobase_share_mutex) }; static PSI_cond_info all_innodb_conds[] = { - {&commit_cond_key, "commit_cond", 0} + PSI_KEY(commit_cond) }; # ifdef UNIV_PFS_MUTEX @@ -355,76 +452,68 @@ static PSI_cond_info all_innodb_conds[] = { performance schema instrumented if "UNIV_PFS_MUTEX" is defined */ static PSI_mutex_info all_innodb_mutexes[] = { - {&autoinc_mutex_key, "autoinc_mutex", 0}, + PSI_KEY(autoinc_mutex), # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK - {&buffer_block_mutex_key, "buffer_block_mutex", 0}, + PSI_KEY(buffer_block_mutex), # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ - {&buf_pool_mutex_key, "buf_pool_mutex", 0}, - {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0}, - {&cache_last_read_mutex_key, "cache_last_read_mutex", 0}, - {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0}, - {&dict_sys_mutex_key, "dict_sys_mutex", 0}, - {&file_format_max_mutex_key, "file_format_max_mutex", 0}, - {&fil_system_mutex_key, "fil_system_mutex", 0}, - {&flush_list_mutex_key, "flush_list_mutex", 0}, - {&fts_bg_threads_mutex_key, "fts_bg_threads_mutex", 
0}, - {&fts_delete_mutex_key, "fts_delete_mutex", 0}, - {&fts_optimize_mutex_key, "fts_optimize_mutex", 0}, - {&fts_doc_id_mutex_key, "fts_doc_id_mutex", 0}, - {&fts_pll_tokenize_mutex_key, "fts_pll_tokenize_mutex", 0}, - {&log_flush_order_mutex_key, "log_flush_order_mutex", 0}, - {&hash_table_mutex_key, "hash_table_mutex", 0}, - {&ibuf_bitmap_mutex_key, "ibuf_bitmap_mutex", 0}, - {&ibuf_mutex_key, "ibuf_mutex", 0}, - {&ibuf_pessimistic_insert_mutex_key, - "ibuf_pessimistic_insert_mutex", 0}, -# ifndef HAVE_ATOMIC_BUILTINS - {&server_mutex_key, "server_mutex", 0}, -# endif /* !HAVE_ATOMIC_BUILTINS */ - {&log_sys_mutex_key, "log_sys_mutex", 0}, -# ifdef UNIV_MEM_DEBUG - {&mem_hash_mutex_key, "mem_hash_mutex", 0}, -# endif /* UNIV_MEM_DEBUG */ - {&mem_pool_mutex_key, "mem_pool_mutex", 0}, - {&mutex_list_mutex_key, "mutex_list_mutex", 0}, - {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0}, - {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0}, - {&recv_sys_mutex_key, "recv_sys_mutex", 0}, - {&recv_writer_mutex_key, "recv_writer_mutex", 0}, - {&rseg_mutex_key, "rseg_mutex", 0}, -# ifdef UNIV_SYNC_DEBUG - {&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&rw_lock_list_mutex_key, "rw_lock_list_mutex", 0}, - {&rw_lock_mutex_key, "rw_lock_mutex", 0}, - {&srv_dict_tmpfile_mutex_key, "srv_dict_tmpfile_mutex", 0}, - {&srv_innodb_monitor_mutex_key, "srv_innodb_monitor_mutex", 0}, - {&srv_misc_tmpfile_mutex_key, "srv_misc_tmpfile_mutex", 0}, - {&srv_monitor_file_mutex_key, "srv_monitor_file_mutex", 0}, -# ifdef UNIV_SYNC_DEBUG - {&sync_thread_mutex_key, "sync_thread_mutex", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&buf_dblwr_mutex_key, "buf_dblwr_mutex", 0}, - {&trx_undo_mutex_key, "trx_undo_mutex", 0}, - {&srv_sys_mutex_key, "srv_sys_mutex", 0}, - {&lock_sys_mutex_key, "lock_mutex", 0}, - {&lock_sys_wait_mutex_key, "lock_wait_mutex", 0}, - {&trx_mutex_key, "trx_mutex", 0}, - {&srv_sys_tasks_mutex_key, 
"srv_threads_mutex", 0}, - /* mutex with os_fast_mutex_ interfaces */ + PSI_KEY(buf_pool_mutex), + PSI_KEY(buf_pool_zip_mutex), + PSI_KEY(cache_last_read_mutex), + PSI_KEY(dict_foreign_err_mutex), + PSI_KEY(dict_sys_mutex), + PSI_KEY(recalc_pool_mutex), + PSI_KEY(file_format_max_mutex), + PSI_KEY(fil_system_mutex), + PSI_KEY(flush_list_mutex), + PSI_KEY(fts_bg_threads_mutex), + PSI_KEY(fts_delete_mutex), + PSI_KEY(fts_optimize_mutex), + PSI_KEY(fts_doc_id_mutex), + PSI_KEY(log_flush_order_mutex), + PSI_KEY(hash_table_mutex), + PSI_KEY(ibuf_bitmap_mutex), + PSI_KEY(ibuf_mutex), + PSI_KEY(ibuf_pessimistic_insert_mutex), + PSI_KEY(log_sys_mutex), + PSI_KEY(log_sys_write_mutex), + PSI_KEY(mutex_list_mutex), + PSI_KEY(page_zip_stat_per_index_mutex), + PSI_KEY(purge_sys_pq_mutex), + PSI_KEY(recv_sys_mutex), + PSI_KEY(recv_writer_mutex), + PSI_KEY(redo_rseg_mutex), + PSI_KEY(noredo_rseg_mutex), +# ifdef UNIV_DEBUG + PSI_KEY(rw_lock_debug_mutex), +# endif /* UNIV_DEBUG */ + PSI_KEY(rw_lock_list_mutex), + PSI_KEY(rw_lock_mutex), + PSI_KEY(srv_dict_tmpfile_mutex), + PSI_KEY(srv_innodb_monitor_mutex), + PSI_KEY(srv_misc_tmpfile_mutex), + PSI_KEY(srv_monitor_file_mutex), +# ifdef UNIV_DEBUG + PSI_KEY(sync_thread_mutex), +# endif /* UNIV_DEBUG */ + PSI_KEY(buf_dblwr_mutex), + PSI_KEY(trx_undo_mutex), + PSI_KEY(trx_pool_mutex), + PSI_KEY(trx_pool_manager_mutex), + PSI_KEY(srv_sys_mutex), + PSI_KEY(lock_mutex), + PSI_KEY(lock_wait_mutex), + PSI_KEY(trx_mutex), + PSI_KEY(srv_threads_mutex), # ifndef PFS_SKIP_EVENT_MUTEX - {&event_os_mutex_key, "event_os_mutex", 0}, + PSI_KEY(event_mutex), # endif /* PFS_SKIP_EVENT_MUTEX */ - {&os_mutex_key, "os_mutex", 0}, -#ifndef HAVE_ATOMIC_BUILTINS - {&srv_conc_mutex_key, "srv_conc_mutex", 0}, -#endif /* !HAVE_ATOMIC_BUILTINS */ -#ifndef HAVE_ATOMIC_BUILTINS_64 - {&monitor_mutex_key, "monitor_mutex", 0}, -#endif /* !HAVE_ATOMIC_BUILTINS_64 */ - {&ut_list_mutex_key, "ut_list_mutex", 0}, - {&trx_sys_mutex_key, "trx_sys_mutex", 0}, - 
{&zip_pad_mutex_key, "zip_pad_mutex", 0}, + PSI_KEY(rtr_active_mutex), + PSI_KEY(rtr_match_mutex), + PSI_KEY(rtr_path_mutex), + PSI_KEY(rtr_ssn_mutex), + PSI_KEY(trx_sys_mutex), + PSI_KEY(zip_pad_mutex), + PSI_KEY(master_key_id_mutex), }; # endif /* UNIV_PFS_MUTEX */ @@ -433,27 +522,24 @@ static PSI_mutex_info all_innodb_mutexes[] = { performance schema instrumented if "UNIV_PFS_RWLOCK" is defined */ static PSI_rwlock_info all_innodb_rwlocks[] = { -# ifdef UNIV_LOG_ARCHIVE - {&archive_lock_key, "archive_lock", 0}, -# endif /* UNIV_LOG_ARCHIVE */ - {&btr_search_latch_key, "btr_search_latch", 0}, + PSI_RWLOCK_KEY(btr_search_latch), # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK - {&buf_block_lock_key, "buf_block_lock", 0}, + PSI_RWLOCK_KEY(buf_block_lock), # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ -# ifdef UNIV_SYNC_DEBUG - {&buf_block_debug_latch_key, "buf_block_debug_latch", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&dict_operation_lock_key, "dict_operation_lock", 0}, - {&fil_space_latch_key, "fil_space_latch", 0}, - {&checkpoint_lock_key, "checkpoint_lock", 0}, - {&fts_cache_rw_lock_key, "fts_cache_rw_lock", 0}, - {&fts_cache_init_rw_lock_key, "fts_cache_init_rw_lock", 0}, - {&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0}, - {&trx_purge_latch_key, "trx_purge_latch", 0}, - {&index_tree_rw_lock_key, "index_tree_rw_lock", 0}, - {&index_online_log_key, "index_online_log", 0}, - {&dict_table_stats_key, "dict_table_stats", 0}, - {&hash_table_rw_lock_key, "hash_table_locks", 0} +# ifdef UNIV_DEBUG + PSI_RWLOCK_KEY(buf_block_debug_latch), +# endif /* UNIV_DEBUG */ + PSI_RWLOCK_KEY(dict_operation_lock), + PSI_RWLOCK_KEY(fil_space_latch), + PSI_RWLOCK_KEY(checkpoint_lock), + PSI_RWLOCK_KEY(fts_cache_rw_lock), + PSI_RWLOCK_KEY(fts_cache_init_rw_lock), + PSI_RWLOCK_KEY(trx_i_s_cache_lock), + PSI_RWLOCK_KEY(trx_purge_latch), + PSI_RWLOCK_KEY(index_tree_rw_lock), + PSI_RWLOCK_KEY(index_online_log), + PSI_RWLOCK_KEY(dict_table_stats), + PSI_RWLOCK_KEY(hash_table_locks) }; # endif /* 
UNIV_PFS_RWLOCK */ @@ -462,15 +548,22 @@ static PSI_rwlock_info all_innodb_rwlocks[] = { performance schema instrumented if "UNIV_PFS_THREAD" is defined */ static PSI_thread_info all_innodb_threads[] = { - {&trx_rollback_clean_thread_key, "trx_rollback_clean_thread", 0}, - {&io_handler_thread_key, "io_handler_thread", 0}, - {&srv_lock_timeout_thread_key, "srv_lock_timeout_thread", 0}, - {&srv_error_monitor_thread_key, "srv_error_monitor_thread", 0}, - {&srv_monitor_thread_key, "srv_monitor_thread", 0}, - {&srv_master_thread_key, "srv_master_thread", 0}, - {&srv_purge_thread_key, "srv_purge_thread", 0}, - {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}, - {&recv_writer_thread_key, "recv_writer_thread", 0} + PSI_KEY(buf_dump_thread), + PSI_KEY(dict_stats_thread), + PSI_KEY(io_handler_thread), + PSI_KEY(io_ibuf_thread), + PSI_KEY(io_log_thread), + PSI_KEY(io_read_thread), + PSI_KEY(io_write_thread), + PSI_KEY(page_cleaner_thread), + PSI_KEY(recv_writer_thread), + PSI_KEY(srv_error_monitor_thread), + PSI_KEY(srv_lock_timeout_thread), + PSI_KEY(srv_master_thread), + PSI_KEY(srv_monitor_thread), + PSI_KEY(srv_purge_thread), + PSI_KEY(srv_worker_thread), + PSI_KEY(trx_rollback_clean_thread), }; # endif /* UNIV_PFS_THREAD */ @@ -478,22 +571,13 @@ static PSI_thread_info all_innodb_threads[] = { /* all_innodb_files array contains the type of files that are performance schema instrumented if "UNIV_PFS_IO" is defined */ static PSI_file_info all_innodb_files[] = { - {&innodb_file_data_key, "innodb_data_file", 0}, - {&innodb_file_log_key, "innodb_log_file", 0}, - {&innodb_file_temp_key, "innodb_temp_file", 0} + PSI_KEY(innodb_data_file), + PSI_KEY(innodb_log_file), + PSI_KEY(innodb_temp_file) }; # endif /* UNIV_PFS_IO */ #endif /* HAVE_PSI_INTERFACE */ -/** Always normalize table name to lower case on Windows */ -#ifdef __WIN__ -#define normalize_table_name(norm_name, name) \ - normalize_table_name_low(norm_name, name, TRUE) -#else -#define 
normalize_table_name(norm_name, name) \ - normalize_table_name_low(norm_name, name, FALSE) -#endif /* __WIN__ */ - /** Set up InnoDB API callback function array */ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cursor_open_table, @@ -504,28 +588,18 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cursor_moveto, (ib_cb_t) ib_cursor_first, (ib_cb_t) ib_cursor_next, - (ib_cb_t) ib_cursor_last, (ib_cb_t) ib_cursor_set_match_mode, (ib_cb_t) ib_sec_search_tuple_create, (ib_cb_t) ib_clust_read_tuple_create, (ib_cb_t) ib_tuple_delete, - (ib_cb_t) ib_tuple_copy, (ib_cb_t) ib_tuple_read_u8, - (ib_cb_t) ib_tuple_write_u8, (ib_cb_t) ib_tuple_read_u16, - (ib_cb_t) ib_tuple_write_u16, (ib_cb_t) ib_tuple_read_u32, - (ib_cb_t) ib_tuple_write_u32, (ib_cb_t) ib_tuple_read_u64, - (ib_cb_t) ib_tuple_write_u64, (ib_cb_t) ib_tuple_read_i8, - (ib_cb_t) ib_tuple_write_i8, (ib_cb_t) ib_tuple_read_i16, - (ib_cb_t) ib_tuple_write_i16, (ib_cb_t) ib_tuple_read_i32, - (ib_cb_t) ib_tuple_write_i32, (ib_cb_t) ib_tuple_read_i64, - (ib_cb_t) ib_tuple_write_i64, (ib_cb_t) ib_tuple_get_n_cols, (ib_cb_t) ib_col_set_value, (ib_cb_t) ib_col_get_value, @@ -535,16 +609,13 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_trx_rollback, (ib_cb_t) ib_trx_start, (ib_cb_t) ib_trx_release, - (ib_cb_t) ib_trx_state, (ib_cb_t) ib_cursor_lock, (ib_cb_t) ib_cursor_close, (ib_cb_t) ib_cursor_new_trx, (ib_cb_t) ib_cursor_reset, - (ib_cb_t) ib_open_table_by_name, (ib_cb_t) ib_col_get_name, (ib_cb_t) ib_table_truncate, (ib_cb_t) ib_cursor_open_index_using_name, - (ib_cb_t) ib_close_thd, (ib_cb_t) ib_cfg_get_cfg, (ib_cb_t) ib_cursor_set_memcached_sync, (ib_cb_t) ib_cursor_set_cluster_access, @@ -552,75 +623,47 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cfg_trx_level, (ib_cb_t) ib_tuple_get_n_user_cols, (ib_cb_t) ib_cursor_set_lock_mode, - (ib_cb_t) ib_cursor_clear_trx, (ib_cb_t) ib_get_idx_field_name, (ib_cb_t) ib_trx_get_start_time, (ib_cb_t) ib_cfg_bk_commit_interval, + (ib_cb_t) ib_ut_strerr, (ib_cb_t) ib_cursor_stmt_begin, - 
(ib_cb_t) ib_trx_read_only + (ib_cb_t) ib_trx_read_only, + (ib_cb_t) ib_is_virtual_table }; -/** - Test a file path whether it is same as mysql data directory path. - - @param path null terminated character string - - @return - @retval TRUE The path is different from mysql data directory. - @retval FALSE The path is same as mysql data directory. -*/ -static bool is_mysql_datadir_path(const char *path) +/******************************************************************//** +Function used to loop a thread (for debugging/instrumentation +purpose). */ +void +srv_debug_loop(void) +/*================*/ { - if (path == NULL) - return false; - - char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN]; - convert_dirname(path_dir, path, NullS); - convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS); - size_t mysql_data_home_len= dirname_length(mysql_data_dir); - size_t path_len = dirname_length(path_dir); - - if (path_len < mysql_data_home_len) - return true; - - if (!lower_case_file_system) - return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len)); - - return(files_charset_info->coll->strnncoll(files_charset_info, - (uchar *) path_dir, path_len, - (uchar *) mysql_data_dir, - mysql_data_home_len, - TRUE)); + ibool set = TRUE; + while (set) { + os_thread_yield(); + } } +/******************************************************************//** +Debug function used to read a MBR data */ -static int mysql_tmpfile_path(const char *path, const char *prefix) +#ifdef UNIV_DEBUG +void +srv_mbr_debug(const byte* data) { - DBUG_ASSERT(path != NULL); - DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN); - - char filename[FN_REFLEN]; - File fd = create_temp_file(filename, path, prefix, -#ifdef __WIN__ - O_BINARY | O_TRUNC | O_SEQUENTIAL | - O_SHORT_LIVED | -#endif /* __WIN__ */ - O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY, - MYF(MY_WME)); - if (fd >= 0) { -#ifndef __WIN__ - /* - This can be removed once the following bug is fixed: - Bug #28903 create_temp_file() 
doesn't honor O_TEMPORARY option - (file not removed) (Unix) - */ - unlink(filename); -#endif /* !__WIN__ */ - } - - return fd; + double a, b, c , d; + a = mach_double_read(data); + data += sizeof(double); + b = mach_double_read(data); + data += sizeof(double); + c = mach_double_read(data); + data += sizeof(double); + d = mach_double_read(data); + ut_ad(a && b && c &&d); } +#endif static void innodb_remember_check_sysvar_funcs(); mysql_var_check_func check_sysvar_enum; @@ -671,6 +714,8 @@ innodb_stopword_table_validate( for update function */ struct st_mysql_value* value); /*!< in: incoming string */ +static bool is_mysql_datadir_path(const char *path); + /** Validate passed-in "value" is a valid directory name. This function is registered as a callback with MySQL. @param[in,out] thd thread handle @@ -721,6 +766,7 @@ innodb_tmpdir_validate( return(1); } + os_normalize_path(alter_tmp_dir); my_realpath(tmp_abs_path, alter_tmp_dir, 0); size_t tmp_abs_len = strlen(tmp_abs_path); @@ -773,10 +819,6 @@ innodb_tmpdir_validate( return(0); } -/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default -system clustered index when there is no primary key. */ -const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; - /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code @return InnoDB isolation level */ @@ -786,6 +828,16 @@ innobase_map_isolation_level( /*=========================*/ enum_tx_isolation iso); /*!< in: MySQL isolation level code */ +/** Gets field offset for a field in a table. +@param[in] table MySQL table object +@param[in] field MySQL field object +@return offset */ +static inline +uint +get_field_offset( + const TABLE* table, + const Field* field); + /*************************************************************//** Check for a valid value of innobase_compression_algorithm. @return 0 for valid innodb_compression_algorithm. 
*/ @@ -819,9 +871,40 @@ innodb_encrypt_tables_validate( static const char innobase_hton_name[]= "InnoDB"; +static const char* deprecated_innodb_support_xa + = "Using innodb_support_xa is deprecated and the" + " parameter may be removed in future releases."; + +static const char* deprecated_innodb_support_xa_off + = "Using innodb_support_xa is deprecated and the" + " parameter may be removed in future releases." + " Only innodb_support_xa=ON is allowed."; + +/** Update the session variable innodb_support_xa. +@param[in] thd current session +@param[in] var the system variable innodb_support_xa +@param[in,out] var_ptr the contents of the variable +@param[in] save the to-be-updated value */ +static +void +innodb_support_xa_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + my_bool innodb_support_xa = *static_cast(save); + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, + innodb_support_xa + ? deprecated_innodb_support_xa + : deprecated_innodb_support_xa_off); +} + static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG, "Enable InnoDB support for the XA two-phase commit", - /* check_func */ NULL, /* update_func */ NULL, + /* check_func */ NULL, innodb_support_xa_update, /* default */ TRUE); static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, @@ -831,7 +914,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, "Use strict mode when evaluating create options.", - NULL, NULL, FALSE); + NULL, NULL, TRUE); static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG, "Create FTS index with stopword.", @@ -857,6 +940,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR}, {"buffer_pool_load_status", (char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR}, + {"buffer_pool_resize_status", + (char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR}, 
{"buffer_pool_pages_data", (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG}, {"buffer_pool_bytes_data", @@ -911,8 +996,6 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, {"dblwr_writes", (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, - {"have_atomic_builtins", - (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL}, {"log_waits", (char*) &export_vars.innodb_log_waits, SHOW_LONG}, {"log_write_requests", @@ -972,7 +1055,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG}, {"purge_view_trx_id_age", (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG}, + {"ahi_drop_lookups", + (char*) &export_vars.innodb_ahi_drop_lookups, SHOW_LONG}, #endif /* UNIV_DEBUG */ + /* Status variables for page compression */ {"page_compression_saved", (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, @@ -1101,12 +1187,12 @@ free_share( /*****************************************************************//** Frees a possible InnoDB trx object associated with the current THD. -@return 0 or error number */ +@return 0 or error number */ static int innobase_close_connection( /*======================*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ + handlerton* hton, /*!< in/out: InnoDB handlerton */ THD* thd); /*!< in: MySQL thread handle for which to close the connection */ @@ -1116,12 +1202,12 @@ static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); /*****************************************************************//** Commits a transaction in an InnoDB database or marks an SQL statement ended. 
-@return 0 */ +@return 0 */ static int innobase_commit( /*============*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ + handlerton* hton, /*!< in/out: InnoDB handlerton */ THD* thd, /*!< in: MySQL thread handle of the user for whom the transaction should be committed */ @@ -1137,7 +1223,7 @@ static int innobase_rollback( /*==============*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ + handlerton* hton, /*!< in/out: InnoDB handlerton */ THD* thd, /*!< in: handle to the MySQL thread of the user whose transaction should be rolled back */ @@ -1174,7 +1260,7 @@ innobase_rollback_to_savepoint_can_release_mdl( /*****************************************************************//** Sets a transaction savepoint. -@return always 0, that is, always succeeds */ +@return always 0, that is, always succeeds */ static int innobase_savepoint( @@ -1193,7 +1279,7 @@ static int innobase_release_savepoint( /*=======================*/ - handlerton* hton, /*!< in/out: handlerton for Innodb */ + handlerton* hton, /*!< in/out: handlerton for InnoDB */ THD* thd, /*!< in: handle to the MySQL thread of the user whose transaction's savepoint should be released */ @@ -1207,7 +1293,7 @@ static handler* innobase_create_handler( /*====================*/ - handlerton* hton, /*!< in/out: handlerton for Innodb */ + handlerton* hton, /*!< in/out: handlerton for InnoDB */ TABLE_SHARE* table, MEM_ROOT* mem_root); @@ -1237,7 +1323,7 @@ innobase_undo_logs_init_default_max(); /************************************************************//** Validate the file format name and return its corresponding id. -@return valid file format id */ +@return valid file format id */ static uint innobase_file_format_name_lookup( @@ -1247,7 +1333,7 @@ innobase_file_format_name_lookup( /************************************************************//** Validate the file format check config parameters, as a side effect it sets the srv_max_file_format_at_startup variable. 
-@return the format_id if valid config value, otherwise, return -1 */ +@return the format_id if valid config value, otherwise, return -1 */ static int innobase_file_format_validate_and_set( @@ -1256,7 +1342,7 @@ innobase_file_format_validate_and_set( /*******************************************************************//** This function is used to prepare an X/Open XA distributed transaction. -@return 0 or error number */ +@return 0 or error number */ static int innobase_xa_prepare( @@ -1270,7 +1356,7 @@ innobase_xa_prepare( ended */ /*******************************************************************//** This function is used to recover X/Open XA distributed transactions. -@return number of prepared transactions stored in xid_list */ +@return number of prepared transactions stored in xid_list */ static int innobase_xa_recover( @@ -1281,7 +1367,7 @@ innobase_xa_recover( /*******************************************************************//** This function is used to commit one X/Open XA distributed transaction which is in the prepared state -@return 0 or error number */ +@return 0 or error number */ static int innobase_commit_by_xid( @@ -1292,7 +1378,7 @@ innobase_commit_by_xid( /*******************************************************************//** This function is used to rollback one X/Open XA distributed transaction which is in the prepared state -@return 0 or error number */ +@return 0 or error number */ static int innobase_rollback_by_xid( @@ -1300,62 +1386,39 @@ innobase_rollback_by_xid( handlerton* hton, /*!< in: InnoDB handlerton */ XID* xid); /*!< in: X/Open XA transaction identification */ -/*******************************************************************//** -Create a consistent view for a cursor based on current transaction -which is created if the corresponding MySQL thread still lacks one. -This consistent view is then used inside of MySQL when accessing records -using a cursor. 
-@return pointer to cursor view or NULL */ + +#ifdef MYSQL_TABLESPACES +/** This API handles CREATE, ALTER & DROP commands for InnoDB tablespaces. +@param[in] hton Handlerton of InnoDB +@param[in] thd Connection +@param[in] alter_info Describies the command and how to do it. +@return MySQL error code*/ static -void* -innobase_create_cursor_view( -/*========================*/ - handlerton* hton, /*!< in: innobase hton */ - THD* thd); /*!< in: user thread handle */ -/*******************************************************************//** -Set the given consistent cursor view to a transaction which is created -if the corresponding MySQL thread still lacks one. If the given -consistent cursor view is NULL global read view of a transaction is -restored to a transaction read view. */ -static -void -innobase_set_cursor_view( -/*=====================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - THD* thd, /*!< in: user thread handle */ - void* curview); /*!< in: Consistent cursor view to - be set */ -/*******************************************************************//** -Close the given consistent cursor view of a transaction and restore -global read view to a transaction read view. Transaction is created if the -corresponding MySQL thread still lacks one. */ -static -void -innobase_close_cursor_view( -/*=======================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - THD* thd, /*!< in: user thread handle */ - void* curview); /*!< in: Consistent read view to be - closed */ -/*****************************************************************//** -Removes all tables in the named database inside InnoDB. */ +int +innobase_alter_tablespace( + handlerton* hton, + THD* thd, + st_alter_tablespace* alter_info); +#endif /* MYSQL_TABLESPACES */ + +/** Remove all tables in the named database inside InnoDB. 
+@param[in] hton handlerton from InnoDB +@param[in] path Database path; Inside InnoDB the name of the last +directory in the path is used as the database name. +For example, in 'mysql/data/test' the database name is 'test'. */ static void innobase_drop_database( -/*===================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - char* path); /*!< in: database path; inside InnoDB - the name of the last directory in - the path is used as the database name: - for example, in 'mysql/data/test' the - database name is 'test' */ + handlerton* hton, + char* path); + /*******************************************************************//** Closes an InnoDB database. */ static int innobase_end( /*=========*/ - handlerton* hton, /* in: Innodb handlerton */ + handlerton* hton, /* in: InnoDB handlerton */ ha_panic_function type); /*****************************************************************//** @@ -1363,24 +1426,66 @@ Creates an InnoDB transaction struct for the thd if it does not yet have one. Starts a new InnoDB transaction if a transaction is not yet started. And assigns a new snapshot for a consistent read if the transaction does not yet have one. -@return 0 */ +@return 0 */ static int innobase_start_trx_and_assign_read_view( /*====================================*/ - handlerton* hton, /* in: Innodb handlerton */ + handlerton* hton, /* in: InnoDB handlerton */ THD* thd); /* in: MySQL thread handle of the user for whom the transaction should be committed */ -/****************************************************************//** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. -@return TRUE if error */ + +/** Flush InnoDB redo logs to the file system. +@param[in] hton InnoDB handlerton +@param[in] binlog_group_flush true if we got invoked by binlog +group commit during flush stage, false in other cases. 
+@return false */ static bool innobase_flush_logs( -/*================*/ - handlerton* hton); /*!< in: InnoDB handlerton */ + handlerton* hton, + bool binlog_group_flush) +{ + DBUG_ENTER("innobase_flush_logs"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (srv_read_only_mode) { + DBUG_RETURN(false); + } + + /* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar. + Else, we got invoked by binlog group commit during flush stage. */ + + if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) { + /* innodb_flush_log_at_trx_commit=0 + (write and sync once per second). + Do not flush the redo log during binlog group commit. */ + DBUG_RETURN(false); + } + + /* Flush the redo log buffer to the redo log file. + Sync it to disc if we are in FLUSH LOGS, or if + innodb_flush_log_at_trx_commit=1 + (write and sync at each commit). */ + log_buffer_flush_to_disk(!binlog_group_flush + || srv_flush_log_at_trx_commit == 1); + + DBUG_RETURN(false); +} + +/** Flush InnoDB redo logs to the file system. +@param[in] hton InnoDB handlerton +@param[in] binlog_group_flush true if we got invoked by binlog +group commit during flush stage, false in other cases. +@return false */ +static +bool +innobase_flush_logs( + handlerton* hton) +{ + return innobase_flush_logs(hton, true); +} /************************************************************************//** Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the @@ -1407,14 +1512,6 @@ innobase_show_status( stat_print_fn* stat_print, enum ha_stat_type stat_type); -/*****************************************************************//** -Commits a transaction in an InnoDB database. */ -static -void -innobase_commit_low( -/*================*/ - trx_t* trx); /*!< in: transaction handle */ - /****************************************************************//** Parse and enable InnoDB monitor counters during server startup. 
User can enable monitor counters/groups by specifying @@ -1426,23 +1523,53 @@ innodb_enable_monitor_at_startup( /*=============================*/ char* str); /*!< in: monitor counter enable list */ -/********************************************************************* -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case if "set_lower_case" is set to TRUE. */ +/** Fill handlerton based INFORMATION_SCHEMA tables. +@param[in] (unused) Handle to the handlerton structure +@param[in] thd Thread/connection descriptor +@param[in,out] tables Information Schema tables to fill +@param[in] (unused) Intended for conditional pushdown +@param[in] idx Table id that indicates which I_S table to fill +@return Operation status */ +static +int +innobase_fill_i_s_table( + handlerton*, + THD* thd, + TABLE_LIST* tables, + Item*, + enum_schema_tables idx) +{ + int ret = 0; + + if (idx == SCH_FILES) { + ret = i_s_files_table_fill(thd, tables); + } + + return(ret); +} + +#ifdef MYSQL_STORE_FTS_DOC_ID +/** Store doc_id value into FTS_DOC_ID field +@param[in,out] tbl table containing FULLTEXT index +@param[in] doc_id FTS_DOC_ID value */ +static void -normalize_table_name_low( -/*=====================*/ - char* norm_name, /* out: normalized name as a - null-terminated string */ - const char* name, /* in: table name string */ - ibool set_lower_case); /* in: TRUE if we want to set - name to lower case */ +innobase_fts_store_docid( + TABLE* tbl, + ulonglong doc_id) +{ + my_bitmap_map* old_map + = dbug_tmp_use_all_columns(tbl, tbl->write_set); + + tbl->fts_doc_id_field->store(static_cast(doc_id), true); + + dbug_tmp_restore_column_map(tbl->write_set, old_map); +} +#endif /*************************************************************//** Check for a valid value of innobase_commit_concurrency. 
-@return 0 for valid innodb_commit_concurrency */ +@return 0 for valid innodb_commit_concurrency */ static int innobase_commit_concurrency_validate( @@ -1482,20 +1609,40 @@ innobase_create_handler( TABLE_SHARE* table, MEM_ROOT* mem_root) { +#ifdef MYSQL_INNODB_PARTITIONING + /* If the table: + 1) have type InnoDB (not the generic partition handlerton) + 2) have partitioning defined + Then return the native partitioning handler ha_innopart + else return normal ha_innobase handler. */ + if (table + && table->db_type() == innodb_hton_ptr // 1) + && table->partition_info_str // 2) + && table->partition_info_str_len) { // 2) + ha_innopart* file = new (mem_root) ha_innopart(hton, table); + if (file && file->init_partitioning(mem_root)) + { + delete file; + return(NULL); + } + return(file); + } +#endif + return(new (mem_root) ha_innobase(hton, table)); } /* General functions */ -/*************************************************************//** -Check that a page_size is correct for InnoDB. If correct, set the -associated page_size_shift which is the power of 2 for this page size. -@return an associated page_size_shift if valid, 0 if invalid. */ +/** Check that a page_size is correct for InnoDB. +If correct, set the associated page_size_shift which is the power of 2 +for this page size. +@param[in] page_size Page Size to evaluate +@return an associated page_size_shift if valid, 0 if invalid. */ inline -int +ulong innodb_page_size_validate( -/*======================*/ - ulong page_size) /*!< in: Page Size to evaluate */ + ulong page_size) { ulong n; @@ -1504,7 +1651,7 @@ innodb_page_size_validate( for (n = UNIV_PAGE_SIZE_SHIFT_MIN; n <= UNIV_PAGE_SIZE_SHIFT_MAX; n++) { - if (page_size == (ulong) (1 << n)) { + if (page_size == static_cast(1 << n)) { DBUG_RETURN(n); } } @@ -1518,8 +1665,7 @@ server. Used in srv_conc_enter_innodb() to determine if the thread should be allowed to enter InnoDB - the replication thread is treated differently than other threads. 
Also used in srv_conc_force_exit_innodb(). -@return true if thd is the replication thread */ -UNIV_INTERN +@return true if thd is the replication thread */ ibool thd_is_replication_slave_thread( /*============================*/ @@ -1532,7 +1678,6 @@ thd_is_replication_slave_thread( Gets information on the durability property requested by thread. Used when writing either a prepare or commit record to the log buffer. @return the durability property. */ -UNIV_INTERN enum durability_properties thd_requested_durability( /*=====================*/ @@ -1543,9 +1688,8 @@ thd_requested_durability( /******************************************************************//** Returns true if transaction should be flagged as read-only. -@return true if the thd is marked as read-only */ -UNIV_INTERN -ibool +@return true if the thd is marked as read-only */ +bool thd_trx_is_read_only( /*=================*/ THD* thd) /*!< in: thread handle */ @@ -1553,11 +1697,61 @@ thd_trx_is_read_only( return(thd != 0 && thd_tx_is_read_only(thd)); } +#if 0 +/** +Check if the transaction can be rolled back +@param[in] requestor Session requesting the lock +@param[in] holder Session that holds the lock +@return the session that will be rolled back, null don't care */ + +THD* +thd_trx_arbitrate(THD* requestor, THD* holder) +{ + /* Non-user (thd==0) transactions by default can't rollback, in + practice DDL transactions should never rollback and that's because + they should never wait on table/record locks either */ + + ut_a(holder != NULL); + ut_a(holder != requestor); + + THD* victim = thd_tx_arbitrate(requestor, holder); + + ut_a(victim == NULL || victim == requestor || victim == holder); + + return(victim); +} + +/** +@param[in] thd Session to check +@return the priority */ + +int +thd_trx_priority(THD* thd) +{ + return(thd == NULL ? 
0 : thd_tx_priority(thd)); +} +#endif + +#ifdef UNIV_DEBUG +/** +Returns true if transaction should be flagged as DD attachable transaction +@param[in] thd Thread handle +@return true if the thd is marked as read-only */ +bool +thd_trx_is_dd_trx(THD* thd) +{ + /* JAN: TODO: MySQL 5.7 + ha_table_flags() & HA_ATTACHABLE_TRX_COMPATIBLE + return(thd != NULL && thd_tx_is_dd_trx(thd)); + */ + return false; +} +#endif /* UNIV_DEBUG */ + /******************************************************************//** Check if the transaction is an auto-commit transaction. TRUE also implies that it is a SELECT (read-only) transaction. -@return true if the transaction is an auto commit read-only transaction. */ -UNIV_INTERN +@return true if the transaction is an auto commit read-only transaction. */ ibool thd_trx_is_auto_commit( /*===================*/ @@ -1570,19 +1764,44 @@ thd_trx_is_auto_commit( && thd_is_select(thd)); } +extern "C" time_t thd_start_time(const THD* thd); + /******************************************************************//** -Save some CPU by testing the value of srv_thread_concurrency in inline -functions. */ +Get the thread start time. +@return the thread start time in seconds since the epoch. */ +ulint +thd_start_time_in_secs( +/*===================*/ + THD* thd) /*!< in: thread handle, or NULL */ +{ + // FIXME: This function should be added to the server code. + //return(thd_start_time(thd)); + return(ulint(ut_time())); +} + +/** Enter InnoDB engine after checking the max number of user threads +allowed, else the thread is put into sleep. 
+@param[in,out]	prebuilt	row prebuilt handler */
 static inline
 void
 innobase_srv_conc_enter_innodb(
-/*===========================*/
-	trx_t*	trx)	/*!< in: transaction handle */
+	row_prebuilt_t*	prebuilt)
 {
 #ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd) &&
-	    wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
+	if (wsrep_on(prebuilt->trx->mysql_thd) &&
+	    wsrep_thd_is_BF(prebuilt->trx->mysql_thd, FALSE)) {
+		return;
+	}
 #endif /* WITH_WSREP */
+
+	/* We rely on server to do external_lock(F_UNLCK) to reset the
+	srv_conc.n_active counter. Since there are no locks on intrinsic
+	tables, we should skip this for intrinsic temporary tables. */
+	if (dict_table_is_intrinsic(prebuilt->table)) {
+		return;
+	}
+
+	trx_t*	trx = prebuilt->trx;

	if (srv_thread_concurrency) {
		if (trx->n_tickets_to_enter_innodb > 0) {
@@ -1599,29 +1818,41 @@ innobase_srv_conc_enter_innodb(
			< srv_thread_concurrency,
			srv_replication_delay * 1000);

-	} else {
-		srv_conc_enter_innodb(trx);
+	} else {
+		srv_conc_enter_innodb(prebuilt);
	}
}
}

-/******************************************************************//**
-Note that the thread wants to leave InnoDB only if it doesn't have
-any spare tickets. */
+/** Note that the thread wants to leave InnoDB only if it doesn't have
+any spare tickets.
+@param[in,out]	m_prebuilt	row prebuilt handler */
 static inline
 void
 innobase_srv_conc_exit_innodb(
-/*==========================*/
-	trx_t*	trx)	/*!< in: transaction handle */
+	row_prebuilt_t*	prebuilt)
 {
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
 #ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd) &&
-	    wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
+	if (wsrep_on(prebuilt->trx->mysql_thd) &&
+	    wsrep_thd_is_BF(prebuilt->trx->mysql_thd, FALSE)) {
+		return;
+	}
 #endif /* WITH_WSREP */
+	/* We rely on server to do external_lock(F_UNLCK) to reset the
+	srv_conc.n_active counter. 
Since there are no locks on intrinsic
+	tables, we should skip this for intrinsic temporary tables. */
+	if (dict_table_is_intrinsic(prebuilt->table)) {
+		return;
+	}
+
+	trx_t*	trx = prebuilt->trx;
+#ifdef UNIV_DEBUG
+	btrsea_sync_check	check(trx->has_search_latch);
+
+	ut_ad(!sync_check_iterate(check));
+#endif /* UNIV_DEBUG */
+
	/* This is to avoid making an unnecessary function call. */
	if (trx->declared_to_be_inside_innodb
	    && trx->n_tickets_to_enter_innodb == 0) {
@@ -1638,9 +1869,11 @@ innobase_srv_conc_force_exit_innodb(
/*================================*/
	trx_t*	trx)	/*!< in: transaction handle */
{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	btrsea_sync_check	check(trx->has_search_latch);
+
+	ut_ad(!sync_check_iterate(check));
+#endif /* UNIV_DEBUG */

	/* This is to avoid making an unnecessary function call. */
	if (trx->declared_to_be_inside_innodb) {
@@ -1650,8 +1883,7 @@ innobase_srv_conc_force_exit_innodb(
/******************************************************************//**
Returns the NUL terminated value of glob_hostname.
-@return pointer to glob_hostname. */
-UNIV_INTERN
+@return pointer to glob_hostname. */
const char*
server_get_hostname()
/*=================*/
@@ -1664,8 +1896,7 @@ Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
+@return true if non-transactional tables have been edited */
 ibool
 thd_has_edited_nontrans_tables(
 /*===========================*/
@@ -1676,8 +1907,7 @@ thd_has_edited_nontrans_tables(
 /******************************************************************//**
 Returns true if the thread is executing a SELECT statement. 
-@return true if thd is executing SELECT */ -UNIV_INTERN +@return true if thd is executing SELECT */ ibool thd_is_select( /*==========*/ @@ -1686,44 +1916,9 @@ thd_is_select( return(thd_sql_command(thd) == SQLCOM_SELECT); } -/******************************************************************//** -Returns true if the thread supports XA, -global value of innodb_supports_xa if thd is NULL. -@return true if thd has XA support */ -UNIV_INTERN -ibool -thd_supports_xa( -/*============*/ - THD* thd) /*!< in: thread handle, or NULL to query - the global innodb_supports_xa */ -{ - return(THDVAR(thd, support_xa)); -} - -/** Get the value of innodb_tmpdir. -@param[in] thd thread handle, or NULL to query - the global innodb_tmpdir. -@retval NULL if innodb_tmpdir="" */ -UNIV_INTERN -const char* -thd_innodb_tmpdir( - THD* thd) -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(false)); -#endif /* UNIV_SYNC_DEBUG */ - - const char* tmp_dir = THDVAR(thd, tmpdir); - if (tmp_dir != NULL && *tmp_dir == '\0') { - tmp_dir = NULL; - } - - return(tmp_dir); -} /******************************************************************//** Returns the lock wait timeout for the current connection. -@return the lock wait timeout, in seconds */ -UNIV_INTERN +@return the lock wait timeout, in seconds */ ulong thd_lock_wait_timeout( /*==================*/ @@ -1737,7 +1932,6 @@ thd_lock_wait_timeout( /******************************************************************//** Set the time waited for the lock for the current query. */ -UNIV_INTERN void thd_set_lock_wait_time( /*===================*/ @@ -1749,34 +1943,127 @@ thd_set_lock_wait_time( } } -/********************************************************************//** -Obtain the InnoDB transaction of a MySQL thread. -@return reference to transaction pointer */ -MY_ATTRIBUTE((warn_unused_result, nonnull)) -static inline +/** Get the value of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. 
+@retval NULL if innodb_tmpdir="" */ +const char* +thd_innodb_tmpdir( + THD* thd) +{ + +#ifdef UNIV_DEBUG + trx_t* trx = thd_to_trx(thd); + btrsea_sync_check check(trx->has_search_latch); + ut_ad(!sync_check_iterate(check)); +#endif /* UNIV_DEBUG */ + + const char* tmp_dir = THDVAR(thd, tmpdir); + + if (tmp_dir != NULL && *tmp_dir == '\0') { + tmp_dir = NULL; + } + + return(tmp_dir); +} + +/** Obtain the private handler of InnoDB session specific data. +@param[in,out] thd MySQL thread handler. +@return reference to private handler */ +MY_ATTRIBUTE((warn_unused_result)) +static +innodb_session_t*& +thd_to_innodb_session( + THD* thd) +{ + innodb_session_t*& innodb_session = + *(innodb_session_t**) thd_ha_data(thd, innodb_hton_ptr); + + if (innodb_session != NULL) { + return(innodb_session); + } + + innodb_session = UT_NEW_NOKEY(innodb_session_t()); + + thd_set_ha_data(thd, innodb_hton_ptr, innodb_session); + + return(innodb_session); +} + +/** Obtain the InnoDB transaction of a MySQL thread. +@param[in,out] thd MySQL thread handler. +@return reference to transaction pointer */ +MY_ATTRIBUTE((warn_unused_result)) trx_t*& thd_to_trx( -/*=======*/ - THD* thd) /*!< in: MySQL thread */ + THD* thd) { - return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); + innodb_session_t*& innodb_session = thd_to_innodb_session(thd); + ut_ad(innodb_session != NULL); + + return(innodb_session->m_trx); } + #ifdef WITH_WSREP +/********************************************************************//** +Obtain the InnoDB transaction id of a MySQL thread. +@return transaction id */ +__attribute__((warn_unused_result, nonnull)) ulonglong thd_to_trx_id( -/*=======*/ THD* thd) /*!< in: MySQL thread */ { return(thd_to_trx(thd)->id); } #endif /* WITH_WSREP */ +/** Check if statement is of type INSERT .... SELECT that involves +use of intrinsic tables. +@param[in] thd thread handler +@return true if INSERT .... SELECT statement. 
*/
+static inline
+bool
+thd_is_ins_sel_stmt(THD* user_thd)
+{
+	/* If the session involves use of intrinsic table
+	and it is trying to fetch the result from non-temporary tables
+	it indicates "insert .... select" statement. For non-temporary
+	table this is verified using the locked tables count but for
+	intrinsic table as external_lock is not invoked this count is
+	not updated.
+
+	Why is this needed ?
+	Use of AHI is blocked if statement is insert .... select statement. */
+	innodb_session_t*	innodb_priv = thd_to_innodb_session(user_thd);
+	return(innodb_priv->count_register_table_handler() > 0 ? true : false);
+}
+
+/** Add the table handler to thread cache.
+Obtain the InnoDB transaction of a MySQL thread.
+@param[in,out]	table		table handler
+@param[in,out]	heap		heap for allocating system columns.
+@param[in,out]	thd		MySQL thread handler */
+static inline
+void
+add_table_to_thread_cache(
+	dict_table_t*	table,
+	mem_heap_t*	heap,
+	THD*		thd)
+{
+	dict_table_add_system_columns(table, heap);
+
+	dict_table_set_big_rows(table);
+
+	innodb_session_t*& priv = thd_to_innodb_session(thd);
+	priv->register_table_handler(table->name.m_name, table);
+}
+
 /********************************************************************//**
 Call this function when mysqld passes control to the client. That is to
 avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
 documentation, see handler.cc.
-@return 0 */
-static
+@return 0 */
+inline
 int
 innobase_release_temporary_latches(
 /*===============================*/
@@ -1804,7 +2091,7 @@ Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
 time calls srv_active_wake_master_thread. This function should be used
 when a single database operation may introduce a small need for
 server utility activity, like checkpointing. 
*/ -static inline +inline void innobase_active_small(void) /*=======================*/ @@ -1820,8 +2107,7 @@ innobase_active_small(void) Converts an InnoDB error code to a MySQL error code and also tells to MySQL about a possible transaction rollback inside InnoDB caused by a lock wait timeout or a deadlock. -@return MySQL error code */ -static +@return MySQL error code */ int convert_error_code_to_mysql( /*========================*/ @@ -1834,7 +2120,7 @@ convert_error_code_to_mysql( return(0); case DB_INTERRUPTED: - return(HA_ERR_ABORTED_BY_USER); + return(HA_ERR_ABORTED_BY_USER); case DB_FOREIGN_EXCEED_MAX_CASCADE: ut_ad(thd); @@ -1846,12 +2132,15 @@ convert_error_code_to_mysql( "depth of %d. Please " "drop extra constraints and try " "again", DICT_FK_MAX_RECURSIVE_LOAD); + return(HA_ERR_FK_DEPTH_EXCEEDED); - /* fall through */ + case DB_CANT_CREATE_GEOMETRY_OBJECT: + my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0)); + return(HA_ERR_NULL_IN_SPATIAL); case DB_ERROR: default: - return(-1); /* unspecified error */ + return(HA_ERR_GENERIC); /* unspecified error */ case DB_DUPLICATE_KEY: /* Be cautious with returning this error, since @@ -1874,13 +2163,14 @@ convert_error_code_to_mysql( case DB_RECORD_NOT_FOUND: return(HA_ERR_NO_ACTIVE_RECORD); + case DB_FORCED_ABORT: case DB_DEADLOCK: /* Since we rolled back the whole transaction, we must tell it also to MySQL so that MySQL knows to empty the cached binlog for this transaction */ - if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); + if (thd != NULL) { + thd_mark_transaction_to_rollback(thd, 1); } return(HA_ERR_LOCK_DEADLOCK); @@ -1903,6 +2193,7 @@ convert_error_code_to_mysql( case DB_ROW_IS_REFERENCED: return(HA_ERR_ROW_IS_REFERENCED); + case DB_NO_FK_ON_S_BASE_COL: case DB_CANNOT_ADD_CONSTRAINT: case DB_CHILD_NO_INDEX: case DB_PARENT_NO_INDEX: @@ -1920,10 +2211,10 @@ convert_error_code_to_mysql( case DB_OUT_OF_FILE_SPACE: return(HA_ERR_RECORD_FILE_FULL); - case DB_TEMP_FILE_WRITE_FAILURE: + case 
DB_TEMP_FILE_WRITE_FAIL: my_error(ER_GET_ERRMSG, MYF(0), - DB_TEMP_FILE_WRITE_FAILURE, - ut_strerr(DB_TEMP_FILE_WRITE_FAILURE), + DB_TEMP_FILE_WRITE_FAIL, + ut_strerr(DB_TEMP_FILE_WRITE_FAIL), "InnoDB"); return(HA_ERR_INTERNAL_ERROR); @@ -1933,7 +2224,6 @@ convert_error_code_to_mysql( case DB_TABLE_IS_BEING_USED: return(HA_ERR_WRONG_COMMAND); - case DB_TABLESPACE_DELETED: case DB_TABLE_NOT_FOUND: return(HA_ERR_NO_SUCH_TABLE); @@ -1941,34 +2231,32 @@ convert_error_code_to_mysql( return(HA_ERR_DECRYPTION_FAILED); case DB_TABLESPACE_NOT_FOUND: - return(HA_ERR_NO_SUCH_TABLE); + return(HA_ERR_TABLESPACE_MISSING); case DB_TOO_BIG_RECORD: { /* If prefix is true then a 768-byte prefix is stored - locally for BLOB fields. Refer to dict_table_get_format() */ + locally for BLOB fields. Refer to dict_table_get_format(). + We limit max record size to 16k for 64k page size. */ bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A); my_printf_error(ER_TOO_BIG_ROWSIZE, - "Row size too large (> %lu). Changing some columns " - "to TEXT or BLOB %smay help. In current row " - "format, BLOB prefix of %d bytes is stored inline.", + "Row size too large (> %lu). Changing some columns" + " to TEXT or BLOB %smay help. In current row" + " format, BLOB prefix of %d bytes is stored inline.", MYF(0), - page_get_free_space_of_empty(flags & + srv_page_size == UNIV_PAGE_SIZE_MAX + ? REC_MAX_DATA_SIZE - 1 + : page_get_free_space_of_empty(flags & DICT_TF_COMPACT) / 2, - prefix ? "or using ROW_FORMAT=DYNAMIC " - "or ROW_FORMAT=COMPRESSED ": "", - prefix ? DICT_MAX_FIXED_COL_LEN : 0); + prefix + ? "or using ROW_FORMAT=DYNAMIC or" + " ROW_FORMAT=COMPRESSED " + : "", + prefix + ? DICT_MAX_FIXED_COL_LEN + : 0); return(HA_ERR_TO_BIG_ROW); } - - case DB_TOO_BIG_FOR_REDO: - my_printf_error(ER_TOO_BIG_ROWSIZE, "%s" , MYF(0), - "The size of BLOB/TEXT data inserted" - " in one transaction is greater than" - " 10% of redo log size. 
Increase the" - " redo log size using innodb_log_file_size."); - return(HA_ERR_TO_BIG_ROW); - case DB_TOO_BIG_INDEX_COL: my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)); @@ -1983,7 +2271,7 @@ convert_error_code_to_mysql( cached binlog for this transaction */ if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); + thd_mark_transaction_to_rollback(thd, 1); } return(HA_ERR_LOCK_TABLE_FULL); @@ -2004,16 +2292,23 @@ convert_error_code_to_mysql( return(HA_ERR_OUT_OF_MEM); case DB_TABLESPACE_EXISTS: return(HA_ERR_TABLESPACE_EXISTS); + case DB_TABLESPACE_DELETED: + return(HA_ERR_TABLESPACE_MISSING); case DB_IDENTIFIER_TOO_LONG: return(HA_ERR_INTERNAL_ERROR); + case DB_TABLE_CORRUPT: + return(HA_ERR_TABLE_CORRUPT); case DB_FTS_TOO_MANY_WORDS_IN_PHRASE: return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE); + case DB_WRONG_FILE_NAME: + return(HA_ERR_GENERIC); // when can this happen? + case DB_COMPUTE_VALUE_FAILED: + return(HA_ERR_GENERIC); // impossible } } /*************************************************************//** Prints info of a THD object (== user session thread) to the given file. */ -UNIV_INTERN void innobase_mysql_print_thd( /*=====================*/ @@ -2032,7 +2327,6 @@ innobase_mysql_print_thd( /******************************************************************//** Get the error message format string. @return the format string or 0 if not found. */ -UNIV_INTERN const char* innobase_get_err_msg( /*=================*/ @@ -2043,7 +2337,6 @@ innobase_get_err_msg( /******************************************************************//** Get the variable length bounds of the given character set. */ -UNIV_INTERN void innobase_get_cset_width( /*====================*/ @@ -2069,8 +2362,7 @@ innobase_get_cset_width( /* Fix bug#46256: allow tables to be dropped if the collation is not found, but issue a warning. 
*/ - if ((global_system_variables.log_warnings) - && (cset != 0)){ + if (cset != 0) { sql_print_warning( "Unknown collation #%lu.", cset); @@ -2086,14 +2378,13 @@ innobase_get_cset_width( /******************************************************************//** Converts an identifier to a table name. */ -UNIV_INTERN void innobase_convert_from_table_id( /*===========================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len) /*!< in: length of 'to', in bytes */ + CHARSET_INFO* cs, /*!< in: the 'from' character set */ + char* to, /*!< out: converted identifier */ + const char* from, /*!< in: identifier to convert */ + ulint len) /*!< in: length of 'to', in bytes */ { uint errors; @@ -2103,7 +2394,6 @@ innobase_convert_from_table_id( /********************************************************************** Check if the length of the identifier exceeds the maximum allowed. return true when length of identifier is too long. */ -UNIV_INTERN my_bool innobase_check_identifier_length( /*=============================*/ @@ -2127,14 +2417,13 @@ innobase_check_identifier_length( /******************************************************************//** Converts an identifier to UTF-8. 
*/ -UNIV_INTERN void innobase_convert_from_id( /*=====================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len) /*!< in: length of 'to', in bytes */ + CHARSET_INFO* cs, /*!< in: the 'from' character set */ + char* to, /*!< out: converted identifier */ + const char* from, /*!< in: identifier to convert */ + ulint len) /*!< in: length of 'to', in bytes */ { uint errors; @@ -2143,8 +2432,7 @@ innobase_convert_from_id( /******************************************************************//** Compares NUL-terminated UTF-8 strings case insensitively. -@return 0 if a=b, <0 if a1 if a>b */ -UNIV_INTERN +@return 0 if a=b, <0 if a1 if a>b */ int innobase_strcasecmp( /*================*/ @@ -2168,7 +2456,6 @@ innobase_strcasecmp( Compares NUL-terminated UTF-8 strings case insensitively. The second string contains wildcards. @return 0 if a match is found, 1 if not */ -UNIV_INTERN int innobase_wildcasecmp( /*=================*/ @@ -2178,14 +2465,12 @@ innobase_wildcasecmp( return(wild_case_compare(system_charset_info, a, b)); } -/******************************************************************//** -Strip dir name from a full path name and return only the file name +/** Strip dir name from a full path name and return only the file name +@param[in] path_name full path name @return file name or "null" if no file name */ -UNIV_INTERN const char* innobase_basename( -/*==============*/ - const char* path_name) /*!< in: full path name */ + const char* path_name) { const char* name = base_name(path_name); @@ -2194,7 +2479,6 @@ innobase_basename( /******************************************************************//** Makes all characters in a NUL-terminated UTF-8 string lower case. 
*/ -UNIV_INTERN void innobase_casedn_str( /*================*/ @@ -2205,9 +2489,8 @@ innobase_casedn_str( /**********************************************************************//** Determines the connection character set. -@return connection character set */ -UNIV_INTERN -struct charset_info_st* +@return connection character set */ +CHARSET_INFO* innobase_get_charset( /*=================*/ THD* mysql_thd) /*!< in: MySQL thread handle */ @@ -2215,21 +2498,64 @@ innobase_get_charset( return(thd_charset(mysql_thd)); } -/**********************************************************************//** -Determines the current SQL statement. -@return SQL statement string */ -UNIV_INTERN +/** Determines the current SQL statement. +Thread unsafe, can only be called from the thread owning the THD. +@param[in] thd MySQL thread handle +@param[out] length Length of the SQL statement +@return SQL statement string */ const char* -innobase_get_stmt( -/*==============*/ - THD* thd, /*!< in: MySQL thread handle */ - size_t* length) /*!< out: length of the SQL statement */ +innobase_get_stmt_unsafe( +/*=====================*/ + THD* thd, + size_t* length) { LEX_STRING* stmt; + const char* query=NULL; stmt = thd_query_string(thd); - *length = stmt->length; - return(stmt->str); + // MySQL 5.7 + //stmt = thd_query_unsafe(thd); + + if (stmt && stmt->str) { + *length = stmt->length; + query = stmt->str; + } else { + *length = 0; + } + + return(query); +} + +/** Determines the current SQL statement. +Thread safe, can be called from any thread as the string is copied +into the provided buffer. 
+@param[in] thd MySQL thread handle +@param[out] buf Buffer containing SQL statement +@param[in] buflen Length of provided buffer +@return Length of the SQL statement */ +size_t +innobase_get_stmt_safe( +/*===================*/ + THD* thd, + char* buf, + size_t buflen) +{ + LEX_STRING* stmt; + size_t length=0; + + ut_ad(buflen > 1); + + stmt = thd_query_string(thd); + + if (stmt && stmt->str) { + length = stmt->length > buflen ? buflen : stmt->length; + memcpy(buf, stmt->str, length-1); + buf[length]='\0'; + } else { + buf[0]='\0'; + } + + return (length); } /**********************************************************************//** @@ -2237,7 +2563,6 @@ Get the current setting of the tdc_size global parameter. We do a dirty read because for one there is no synchronization object and secondly there is little harm in doing so even if we get a torn read. @return value of tdc_size */ -UNIV_INTERN ulint innobase_get_table_cache_size(void) /*===============================*/ @@ -2250,8 +2575,7 @@ Get the current setting of the lower_case_table_names global parameter from mysqld.cc. We do a dirty read because for one there is no synchronization object and secondly there is little harm in doing so even if we get a torn read. -@return value of lower_case_table_names */ -UNIV_INTERN +@return value of lower_case_table_names */ ulint innobase_get_lower_case_table_names(void) /*=====================================*/ @@ -2259,11 +2583,70 @@ innobase_get_lower_case_table_names(void) return(lower_case_table_names); } -/** Create a temporary file in the location specified by the parameter -path. If the path is null, then it will be created in tmpdir. +/** + Test a file path whether it is same as mysql data directory path. + + @param path null terminated character string + + @return + @retval TRUE The path is different from mysql data directory. + @retval FALSE The path is same as mysql data directory. 
+*/ +static bool is_mysql_datadir_path(const char *path) +{ + if (path == NULL) + return false; + + char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN]; + convert_dirname(path_dir, path, NullS); + convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS); + size_t mysql_data_home_len= dirname_length(mysql_data_dir); + size_t path_len = dirname_length(path_dir); + + if (path_len < mysql_data_home_len) + return true; + + if (!lower_case_file_system) + return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len)); + + return(files_charset_info->coll->strnncoll(files_charset_info, + (uchar *) path_dir, path_len, + (uchar *) mysql_data_dir, + mysql_data_home_len, + TRUE)); +} + +static int mysql_tmpfile_path(const char *path, const char *prefix) +{ + DBUG_ASSERT(path != NULL); + DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN); + + char filename[FN_REFLEN]; + File fd = create_temp_file(filename, path, prefix, +#ifdef __WIN__ + O_BINARY | O_TRUNC | O_SEQUENTIAL | + O_SHORT_LIVED | +#endif /* __WIN__ */ + O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY, + MYF(MY_WME)); + if (fd >= 0) { +#ifndef __WIN__ + /* + This can be removed once the following bug is fixed: + Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option + (file not removed) (Unix) + */ + unlink(filename); +#endif /* !__WIN__ */ + } + + return fd; +} + +/** Creates a temporary file in the location specified by the parameter +path. If the path is NULL, then it will be created in tmpdir. 
@param[in] path location for creating temporary file -@return temporary file descriptor, or < 0 on error */ -UNIV_INTERN +@return temporary file descriptor, or < 0 on error */ int innobase_mysql_tmpfile( const char* path) @@ -2318,11 +2701,14 @@ innobase_mysql_tmpfile( fd2 = dup(fd); #endif if (fd2 < 0) { + char errbuf[MYSYS_STRERROR_SIZE]; DBUG_PRINT("error",("Got error %d on dup",fd2)); - my_errno=errno; + set_my_errno(errno); + my_strerror(errbuf, sizeof(errbuf), my_errno); my_error(EE_OUT_OF_FILERESOURCES, - MYF(ME_BELL+ME_WAITTANG), - "ib*", my_errno); + MYF(0), + "ib*", my_errno, + errbuf); } my_close(fd, MYF(MY_WME)); } @@ -2331,8 +2717,7 @@ innobase_mysql_tmpfile( /*********************************************************************//** Wrapper around MySQL's copy_and_convert function. -@return number of bytes copied to 'to' */ -UNIV_INTERN +@return number of bytes copied to 'to' */ ulint innobase_convert_string( /*====================*/ @@ -2361,8 +2746,7 @@ Not more than "buf_size" bytes are written to "buf". The result is always NUL-terminated (provided buf_size > 0) and the number of bytes that were written to "buf" is returned (including the terminating NUL). -@return number of bytes that were written */ -UNIV_INTERN +@return number of bytes that were written */ ulint innobase_raw_format( /*================*/ @@ -2391,6 +2775,118 @@ innobase_raw_format( return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size)); } +#ifdef MYSQL_COMPRESSION +/** Check if the string is "empty" or "none". 
+@param[in] algorithm Compression algorithm to check +@return true if no algorithm requested */ +bool +Compression::is_none(const char* algorithm) +{ + /* NULL is the same as NONE */ + if (algorithm == NULL + || *algorithm == 0 + || innobase_strcasecmp(algorithm, "none") == 0) { + return(true); + } + + return(false); +} + +/** Check for supported COMPRESS := (ZLIB | LZ4 | NONE) supported values +@param[in] name Name of the compression algorithm +@param[out] compression The compression algorithm +@return DB_SUCCESS or DB_UNSUPPORTED */ +dberr_t +Compression::check( + const char* algorithm, + Compression* compression) +{ + if (is_none(algorithm)) { + + compression->m_type = NONE; + + } else if (innobase_strcasecmp(algorithm, "zlib") == 0) { + + compression->m_type = ZLIB; + + } else if (innobase_strcasecmp(algorithm, "lz4") == 0) { + + compression->m_type = LZ4; + + } else { + return(DB_UNSUPPORTED); + } + + return(DB_SUCCESS); +} + +/** Check for supported COMPRESS := (ZLIB | LZ4 | NONE) supported values +@param[in] name Name of the compression algorithm +@param[out] compression The compression algorithm +@return DB_SUCCESS or DB_UNSUPPORTED */ +dberr_t +Compression::validate(const char* algorithm) +{ + Compression compression; + + return(check(algorithm, &compression)); +} +#endif /* MYSQL_COMPRESSION */ + +#ifdef MYSQL_ENCRYPTION +/** Check if the string is "" or "n". 
+@param[in] algorithm Encryption algorithm to check +@return true if no algorithm requested */ +bool +Encryption::is_none(const char* algorithm) +{ + /* NULL is the same as NONE */ + if (algorithm == NULL + || innobase_strcasecmp(algorithm, "n") == 0 + || innobase_strcasecmp(algorithm, "") == 0) { + return(true); + } + + return(false); +} + +/** Check the encryption option and set it +@param[in] option encryption option +@param[in/out] encryption The encryption algorithm +@return DB_SUCCESS or DB_UNSUPPORTED */ +dberr_t +Encryption::set_algorithm( + const char* option, + Encryption* encryption) +{ + if (is_none(option)) { + + encryption->m_type = NONE; + + } else if (innobase_strcasecmp(option, "y") == 0) { + + encryption->m_type = AES; + + } else { + return(DB_UNSUPPORTED); + } + + return(DB_SUCCESS); +} + +/** Check for supported ENCRYPT := (Y | N) supported values +@param[in] option Encryption option +@param[out] encryption The encryption algorithm +@return DB_SUCCESS or DB_UNSUPPORTED */ +dberr_t +Encryption::validate(const char* option) +{ + Encryption encryption; + + return(encryption.set_algorithm(option, &encryption)); +} +#endif /* MYSQL_ENCRYPTION */ + /*********************************************************************//** Compute the next autoinc value. @@ -2408,8 +2904,7 @@ values we want to reserve for multi-value inserts e.g., innobase_next_autoinc() will be called with increment set to 3 where autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for the multi-value INSERT above. -@return the next value */ -UNIV_INTERN +@return the next value */ ulonglong innobase_next_autoinc( /*==================*/ @@ -2502,6 +2997,64 @@ innobase_next_autoinc( return(next_value); } +/********************************************************************//** +Reset the autoinc value in the table. 
+@return DB_SUCCESS if all went well else error code */ +UNIV_INTERN +dberr_t +ha_innobase::innobase_reset_autoinc( +/*================================*/ + ulonglong autoinc) /*!< in: value to store */ +{ + dberr_t error; + + error = innobase_lock_autoinc(); + + if (error == DB_SUCCESS) { + + dict_table_autoinc_initialize(m_prebuilt->table, autoinc); + + dict_table_autoinc_unlock(m_prebuilt->table); + } + + return(error); +} + +/*******************************************************************//** +Reset the auto-increment counter to the given value, i.e. the next row +inserted will get the given value. This is called e.g. after TRUNCATE +is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is +returned by storage engines that don't support this operation. +@return 0 or error code */ +UNIV_INTERN +int +ha_innobase::reset_auto_increment( +/*==============================*/ + ulonglong value) /*!< in: new value for table autoinc */ +{ + DBUG_ENTER("ha_innobase::reset_auto_increment"); + + dberr_t error; + + update_thd(ha_thd()); + + error = row_lock_table_autoinc_for_mysql(m_prebuilt); + + if (error != DB_SUCCESS) { + DBUG_RETURN(convert_error_code_to_mysql( + error, m_prebuilt->table->flags, m_user_thd)); + } + + /* The next value can never be 0. */ + if (value == 0) { + value = 1; + } + + innobase_reset_autoinc(value); + + DBUG_RETURN(0); +} + /*********************************************************************//** Initializes some fields in an InnoDB transaction object. */ static @@ -2525,8 +3078,7 @@ innobase_trx_init( /*********************************************************************//** Allocates an InnoDB transaction for a MySQL handler object for DML. 
-@return InnoDB transaction handle */ -UNIV_INTERN +@return InnoDB transaction handle */ trx_t* innobase_trx_allocate( /*==================*/ @@ -2551,7 +3103,7 @@ innobase_trx_allocate( Gets the InnoDB transaction handle for a MySQL handler object, creates an InnoDB transaction struct if the corresponding MySQL thread struct still lacks one. -@return InnoDB transaction handle */ +@return InnoDB transaction handle */ static inline trx_t* check_trx_exists( @@ -2562,24 +3114,71 @@ check_trx_exists( if (trx == NULL) { trx = innobase_trx_allocate(thd); - thd_set_ha_data(thd, innodb_hton_ptr, trx); - } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) { - mem_analyze_corruption(trx); - ut_error; - } + innodb_session_t* innodb_session = thd_to_innodb_session(thd); + innodb_session->m_trx = trx; - innobase_trx_init(thd, trx); + /* User trx can be forced to rollback, + so we unset the disable flag. */ + ut_ad(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE); + trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK; + } else { + ut_a(trx->magic_n == TRX_MAGIC_N); + + innobase_trx_init(thd, trx); + } return(trx); } +#ifdef MYSQL_REPLACE_TRX_IN_THD +/** InnoDB transaction object that is currently associated with THD is +replaced with that of the 2nd argument. The previous value is +returned through the 3rd argument's buffer, unless it's NULL. When +the buffer is not provided (value NULL) that should mean the caller +restores previously saved association so the current trx has to be +additionally freed from all association with MYSQL. 
+ +@param[in,out] thd MySQL thread handle +@param[in] new_trx_arg replacement trx_t +@param[in,out] ptr_trx_arg pointer to a buffer to store old trx_t */ +static +void +innodb_replace_trx_in_thd( + THD* thd, + void* new_trx_arg, + void** ptr_trx_arg) +{ + trx_t*& trx = thd_to_trx(thd); + + ut_ad(new_trx_arg == NULL + || (((trx_t*) new_trx_arg)->mysql_thd == thd + && !((trx_t*) new_trx_arg)->is_recovered)); + + if (ptr_trx_arg) { + *ptr_trx_arg = trx; + + ut_ad(trx == NULL + || (trx->mysql_thd == thd && !trx->is_recovered)); + + } else if (trx->state == TRX_STATE_NOT_STARTED) { + ut_ad(thd == trx->mysql_thd); + trx_free_for_mysql(trx); + } else { + ut_ad(thd == trx->mysql_thd); + ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)); + trx_disconnect_prepared(trx); + } + trx = static_cast(new_trx_arg); +} +#endif /* MYSQL_REPLACE_TRX_IN_THD */ + /*********************************************************************//** Note that a transaction has been registered with MySQL. @return true if transaction is registered with MySQL 2PC coordinator */ static inline bool trx_is_registered_for_2pc( -/*=========================*/ +/*======================*/ const trx_t* trx) /* in: transaction */ { return(trx->is_registered == 1); @@ -2590,7 +3189,7 @@ Note that innobase_commit_ordered() was run. */ static inline void trx_set_active_commit_ordered( -/*==============================*/ +/*==========================*/ trx_t* trx) /* in: transaction */ { ut_a(trx_is_registered_for_2pc(trx)); @@ -2618,7 +3217,7 @@ trx_deregister_from_2pc( trx_t* trx) /* in: transaction */ { trx->is_registered = 0; - trx->active_commit_ordered = 0; + trx->active_commit_ordered = 0; } /*********************************************************************//** @@ -2632,24 +3231,11 @@ trx_is_active_commit_ordered( return(trx->active_commit_ordered == 1); } -/*********************************************************************//** -Check if transaction is started. 
-@reutrn true if transaction is in state started */ -static -bool -trx_is_started( -/*===========*/ - trx_t* trx) /* in: transaction */ -{ - return(trx->state != TRX_STATE_NOT_STARTED); -} - /*********************************************************************//** Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. Those flags are stored in .frm file and end up in the MySQL table object, but are frequently used inside InnoDB so we keep their copies into the InnoDB table object. */ -UNIV_INTERN void innobase_copy_frm_flags_from_create_info( /*=====================================*/ @@ -2685,7 +3271,6 @@ Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object. Those flags are stored in .frm file and end up in the MySQL table object, but are frequently used inside InnoDB so we keep their copies into the InnoDB table object. */ -UNIV_INTERN void innobase_copy_frm_flags_from_table_share( /*=====================================*/ @@ -2718,31 +3303,44 @@ innobase_copy_frm_flags_from_table_share( /*********************************************************************//** Construct ha_innobase handler. */ -UNIV_INTERN + ha_innobase::ha_innobase( /*=====================*/ handlerton* hton, TABLE_SHARE* table_arg) :handler(hton, table_arg), - int_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS | - HA_CAN_INDEX_BLOBS | HA_CONCURRENT_OPTIMIZE | - HA_CAN_SQL_HANDLER | - HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | - HA_PRIMARY_KEY_IN_READ_INDEX | - HA_BINLOG_ROW_CAPABLE | - HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ | - HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT | - (srv_force_primary_key ? 
HA_REQUIRE_PRIMARY_KEY : 0 ) | - HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT), - start_of_scan(0), - num_write_row(0), - ha_partition_stats(NULL) + m_prebuilt(), + m_prebuilt_ptr(&m_prebuilt), + m_user_thd(), + m_int_table_flags(HA_REC_NOT_IN_SEQ + | HA_NULL_IN_KEY + | HA_CAN_VIRTUAL_COLUMNS + | HA_CAN_INDEX_BLOBS + | HA_CAN_SQL_HANDLER + | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION + | HA_PRIMARY_KEY_IN_READ_INDEX + | HA_BINLOG_ROW_CAPABLE + | HA_CAN_GEOMETRY + | HA_PARTIAL_COLUMN_READ + | HA_TABLE_SCAN_ON_INDEX + | HA_CAN_FULLTEXT + | HA_CAN_FULLTEXT_EXT + /* JAN: TODO: MySQL 5.7 + | HA_CAN_FULLTEXT_HINTS + */ + | HA_CAN_EXPORT + | HA_CAN_RTREEKEYS + | HA_CONCURRENT_OPTIMIZE + | (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0) + ), + m_start_of_scan(), + m_num_write_row(), + m_mysql_has_locked() {} /*********************************************************************//** Destruct ha_innobase handler. */ -UNIV_INTERN + ha_innobase::~ha_innobase() /*======================*/ { @@ -2751,38 +3349,46 @@ ha_innobase::~ha_innobase() /*********************************************************************//** Updates the user_thd field in a handle and also allocates a new InnoDB transaction handle if needed, and updates the transaction fields in the -prebuilt struct. */ -UNIV_INTERN inline +m_prebuilt struct. */ void ha_innobase::update_thd( /*====================*/ THD* thd) /*!< in: thd to use the handle */ { - trx_t* trx; - DBUG_ENTER("ha_innobase::update_thd"); DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p", - user_thd, thd)); + m_user_thd, thd)); /* The table should have been opened in ha_innobase::open(). 
*/ - DBUG_ASSERT(prebuilt->table->n_ref_count > 0); + DBUG_ASSERT(m_prebuilt->table->n_ref_count > 0); - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - if (prebuilt->trx != trx) { + TrxInInnoDB trx_in_innodb(trx); - row_update_prebuilt_trx(prebuilt, trx); + ut_ad(dict_table_is_intrinsic(m_prebuilt->table) + || trx_in_innodb.is_aborted() + || (trx->dict_operation_lock_mode == 0 + && trx->dict_operation == TRX_DICT_OP_NONE)); + + if (m_prebuilt->trx != trx) { + + row_update_prebuilt_trx(m_prebuilt, trx); } - user_thd = thd; + m_user_thd = thd; + + DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N); + DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd)); + DBUG_VOID_RETURN; } /*********************************************************************//** Updates the user_thd field in a handle and also allocates a new InnoDB transaction handle if needed, and updates the transaction fields in the -prebuilt struct. */ -UNIV_INTERN +m_prebuilt struct. */ + void ha_innobase::update_thd() /*=====================*/ @@ -2808,11 +3414,18 @@ innobase_register_trx( THD* thd, /* in: MySQL thd (connection) object */ trx_t* trx) /* in: transaction to register */ { + /* JAN: TODO: MySQL 5.7 PSI + const ulonglong trx_id = static_cast( + trx_get_id_for_print(trx)); + + trans_register_ha(thd, FALSE, hton, &trx_id); + */ trans_register_ha(thd, FALSE, hton); if (!trx_is_registered_for_2pc(trx) && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + //trans_register_ha(thd, TRUE, hton, &trx_id); trans_register_ha(thd, TRUE, hton); } @@ -2880,7 +3493,7 @@ Why a deadlock of threads is not possible: the query cache calls this function at the start of a SELECT processing. Then the calling thread cannot be holding any InnoDB semaphores. The calling thread is holding the query cache mutex, and this function will reserve the InnoDB trx_sys->mutex. 
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above +Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above the InnoDB trx_sys->mutex. @return TRUE if permitted, FALSE if not; note that the value FALSE does not mean we should invalidate the query cache: invalidation is @@ -2894,28 +3507,26 @@ innobase_query_caching_of_table_permitted( retrieve it */ char* full_name, /*!< in: normalized path to the table */ uint full_name_len, /*!< in: length of the normalized path - to the table */ + to the table */ ulonglong *unused) /*!< unused for this engine */ { - ibool is_autocommit; - trx_t* trx; + bool is_autocommit; char norm_name[1000]; + trx_t* trx = check_trx_exists(thd); ut_a(full_name_len < 999); - trx = check_trx_exists(thd); - if (trx->isolation_level == TRX_ISO_SERIALIZABLE) { /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every plain SELECT if AUTOCOMMIT is not on. */ - return((my_bool)FALSE); + return(static_cast(false)); } - if (UNIV_UNLIKELY(trx->has_search_latch)) { - sql_print_error("The calling thread is holding the adaptive " - "search, latch though calling " - "innobase_query_caching_of_table_permitted."); + if (trx->has_search_latch) { + sql_print_error("The calling thread is holding the adaptive" + " search, latch though calling" + " innobase_query_caching_of_table_permitted."); trx_print(stderr, trx, 1024); } @@ -2925,9 +3536,9 @@ innobase_query_caching_of_table_permitted( if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - is_autocommit = TRUE; + is_autocommit = true; } else { - is_autocommit = FALSE; + is_autocommit = false; } @@ -2960,217 +3571,181 @@ innobase_query_caching_of_table_permitted( if (row_search_check_if_query_cache_permitted(trx, norm_name)) { - /* printf("Query cache for %s permitted\n", norm_name); */ - - return((my_bool)TRUE); + return(static_cast(true)); } - /* printf("Query cache for %s NOT permitted\n", norm_name); */ - - return((my_bool)FALSE); + 
return(static_cast(false)); } /*****************************************************************//** Invalidates the MySQL query cache for the table. */ -UNIV_INTERN void innobase_invalidate_query_cache( /*============================*/ trx_t* trx, /*!< in: transaction which modifies the table */ const char* full_name, /*!< in: concatenation of - database name, null char NUL, + database name, path separator, table name, null char NUL; NOTE that in Windows this is always in LOWER CASE! */ ulint full_name_len) /*!< in: full name length where also the null chars count */ { - /* Note that the sync0sync.h rank of the query cache mutex is just + /* Note that the sync0mutex.h rank of the query cache mutex is just above the InnoDB trx_sys_t->lock. The caller of this function must not have latches of a lower rank. */ -#ifdef HAVE_QUERY_CACHE - char qcache_key_name[2 * (NAME_LEN + 1)]; - size_t tabname_len; - size_t dbname_len; - - /* Construct the key("db-name\0table$name\0") for the query cache using - the path name("db@002dname\0table@0024name\0") of the table in its - canonical form. */ - dbname_len = filename_to_tablename(full_name, qcache_key_name, - sizeof(qcache_key_name)); - tabname_len = filename_to_tablename(full_name + strlen(full_name) + 1, - qcache_key_name + dbname_len + 1, - sizeof(qcache_key_name) - - dbname_len - 1); - /* Argument TRUE below means we are using transactions */ mysql_query_cache_invalidate4(trx->mysql_thd, - qcache_key_name, - (dbname_len + tabname_len + 2), + full_name, + (uint32) full_name_len, TRUE); -#endif } -/*****************************************************************//** -Convert an SQL identifier to the MySQL system_charset_info (UTF-8) -and quote it if needed. 
-@return pointer to the end of buf */ -static -char* -innobase_convert_identifier( -/*========================*/ - char* buf, /*!< out: buffer for converted identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ - ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool file_id)/*!< in: TRUE=id is a table or database name; - FALSE=id is an UTF-8 string */ +/** Quote a standard SQL identifier like tablespace, index or column name. +@param[in] file output stream +@param[in] trx InnoDB transaction, or NULL +@param[in] id identifier to quote */ +void +innobase_quote_identifier( + FILE* file, + trx_t* trx, + const char* id) { - const char* s = id; - int q; - - if (file_id) { - - char nz[MAX_TABLE_NAME_LEN + 1]; - char nz2[MAX_TABLE_NAME_LEN + 1]; - - /* Decode the table name. The MySQL function expects - a NUL-terminated string. The input and output strings - buffers must not be shared. */ - ut_a(idlen <= MAX_TABLE_NAME_LEN); - memcpy(nz, id, idlen); - nz[idlen] = 0; - - s = nz2; - idlen = explain_filename(thd, nz, nz2, sizeof nz2, - EXPLAIN_PARTITIONS_AS_COMMENT); - goto no_quote; - } - - /* See if the identifier needs to be quoted. */ - if (UNIV_UNLIKELY(!thd)) { - q = '"'; - } else { - q = get_quote_char_for_identifier(thd, s, (int) idlen); - } + const int q = trx != NULL && trx->mysql_thd != NULL + ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id)) + : '`'; if (q == EOF) { -no_quote: - if (UNIV_UNLIKELY(idlen > buflen)) { - idlen = buflen; - } - memcpy(buf, s, idlen); - return(buf + idlen); - } + fputs(id, file); + } else { + putc(q, file); - /* Quote the identifier. 
*/ - if (buflen < 2) { - return(buf); - } - - *buf++ = q; - buflen--; - - for (; idlen; idlen--) { - int c = *s++; - if (UNIV_UNLIKELY(c == q)) { - if (UNIV_UNLIKELY(buflen < 3)) { - break; + while (int c = *id++) { + if (c == q) { + putc(c, file); } - - *buf++ = c; - *buf++ = c; - buflen -= 2; - } else { - if (UNIV_UNLIKELY(buflen < 2)) { - break; - } - - *buf++ = c; - buflen--; + putc(c, file); } + + putc(q, file); + } +} + +/** Quote a standard SQL identifier like tablespace, index or column name. +@param[in] trx InnoDB transaction, or NULL +@param[in] id identifier to quote +@return quoted identifier */ +std::string +innobase_quote_identifier( +/*======================*/ + trx_t* trx, + const char* id) +{ + std::string quoted_identifier; + const int q = trx != NULL && trx->mysql_thd != NULL + ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id)) + : '`'; + + if (q == EOF) { + quoted_identifier.append(id); + } else { + quoted_identifier += (unsigned char)q; + quoted_identifier.append(id); + quoted_identifier += (unsigned char)q; } - *buf++ = q; - return(buf); + return (quoted_identifier); +} + +/** Convert a table name to the MySQL system_charset_info (UTF-8) +and quote it. +@param[out] buf buffer for converted identifier +@param[in] buflen length of buf, in bytes +@param[in] id identifier to convert +@param[in] idlen length of id, in bytes +@param[in] thd MySQL connection thread, or NULL +@return pointer to the end of buf */ +char* +innobase_convert_identifier( + char* buf, + ulint buflen, + const char* id, + ulint idlen, + THD* thd) +{ + const char* s = id; + + char nz[MAX_TABLE_NAME_LEN + 1]; + char nz2[MAX_TABLE_NAME_LEN + 1]; + + /* Decode the table name. The MySQL function expects + a NUL-terminated string. The input and output strings + buffers must not be shared. 
*/ + ut_a(idlen <= MAX_TABLE_NAME_LEN); + memcpy(nz, id, idlen); + nz[idlen] = 0; + + s = nz2; + idlen = explain_filename(thd, nz, nz2, sizeof nz2, + EXPLAIN_PARTITIONS_AS_COMMENT); + if (idlen > buflen) { + idlen = buflen; + } + memcpy(buf, s, idlen); + return(buf + idlen); } /*****************************************************************//** -Convert a table or index name to the MySQL system_charset_info (UTF-8) -and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN +Convert a table name to the MySQL system_charset_info (UTF-8). +@return pointer to the end of buf */ char* innobase_convert_name( /*==================*/ char* buf, /*!< out: buffer for converted identifier */ ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ + const char* id, /*!< in: table name to convert */ ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool table_id)/*!< in: TRUE=id is a table or database name; - FALSE=id is an index name */ + THD* thd) /*!< in: MySQL connection thread, or NULL */ { char* s = buf; const char* bufend = buf + buflen; - if (table_id) { - const char* slash = (const char*) memchr(id, '/', idlen); - if (!slash) { + const char* slash = (const char*) memchr(id, '/', idlen); - goto no_db_name; - } + if (slash == NULL) { + return(innobase_convert_identifier( + buf, buflen, id, idlen, thd)); + } - /* Print the database name and table name separately. 
*/ - s = innobase_convert_identifier(s, bufend - s, id, slash - id, - thd, TRUE); - if (UNIV_LIKELY(s < bufend)) { - *s++ = '.'; - s = innobase_convert_identifier(s, bufend - s, - slash + 1, idlen - - (slash - id) - 1, - thd, TRUE); - } - } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) { - /* Temporary index name (smart ALTER TABLE) */ - const char temp_index_suffix[]= "--temporary--"; - - s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1, - thd, FALSE); - if (s - buf + (sizeof temp_index_suffix - 1) < buflen) { - memcpy(s, temp_index_suffix, - sizeof temp_index_suffix - 1); - s += sizeof temp_index_suffix - 1; - } - } else { -no_db_name: - s = innobase_convert_identifier(buf, buflen, id, idlen, - thd, table_id); + /* Print the database name and table name separately. */ + s = innobase_convert_identifier(s, bufend - s, id, slash - id, thd); + if (s < bufend) { + *s++ = '.'; + s = innobase_convert_identifier(s, bufend - s, + slash + 1, idlen + - (slash - id) - 1, + thd); } return(s); } /*****************************************************************//** -A wrapper function of innobase_convert_name(), convert a table or -index name to the MySQL system_charset_info (UTF-8) and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN +A wrapper function of innobase_convert_name(), convert a table name +to the MySQL system_charset_info (UTF-8) and quote it if needed. 
+@return pointer to the end of buf */ void innobase_format_name( /*==================*/ char* buf, /*!< out: buffer for converted identifier */ ulint buflen, /*!< in: length of buf, in bytes */ - const char* name, /*!< in: index or table name to format */ - ibool is_index_name) /*!< in: index name */ + const char* name) /*!< in: table name to format */ { const char* bufend; - bufend = innobase_convert_name(buf, buflen, name, strlen(name), - NULL, !is_index_name); + bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL); ut_ad((ulint) (bufend - buf) < buflen); @@ -3179,20 +3754,18 @@ innobase_format_name( /**********************************************************************//** Determines if the currently running transaction has been interrupted. -@return TRUE if interrupted */ -UNIV_INTERN +@return TRUE if interrupted */ ibool trx_is_interrupted( /*===============*/ const trx_t* trx) /*!< in: transaction */ { - return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd)); + return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd)); } /**********************************************************************//** Determines if the currently running transaction is in strict mode. -@return TRUE if strict */ -UNIV_INTERN +@return TRUE if strict */ ibool trx_is_strict( /*==========*/ @@ -3202,104 +3775,243 @@ trx_is_strict( } /**************************************************************//** -Resets some fields of a prebuilt struct. The template is used in fast +Resets some fields of a m_prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -inline void ha_innobase::reset_template(void) /*=============================*/ { - ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); - ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); + ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n); /* Force table to be freed in close_thread_table(). 
*/ DBUG_EXECUTE_IF("free_table_in_fts_query", - if (prebuilt->in_fts_query) { + if (m_prebuilt->in_fts_query) { table->m_needs_reopen = true; } ); - prebuilt->keep_other_fields_on_keyread = 0; - prebuilt->read_just_key = 0; - prebuilt->in_fts_query = 0; + m_prebuilt->keep_other_fields_on_keyread = 0; + m_prebuilt->read_just_key = 0; + m_prebuilt->in_fts_query = 0; + /* Reset index condition pushdown state. */ - if (prebuilt->idx_cond) { - prebuilt->idx_cond = NULL; - prebuilt->idx_cond_n_cols = 0; - /* Invalidate prebuilt->mysql_template + if (m_prebuilt->idx_cond) { + m_prebuilt->idx_cond = NULL; + m_prebuilt->idx_cond_n_cols = 0; + /* Invalidate m_prebuilt->mysql_template in ha_innobase::write_row(). */ - prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE; + m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE; } } /*****************************************************************//** Call this when you have opened a new table handle in HANDLER, before you -call index_read_idx() etc. Actually, we can let the cursor stay open even +call index_read_map() etc. Actually, we can let the cursor stay open even over a transaction commit! Then you should call this before every operation, fetch next etc. This function inits the necessary things even after a transaction commit. */ -UNIV_INTERN + void ha_innobase::init_table_handle_for_HANDLER(void) /*============================================*/ { /* If current thd does not yet have a trx struct, create one. - If the current handle does not yet have a prebuilt struct, create - one. Update the trx pointers in the prebuilt struct. Normally + If the current handle does not yet have a m_prebuilt struct, create + one. Update the trx pointers in the m_prebuilt struct. Normally this operation is done in external_lock. 
*/ update_thd(ha_thd()); - /* Initialize the prebuilt struct much like it would be inited in + /* Initialize the m_prebuilt struct much like it would be inited in external_lock */ - trx_search_latch_release_if_reserved(prebuilt->trx); + trx_search_latch_release_if_reserved(m_prebuilt->trx); - innobase_srv_conc_force_exit_innodb(prebuilt->trx); + innobase_srv_conc_force_exit_innodb(m_prebuilt->trx); /* If the transaction is not started yet, start it */ - trx_start_if_not_started_xa(prebuilt->trx); + trx_start_if_not_started_xa(m_prebuilt->trx, false); + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); /* Assign a read view if the transaction does not have it yet */ - trx_assign_read_view(prebuilt->trx); + trx_assign_read_view(m_prebuilt->trx); - innobase_register_trx(ht, user_thd, prebuilt->trx); + innobase_register_trx(ht, m_user_thd, m_prebuilt->trx); /* We did the necessary inits in this function, no need to repeat them in row_search_for_mysql */ - prebuilt->sql_stat_start = FALSE; + m_prebuilt->sql_stat_start = FALSE; /* We let HANDLER always to do the reads as consistent reads, even if the trx isolation level would have been specified as SERIALIZABLE */ - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->stored_select_lock_type = LOCK_NONE; /* Always fetch all columns in the index record */ - prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS; + m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS; /* We want always to fetch all columns in the whole row? Or do we???? */ - prebuilt->used_in_HANDLER = TRUE; + m_prebuilt->used_in_HANDLER = TRUE; + reset_template(); } +/*********************************************************************//** +Free tablespace resources allocated. 
*/ +static +void +innobase_space_shutdown() +/*=====================*/ +{ + DBUG_ENTER("innobase_space_shutdown"); + + srv_sys_space.shutdown(); + if (srv_tmp_space.get_sanity_check_status()) { + fil_space_close(srv_tmp_space.name()); + srv_tmp_space.delete_files(); + } + srv_tmp_space.shutdown(); + + DBUG_VOID_RETURN; +} + +/*********************************************************************//** +Free any resources that were allocated and return failure. +@return always return 1 */ +static +int +innobase_init_abort() +/*=================*/ +{ + DBUG_ENTER("innobase_init_abort"); + innobase_space_shutdown(); + DBUG_RETURN(1); +} + + +#ifdef MYSQL_ENCRYPTION +/* mutex protecting the master_key_id */ +ib_mutex_t master_key_id_mutex; + +/** Rotate the encrypted tablespace keys according to master key +rotation. +@return false on success, true on failure */ +bool +innobase_encryption_key_rotation() +{ + byte* master_key = NULL; + bool ret = FALSE; + + /* Require the mutex to block other rotate request. */ + mutex_enter(&master_key_id_mutex); + + /* Check if keyring loaded and the currently master key + can be fetched. */ + if (Encryption::master_key_id != 0) { + ulint master_key_id; + Encryption::Version version; + + Encryption::get_master_key(&master_key_id, + &master_key, + &version); + if (master_key == NULL) { + mutex_exit(&master_key_id_mutex); + my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0)); + return(true); + } + my_free(master_key); + } + + master_key = NULL; + + /* Generate the new master key. */ + Encryption::create_master_key(&master_key); + + if (master_key == NULL) { + my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0)); + mutex_exit(&master_key_id_mutex); + return(true); + } + + ret = !fil_encryption_rotate(); + + my_free(master_key); + + /* If rotation failure, return error */ + if (ret) { + my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0)); + } + + /* Release the mutex. 
*/ + mutex_exit(&master_key_id_mutex); + + return(ret); +} +#endif /* MYSQL_ENCRYPTION */ + +/** Return partitioning flags. */ +static uint innobase_partition_flags() +{ + /* JAN: TODO: MYSQL 5.7 + return(HA_CAN_EXCHANGE_PARTITION | HA_CANNOT_PARTITION_FK); + */ + return (0); +} + +/** Deprecation message about InnoDB file format related parameters */ +#define DEPRECATED_FORMAT_PARAMETER(x) \ + "Using " x " is deprecated and the parameter" \ + " may be removed in future releases." \ + " See " REFMAN "innodb-file-format.html" + +/** Deprecation message about innodb_file_format */ +static const char* deprecated_file_format + = DEPRECATED_FORMAT_PARAMETER("innodb_file_format"); + +/** Deprecation message about innodb_large_prefix */ +static const char* deprecated_large_prefix + = DEPRECATED_FORMAT_PARAMETER("innodb_large_prefix"); + +/** Deprecation message about innodb_file_format_check */ +static const char* deprecated_file_format_check + = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_check"); + +/** Deprecation message about innodb_file_format_max */ +static const char* deprecated_file_format_max + = DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max"); + +/** Update log_checksum_algorithm_ptr with a pointer to the function +corresponding to whether checksums are enabled. +@param[in] check whether redo log block checksums are enabled */ +static +void +innodb_log_checksums_func_update(bool check) +{ + log_checksum_algorithm_ptr = check + ? log_block_calc_checksum_crc32 + : log_block_calc_checksum_none; +} + /****************************************************************//** Gives the file extension of an InnoDB single-table tablespace. */ static const char* ha_innobase_exts[] = { - ".ibd", - ".isl", - NullS + dot_ext[IBD], + dot_ext[ISL], + NullS }; /*********************************************************************//** Opens an InnoDB database. 
-@return 0 on success, error code on failure */ +@return 0 on success, 1 on failure */ static int innobase_init( @@ -3308,23 +4020,27 @@ innobase_init( { static char current_dir[3]; /*!< Set if using current lib */ int err; - bool ret; char *default_path; uint format_id; ulong num_pll_degree; + ulint srv_buf_pool_size_org = 0; + ulint fsp_flags =0; DBUG_ENTER("innobase_init"); - handlerton *innobase_hton= (handlerton*) p; + handlerton* innobase_hton= (handlerton*) p; innodb_hton_ptr = innobase_hton; innobase_hton->state = SHOW_OPTION_YES; - innobase_hton->db_type= DB_TYPE_INNODB; + innobase_hton->db_type = DB_TYPE_INNODB; innobase_hton->savepoint_offset = sizeof(trx_named_savept_t); innobase_hton->close_connection = innobase_close_connection; + innobase_hton->kill_query = innobase_kill_query; innobase_hton->savepoint_set = innobase_savepoint; innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint; + innobase_hton->savepoint_rollback_can_release_mdl = innobase_rollback_to_savepoint_can_release_mdl; + innobase_hton->savepoint_release = innobase_release_savepoint; innobase_hton->prepare_ordered= NULL; innobase_hton->commit_ordered= innobase_commit_ordered; @@ -3335,38 +4051,63 @@ innobase_init( innobase_hton->commit_by_xid = innobase_commit_by_xid; innobase_hton->rollback_by_xid = innobase_rollback_by_xid; innobase_hton->commit_checkpoint_request=innobase_checkpoint_request; + +#ifdef INNOBASE_CURSOR_VIEW innobase_hton->create_cursor_read_view = innobase_create_cursor_view; innobase_hton->set_cursor_read_view = innobase_set_cursor_view; innobase_hton->close_cursor_read_view = innobase_close_cursor_view; +#endif innobase_hton->create = innobase_create_handler; + +#ifdef MYSQL_TABLESPACES + innobase_hton->alter_tablespace = innobase_alter_tablespace; +#endif innobase_hton->drop_database = innobase_drop_database; innobase_hton->panic = innobase_end; + innobase_hton->partition_flags= innobase_partition_flags; innobase_hton->start_consistent_snapshot = 
innobase_start_trx_and_assign_read_view; innobase_hton->flush_logs = innobase_flush_logs; innobase_hton->show_status = innobase_show_status; + innobase_hton->fill_is_table = innobase_fill_i_s_table; innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS; innobase_hton->release_temporary_latches = innobase_release_temporary_latches; + +#ifdef MYSQL_REPLACE_TRX_IN_THD + innobase_hton->replace_native_transaction_in_thd = + innodb_replace_trx_in_thd; +#endif + #ifdef WITH_WSREP innobase_hton->abort_transaction=wsrep_abort_transaction; innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint; innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint; innobase_hton->fake_trx_id=wsrep_fake_trx_id; #endif /* WITH_WSREP */ - innobase_hton->kill_query = innobase_kill_query; - if (srv_file_per_table) - innobase_hton->tablefile_extensions = ha_innobase_exts; + if (srv_file_per_table) { + innobase_hton->tablefile_extensions = ha_innobase_exts; + } + +#ifdef MYSQL_INNODB_API_CB + /* JAN: TODO: MySQL 5.7 */ + innobase_hton->data = &innodb_api_cb; +#endif innobase_hton->table_options = innodb_table_option_list; innodb_remember_check_sysvar_funcs(); +#ifdef MYSQL_ENCRYPTION + innobase_hton->rotate_encryption_master_key = + innobase_encryption_key_rotation; +#endif + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -3385,8 +4126,7 @@ innobase_init( test_filename)) { sql_print_error("tablename encoding has been changed"); - - goto error; + DBUG_RETURN(innobase_init_abort()); } #endif /* DBUG_OFF */ @@ -3394,13 +4134,19 @@ innobase_init( if (sizeof(ulint) == 4) { if (innobase_buffer_pool_size > UINT_MAX32) { sql_print_error( - "innobase_buffer_pool_size can't be over 4GB" + "innodb_buffer_pool_size can't be over 4GB" " on 32-bit systems"); - goto error; + DBUG_RETURN(innobase_init_abort()); } } + os_file_set_umask(my_umask); + + /* Setup the memory alloc/free tracing mechanisms before calling + any functions that could 
possibly allocate memory. */ + ut_new_boot(); + if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_DEF) { fprintf(stderr, "InnoDB: Warning: innodb_page_size has been " @@ -3469,7 +4215,6 @@ innobase_init( goto error; } - os_innodb_umask = (ulint) my_umask; /* First calculate the default path for innodb_data_home_dir etc., in case the user has not given any value. @@ -3479,7 +4224,6 @@ innobase_init( if (mysqld_embedded) { default_path = mysql_real_data_home; - fil_path_to_mysql_datadir = mysql_real_data_home; } else { /* It's better to use current lib, to keep paths short */ current_dir[0] = FN_CURLIB; @@ -3490,40 +4234,94 @@ innobase_init( ut_a(default_path); + fil_path_to_mysql_datadir = default_path; + folder_mysql_datadir = fil_path_to_mysql_datadir; + /* Set InnoDB initialization parameters according to the values read from MySQL .cnf file */ - /*--------------- Data files -------------------------*/ - /* The default dir for data files is the datadir of MySQL */ - srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : - default_path); + srv_data_home = innobase_data_home_dir + ? innobase_data_home_dir : default_path; - /* Set default InnoDB data file size to 12 MB and let it be - auto-extending. Thus users can use InnoDB in >= 4.0 without having - to specify any startup options. */ + /*--------------- Shared tablespaces -------------------------*/ + /* Check that the value of system variable innodb_page_size was + set correctly. Its value was put into srv_page_size. If valid, + return the associated srv_page_size_shift. */ + srv_page_size_shift = innodb_page_size_validate(srv_page_size); + if (!srv_page_size_shift) { + sql_print_error("InnoDB: Invalid page size=%lu.\n", + srv_page_size); + DBUG_RETURN(innobase_init_abort()); + } + + /* Set default InnoDB temp data file size to 12 MB and let it be + auto-extending. 
*/ if (!innobase_data_file_path) { innobase_data_file_path = (char*) "ibdata1:12M:autoextend"; } - /* Since InnoDB edits the argument in the next call, we make another - copy of it: */ + /* This is the first time univ_page_size is used. + It was initialized to 16k pages before srv_page_size was set */ + univ_page_size.copy_from( + page_size_t(srv_page_size, srv_page_size, false)); - internal_innobase_data_file_path = my_strdup(innobase_data_file_path, - MYF(MY_FAE)); + srv_sys_space.set_space_id(TRX_SYS_SPACE); - ret = (bool) srv_parse_data_file_paths_and_sizes( - internal_innobase_data_file_path); - if (ret == FALSE) { - sql_print_error( - "InnoDB: syntax error in innodb_data_file_path" - " or size specified is less than 1 megabyte"); -mem_free_and_error: - srv_free_paths_and_sizes(); - my_free(internal_innobase_data_file_path); - goto error; + /* Create the filespace flags. */ + fsp_flags = fsp_flags_init( + univ_page_size, false, false, false, false, false, 0, ATOMIC_WRITES_DEFAULT); + srv_sys_space.set_flags(fsp_flags); + + srv_sys_space.set_name(reserved_system_space_name); + srv_sys_space.set_path(srv_data_home); + + /* Supports raw devices */ + if (!srv_sys_space.parse_params(innobase_data_file_path, true)) { + DBUG_RETURN(innobase_init_abort()); + } + + /* Set default InnoDB temp data file size to 12 MB and let it be + auto-extending. */ + + if (!innobase_temp_data_file_path) { + innobase_temp_data_file_path = (char*) "ibtmp1:12M:autoextend"; + } + + /* We set the temporary tablspace id later, after recovery. + The temp tablespace doesn't support raw devices. + Set the name and path. */ + srv_tmp_space.set_name(reserved_temporary_space_name); + srv_tmp_space.set_path(srv_data_home); + + /* Create the filespace flags with the temp flag set. 
*/ + fsp_flags = fsp_flags_init( + univ_page_size, false, false, false, true, false, 0, ATOMIC_WRITES_DEFAULT); + srv_tmp_space.set_flags(fsp_flags); + + if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) { + DBUG_RETURN(innobase_init_abort()); + } + + /* Perform all sanity check before we take action of deleting files*/ + if (srv_sys_space.intersection(&srv_tmp_space)) { + sql_print_error("%s and %s file names seem to be the same.", + srv_tmp_space.name(), srv_sys_space.name()); + DBUG_RETURN(innobase_init_abort()); + } + + /* ------------ UNDO tablespaces files ---------------------*/ + if (!srv_undo_dir) { + srv_undo_dir = default_path; + } + + os_normalize_path(srv_undo_dir); + + if (strchr(srv_undo_dir, ';')) { + sql_print_error("syntax error in innodb_undo_directory"); + DBUG_RETURN(innobase_init_abort()); } /* -------------- All log files ---------------------------*/ @@ -3534,43 +4332,24 @@ mem_free_and_error: srv_log_group_home_dir = default_path; } -#ifdef UNIV_LOG_ARCHIVE - /* Since innodb_log_arch_dir has no relevance under MySQL, - starting from 4.0.6 we always set it the same as - innodb_log_group_home_dir: */ - - innobase_log_arch_dir = innobase_log_group_home_dir; - - srv_arch_dir = innobase_log_arch_dir; -#endif /* UNIG_LOG_ARCHIVE */ - - srv_normalize_path_for_win(srv_log_group_home_dir); + os_normalize_path(srv_log_group_home_dir); if (strchr(srv_log_group_home_dir, ';')) { sql_print_error("syntax error in innodb_log_group_home_dir"); - goto mem_free_and_error; + DBUG_RETURN(innobase_init_abort()); } - if (innobase_mirrored_log_groups == 1) { - sql_print_warning( - "innodb_mirrored_log_groups is an unimplemented " - "feature and the variable will be completely " - "removed in a future version."); + if (!innobase_large_prefix) { + ib::warn() << deprecated_large_prefix; } - if (innobase_mirrored_log_groups > 1) { - sql_print_error( - "innodb_mirrored_log_groups is an unimplemented feature and " - "the variable will be completely 
removed in a future version. " - "Using values other than 1 is not supported."); - goto mem_free_and_error; + if (!THDVAR(NULL, support_xa)) { + ib::warn() << deprecated_innodb_support_xa_off; + THDVAR(NULL, support_xa) = TRUE; } - if (innobase_mirrored_log_groups == 0) { - /* To throw a deprecation warning message when the option is - passed, the default was changed to '0' (as a workaround). Since - the only value accepted for this option is '1', reset it to 1 */ - innobase_mirrored_log_groups = 1; + if (innobase_file_format_name != innodb_file_format_default) { + ib::warn() << deprecated_file_format; } /* Validate the file format by animal name */ @@ -3583,7 +4362,7 @@ mem_free_and_error: sql_print_error("InnoDB: wrong innodb_file_format."); - goto mem_free_and_error; + DBUG_RETURN(innobase_init_abort()); } } else { /* Set it to the default file format id. Though this @@ -3603,6 +4382,7 @@ mem_free_and_error: /* Check innobase_file_format_check variable */ if (!innobase_file_format_check) { + ib::warn() << deprecated_file_format_check; /* Set the value to disable checking. */ srv_max_file_format_at_startup = UNIV_FORMAT_MAX + 1; @@ -3613,20 +4393,24 @@ mem_free_and_error: srv_max_file_format_at_startup = UNIV_FORMAT_MIN; } + if (innobase_file_format_max != innodb_file_format_max_default) { + ib::warn() << deprecated_file_format_max; + } + /* Did the user specify a format name that we support? 
As a side effect it will update the variable srv_max_file_format_at_startup */ if (innobase_file_format_validate_and_set( innobase_file_format_max) < 0) { - sql_print_error("InnoDB: invalid " - "innodb_file_format_max value: " - "should be any value up to %s or its " - "equivalent numeric id", + sql_print_error("InnoDB: invalid" + " innodb_file_format_max value:" + " should be any value up to %s or its" + " equivalent numeric id", trx_sys_file_format_id_to_name( UNIV_FORMAT_MAX)); - goto mem_free_and_error; + DBUG_RETURN(innobase_init_abort()); } if (innobase_change_buffering) { @@ -3643,10 +4427,10 @@ mem_free_and_error: } } - sql_print_error("InnoDB: invalid value " - "innodb_change_buffering=%s", + sql_print_error("InnoDB: invalid value" + " innodb_change_buffering=%s", innobase_change_buffering); - goto mem_free_and_error; + DBUG_RETURN(innobase_init_abort()); } innobase_change_buffering_inited_ok: @@ -3674,8 +4458,8 @@ innobase_change_buffering_inited_ok: } else { /* The user has not set the value. We should set it based on innodb_io_capacity. */ - srv_max_io_capacity = static_cast( - ut_max(2 * srv_io_capacity, 2000)); + srv_max_io_capacity = + ut_max(2 * srv_io_capacity, 2000UL); } } else if (srv_max_io_capacity < srv_io_capacity) { @@ -3693,7 +4477,7 @@ innobase_change_buffering_inited_ok: strlen(srv_buf_dump_filename), FALSE)) { sql_print_error("InnoDB: innodb_buffer_pool_filename" " cannot have colon (:) in the file name."); - goto mem_free_and_error; + DBUG_RETURN(innobase_init_abort()); } /* --------------------------------------------------*/ @@ -3702,89 +4486,50 @@ innobase_change_buffering_inited_ok: srv_log_file_size = (ib_uint64_t) innobase_log_file_size; -#ifdef UNIV_LOG_ARCHIVE - srv_log_archive_on = (ulint) innobase_log_archive; -#endif /* UNIV_LOG_ARCHIVE */ - - /* Check that the value of system variable innodb_page_size was - set correctly. Its value was put into srv_page_size. 
If valid, - return the associated srv_page_size_shift.*/ - srv_page_size_shift = innodb_page_size_validate(srv_page_size); - if (!srv_page_size_shift) { - sql_print_error("InnoDB: Invalid page size=%lu.\n", - srv_page_size); - goto mem_free_and_error; - } if (UNIV_PAGE_SIZE_DEF != srv_page_size) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: innodb-page-size has been changed" - " from the default value %d to %lu.\n", - UNIV_PAGE_SIZE_DEF, srv_page_size); + ib::warn() << "innodb-page-size has been changed from the" + " default value " << UNIV_PAGE_SIZE_DEF << " to " + << srv_page_size << "."; + } + + if (srv_log_write_ahead_size > srv_page_size) { + srv_log_write_ahead_size = srv_page_size; + } else { + ulong srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE; + + while (srv_log_write_ahead_size_tmp + < srv_log_write_ahead_size) { + srv_log_write_ahead_size_tmp + = srv_log_write_ahead_size_tmp * 2; + } + if (srv_log_write_ahead_size_tmp + != srv_log_write_ahead_size) { + srv_log_write_ahead_size + = srv_log_write_ahead_size_tmp / 2; + } } srv_log_buffer_size = (ulint) innobase_log_buffer_size; - if (innobase_buffer_pool_instances == 0) { - innobase_buffer_pool_instances = 8; - -#if defined(__WIN__) && !defined(_WIN64) - if (innobase_buffer_pool_size > 1331 * 1024 * 1024) { - innobase_buffer_pool_instances - = ut_min(MAX_BUFFER_POOLS, - (long) (innobase_buffer_pool_size - / (128 * 1024 * 1024))); - } -#endif /* defined(__WIN__) && !defined(_WIN64) */ - } srv_buf_pool_size = (ulint) innobase_buffer_pool_size; - srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances; - srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; - - if (innobase_additional_mem_pool_size - != 8*1024*1024L /* the default */ ) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Using " - "innodb_additional_mem_pool_size is DEPRECATED. 
" - "This option may be removed in future releases, " - "together with the option innodb_use_sys_malloc " - "and with the InnoDB's internal memory " - "allocator.\n"); - } - - if (!srv_use_sys_malloc ) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Setting " - "innodb_use_sys_malloc to FALSE is DEPRECATED. " - "This option may be removed in future releases, " - "together with the InnoDB's internal memory " - "allocator.\n"); - } - - srv_n_file_io_threads = (ulint) innobase_file_io_threads; srv_n_read_io_threads = (ulint) innobase_read_io_threads; srv_n_write_io_threads = (ulint) innobase_write_io_threads; srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; if (!innobase_use_checksums) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Setting " - "innodb_checksums to OFF is DEPRECATED. " - "This option may be removed in future releases. " - "You should set innodb_checksum_algorithm=NONE " - "instead.\n"); + ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED." + " This option may be removed in future releases. You" + " should set innodb_checksum_algorithm=NONE instead."; srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE; } -#ifdef HAVE_LARGE_PAGES - if ((os_use_large_pages = (ibool) my_use_large_pages)) { - os_large_page_size = (ulint) opt_large_page_size; + innodb_log_checksums_func_update(innodb_log_checksums); + +#ifdef HAVE_LINUX_LARGE_PAGES + if ((os_use_large_pages = my_use_large_pages)) { + os_large_page_size = opt_large_page_size; } #endif @@ -3792,27 +4537,26 @@ innobase_change_buffering_inited_ok: srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog; if (innobase_locks_unsafe_for_binlog) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Using " - "innodb_locks_unsafe_for_binlog is DEPRECATED. " - "This option may be removed in future releases. 
" - "Please use READ COMMITTED transaction isolation " - "level instead, see " REFMAN "set-transaction.html.\n"); + ib::warn() << "Using innodb_locks_unsafe_for_binlog is" + " DEPRECATED. This option may be removed in future" + " releases. Please use READ COMMITTED transaction" + " isolation level instead; " << SET_TRANSACTION_MSG; } if (innobase_open_files < 10) { innobase_open_files = 300; - if (srv_file_per_table && tc_size > 300) { + if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) { innobase_open_files = tc_size; } } - if (innobase_open_files > (long) tc_size) { - fprintf(stderr, - "innodb_open_files should not be greater" - " than the open_files_limit.\n"); - innobase_open_files = tc_size; + if (innobase_open_files > (long) open_files_limit) { + ib::warn() << "innodb_open_files " << innobase_open_files + << " should not be greater" + << "than the open_files_limit " << open_files_limit; + if (innobase_open_files > (long) tc_size) { + innobase_open_files = tc_size; + } } srv_max_n_open_files = (ulint) innobase_open_files; @@ -3835,28 +4579,13 @@ innobase_change_buffering_inited_ok: data_mysql_default_charset_coll = (ulint) default_charset_info->number; - ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL == - my_charset_latin1.number); - ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number); - - /* Store the latin1_swedish_ci character ordering table to InnoDB. For - non-latin1_swedish_ci charsets we use the MySQL comparison functions, - and consequently we do not need to know the ordering internally in - InnoDB. 
*/ - - ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci")); - srv_latin1_ordering = my_charset_latin1.sort_order; - innobase_commit_concurrency_init_default(); - #ifdef HAVE_POSIX_FALLOCATE srv_use_posix_fallocate = (ibool) innobase_use_fallocate; #endif srv_use_atomic_writes = (ibool) innobase_use_atomic_writes; - if (innobase_use_atomic_writes) { fprintf(stderr, "InnoDB: using atomic writes.\n"); - /* Force doublewrite buffer off, atomic writes replace it. */ if (srv_use_doublewrite_buf) { fprintf(stderr, "InnoDB: Switching off doublewrite buffer " @@ -3866,7 +4595,7 @@ innobase_change_buffering_inited_ok: /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/ #ifndef _WIN32 - if(!innobase_file_flush_method || + if (!innobase_file_flush_method || !strstr(innobase_file_flush_method, "O_DIRECT")) { innobase_file_flush_method = srv_file_flush_method_str = (char*)"O_DIRECT"; @@ -3887,7 +4616,7 @@ innobase_change_buffering_inited_ok: int count; count = array_elements(all_pthread_mutexes); - mysql_mutex_register("innodb", all_pthread_mutexes, count); + mysql_mutex_register("innodb", all_pthread_mutexes, count); # ifdef UNIV_PFS_MUTEX count = array_elements(all_innodb_mutexes); @@ -3913,6 +4642,19 @@ innobase_change_buffering_inited_ok: mysql_cond_register("innodb", all_innodb_conds, count); #endif /* HAVE_PSI_INTERFACE */ + /* Set buffer pool size to default for fast startup when mysqld is + run with --help --verbose options. 
*/ + /* JAN: TODO: MySQL 5.7 has opt_verbose + if (opt_help && opt_verbose + && srv_buf_pool_size > srv_buf_pool_def_size) { + ib::warn() << "Setting innodb_buf_pool_size to " + << srv_buf_pool_def_size << " for fast startup, " + << "when running with --help --verbose options."; + srv_buf_pool_size_org = srv_buf_pool_size; + srv_buf_pool_size = srv_buf_pool_def_size; + } + */ + /* Since we in this module access directly the fields of a trx struct, and due to different headers and flags it might happen that ib_mutex_t has a different size in this module and in InnoDB @@ -3921,17 +4663,33 @@ innobase_change_buffering_inited_ok: err = innobase_start_or_create_for_mysql(); - if (err != DB_SUCCESS) { - goto mem_free_and_error; + if (srv_buf_pool_size_org != 0) { + /* Set the original value back to show in help. */ + srv_buf_pool_size_org = + buf_pool_size_align(srv_buf_pool_size_org); + innobase_buffer_pool_size = + static_cast(srv_buf_pool_size_org); + } else { + innobase_buffer_pool_size = + static_cast(srv_buf_pool_size); } + if (err != DB_SUCCESS) { + DBUG_RETURN(innobase_init_abort()); + } + +#ifdef MYSQL_ENCRYPTION + /* Create mutex to protect encryption master_key_id. 
*/ + mutex_create(LATCH_ID_MASTER_KEY_ID_MUTEX, &master_key_id_mutex); +#endif + /* Adjust the innodb_undo_logs config object */ innobase_undo_logs_init_default_max(); innobase_old_blocks_pct = static_cast( buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE)); - ibuf_max_size_update(innobase_change_buffer_max_size); + ibuf_max_size_update(srv_change_buffer_max_size); innobase_open_tables = hash_create(200); mysql_mutex_init(innobase_share_mutex_key, @@ -3939,7 +4697,7 @@ innobase_change_buffering_inited_ok: MY_MUTEX_INIT_FAST); mysql_mutex_init(commit_cond_mutex_key, &commit_cond_m, MY_MUTEX_INIT_FAST); - mysql_cond_init(commit_cond_key, &commit_cond, NULL); + mysql_cond_init(commit_cond_key, &commit_cond, 0); mysql_mutex_init(pending_checkpoint_mutex_key, &pending_checkpoint_mutex, MY_MUTEX_INIT_FAST); @@ -3970,14 +4728,35 @@ innobase_change_buffering_inited_ok: /* Turn on monitor counters that are default on */ srv_mon_default_on(); - DBUG_RETURN(FALSE); + + /* Unit Tests */ +#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR + unit_test_os_file_get_parent_dir(); +#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */ + +#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH + test_make_filepath(); +#endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */ + +#ifdef UNIV_ENABLE_DICT_STATS_TEST + test_dict_stats_all(); +#endif /*UNIV_ENABLE_DICT_STATS_TEST */ + +#ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT +# ifdef HAVE_UT_CHRONO_T + test_row_raw_format_int(); +# endif /* HAVE_UT_CHRONO_T */ +#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */ + + DBUG_RETURN(0); + error: - DBUG_RETURN(TRUE); + DBUG_RETURN(1); } /*******************************************************************//** Closes an InnoDB database. 
-@return TRUE if error */ +@return TRUE if error */ static int innobase_end( @@ -4006,11 +4785,16 @@ innobase_end( innodb_inited = 0; hash_table_free(innobase_open_tables); innobase_open_tables = NULL; + +#ifdef MYSQL_ENCRYPTION + mutex_free(&master_key_id_mutex); +#endif if (innobase_shutdown_for_mysql() != DB_SUCCESS) { err = 1; } - srv_free_paths_and_sizes(); - my_free(internal_innobase_data_file_path); + + innobase_space_shutdown(); + mysql_mutex_destroy(&innobase_share_mutex); mysql_mutex_destroy(&commit_cond_m); mysql_cond_destroy(&commit_cond); @@ -4020,31 +4804,8 @@ innobase_end( DBUG_RETURN(err); } -/****************************************************************//** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. -@return TRUE if error */ -static -bool -innobase_flush_logs( -/*================*/ - handlerton* hton) /*!< in/out: InnoDB handlerton */ -{ - bool result = 0; - - DBUG_ENTER("innobase_flush_logs"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (!srv_read_only_mode) { - log_buffer_flush_to_disk(); - } - - DBUG_RETURN(result); -} - /*****************************************************************//** Commits a transaction in an InnoDB database. */ -static void innobase_commit_low( /*================*/ @@ -4071,6 +4832,7 @@ innobase_commit_low( trx_commit_for_mysql(trx); } + trx->will_lock = 0; #ifdef WITH_WSREP if (wsrep_on(thd)) { thd_proc_info(thd, tmp); } #endif /* WITH_WSREP */ @@ -4081,36 +4843,31 @@ Creates an InnoDB transaction struct for the thd if it does not yet have one. Starts a new InnoDB transaction if a transaction is not yet started. And assigns a new snapshot for a consistent read if the transaction does not yet have one. 
-@return 0 */ +@return 0 */ static int innobase_start_trx_and_assign_read_view( /*====================================*/ - handlerton* hton, /*!< in: Innodb handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ THD* thd) /*!< in: MySQL thread handle of the user for whom the transaction should be committed */ { - trx_t* trx; - DBUG_ENTER("innobase_start_trx_and_assign_read_view"); DBUG_ASSERT(hton == innodb_hton_ptr); /* Create a new trx struct for thd, if it does not yet have one */ - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - /* This is just to play safe: release a possible FIFO ticket and - search latch. Since we can potentially reserve the trx_sys->mutex, - we have to release the search system latch first to obey the latching - order. */ - - trx_search_latch_release_if_reserved(trx); + TrxInInnoDB trx_in_innodb(trx); innobase_srv_conc_force_exit_innodb(trx); - /* If the transaction is not started yet, start it */ + /* The transaction should not be active yet, start it */ - trx_start_if_not_started_xa(trx); + ut_ad(!trx_is_started(trx)); + + trx_start_if_not_started_xa(trx, false); /* Assign a read view if the transaction does not have it yet. 
Do this only if transaction is using REPEATABLE READ isolation @@ -4123,10 +4880,10 @@ innobase_start_trx_and_assign_read_view( } else { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_UNSUPPORTED, - "InnoDB: WITH CONSISTENT SNAPSHOT " - "was ignored because this phrase " - "can only be used with " - "REPEATABLE READ isolation level."); + "InnoDB: WITH CONSISTENT SNAPSHOT" + " was ignored because this phrase" + " can only be used with" + " REPEATABLE READ isolation level."); } /* Set the MySQL flag to mark that there is an active transaction */ @@ -4139,48 +4896,75 @@ innobase_start_trx_and_assign_read_view( static void innobase_commit_ordered_2( -/*============*/ +/*======================*/ trx_t* trx, /*!< in: Innodb transaction */ THD* thd) /*!< in: MySQL thread handle */ { DBUG_ENTER("innobase_commit_ordered_2"); - /* We need current binlog position for mysqlbackup to work. - Note, the position is current because commit_ordered is guaranteed - to be called in same sequenece as writing to binlog. */ + bool read_only = trx->read_only || trx->id == 0; -retry: - if (innobase_commit_concurrency > 0) { - mysql_mutex_lock(&commit_cond_m); - commit_threads++; + if (!read_only) { + + while (innobase_commit_concurrency > 0) { + + mysql_mutex_lock(&commit_cond_m); + + ++commit_threads; + + if (commit_threads + <= innobase_commit_concurrency) { + + mysql_mutex_unlock(&commit_cond_m); + break; + } + + --commit_threads; + + mysql_cond_wait(&commit_cond, &commit_cond_m); - if (commit_threads > innobase_commit_concurrency) { - commit_threads--; - mysql_cond_wait(&commit_cond, - &commit_cond_m); - mysql_mutex_unlock(&commit_cond_m); - goto retry; - } - else { mysql_mutex_unlock(&commit_cond_m); } + + /* The following call reads the binary log position of + the transaction being committed. 
+ + Binary logging of other engines is not relevant to + InnoDB as all InnoDB requires is that committing + InnoDB transactions appear in the same order in the + MySQL binary log as they appear in InnoDB logs, which + is guaranteed by the server. + + If the binary log is not enabled, or the transaction + is not written to the binary log, the file name will + be a NULL pointer. */ + ulonglong pos; + + thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos); + + trx->mysql_log_offset = static_cast(pos); + + /* Don't do write + flush right now. For group commit + to work we want to do the flush later. */ + trx->flush_log_later = true; } - unsigned long long pos; - thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos); - trx->mysql_log_offset= static_cast(pos); - /* Don't do write + flush right now. For group commit - to work we want to do the flush in the innobase_commit() - method, which runs without holding any locks. */ - trx->flush_log_later = TRUE; innobase_commit_low(trx); - trx->flush_log_later = FALSE; - if (innobase_commit_concurrency > 0) { - mysql_mutex_lock(&commit_cond_m); - commit_threads--; - mysql_cond_signal(&commit_cond); - mysql_mutex_unlock(&commit_cond_m); + if (!read_only) { + trx->flush_log_later = false; + + if (innobase_commit_concurrency > 0) { + + mysql_mutex_lock(&commit_cond_m); + + ut_ad(commit_threads > 0); + --commit_threads; + + mysql_cond_signal(&commit_cond); + + mysql_mutex_unlock(&commit_cond_m); + } } DBUG_VOID_RETURN; @@ -4201,7 +4985,7 @@ the one handling the rest of the transaction. 
*/ static void innobase_commit_ordered( -/*============*/ +/*====================*/ handlerton *hton, /*!< in: Innodb handlerton */ THD* thd, /*!< in: MySQL thread handle of the user for whom the transaction should be committed */ @@ -4213,6 +4997,7 @@ innobase_commit_ordered( DBUG_ASSERT(hton == innodb_hton_ptr); trx = check_trx_exists(thd); + TrxInInnoDB trx_in_innodb(trx); /* Since we will reserve the kernel mutex, we must not be holding the search system latch, or we will disobey the latching order. But we @@ -4233,7 +5018,7 @@ innobase_commit_ordered( innobase_commit_ordered_2(trx, thd); - trx_set_active_commit_ordered(trx); + trx_set_active_commit_ordered(trx); DBUG_VOID_RETURN; } @@ -4241,12 +5026,13 @@ innobase_commit_ordered( /*****************************************************************//** Commits a transaction in an InnoDB database or marks an SQL statement ended. -@return 0 */ +@return 0 or deadlock error if the transaction was aborted by another + higher priority transaction. */ static int innobase_commit( /*============*/ - handlerton* hton, /*!< in: Innodb handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ THD* thd, /*!< in: MySQL thread handle of the user for whom the transaction should be committed */ @@ -4254,22 +5040,26 @@ innobase_commit( false - the current SQL statement ended */ { - trx_t* trx; - DBUG_ENTER("innobase_commit"); DBUG_PRINT("enter", ("commit_trx: %d", commit_trx)); DBUG_ASSERT(hton == innodb_hton_ptr); DBUG_PRINT("trans", ("ending transaction")); - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - /* Since we will reserve the trx_sys->mutex, we have to release - the search system latch first to obey the latching order. 
*/ + TrxInInnoDB trx_in_innodb(trx); - if (trx->has_search_latch && !trx_is_active_commit_ordered(trx)) { - trx_search_latch_release_if_reserved(trx); + if (trx_in_innodb.is_aborted()) { + + innobase_rollback(hton, thd, commit_trx); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, thd)); } + ut_ad(trx->dict_operation_lock_mode == 0); + ut_ad(trx->dict_operation == TRX_DICT_OP_NONE); + /* Transaction is deregistered only in a commit or a rollback. If it is deregistered we know there cannot be resources to be freed and we could return immediately. For the time being, we play safe @@ -4277,16 +5067,23 @@ innobase_commit( if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - sql_print_error("Transaction not registered for MySQL 2PC, " - "but transaction is active"); + sql_print_error("Transaction not registered for MySQL 2PC," + " but transaction is active"); } + bool read_only = trx->read_only || trx->id == 0; + DBUG_PRINT("info", ("readonly: %d", read_only)); + if (commit_trx || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + DBUG_EXECUTE_IF("crash_innodb_before_commit", + DBUG_SUICIDE();); + /* Run the fast part of commit if we did not already. */ if (!trx_is_active_commit_ordered(trx)) { innobase_commit_ordered_2(trx, thd); + } /* We were instructed to commit the whole transaction, or @@ -4297,10 +5094,14 @@ innobase_commit( this one, to allow then to group commit with us. */ thd_wakeup_subsequent_commits(thd, 0); - /* We did the first part already in innobase_commit_ordered(), - Now finish by doing a write + flush of logs. */ + if (!read_only) { + trx->flush_log_later = false; + } + + /* Now do a write + flush of logs. 
*/ trx_commit_complete_for_mysql(trx); - trx_deregister_from_2pc(trx); + + trx_deregister_from_2pc(trx); } else { /* We just mark the SQL statement ended and do not do a transaction commit */ @@ -4308,7 +5109,9 @@ innobase_commit( /* If we had reserved the auto-inc lock for some table in this SQL statement we release it now */ - lock_unlock_table_autoinc(trx); + if (!read_only) { + lock_unlock_table_autoinc(trx); + } /* Store the current undo_no of the transaction so that we know where to roll back if we have to roll back the next @@ -4317,7 +5120,8 @@ innobase_commit( trx_mark_sql_stat_end(trx); } - trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */ + /* Reset the number AUTO-INC rows required */ + trx->n_autoinc_rows = 0; /* This is a statement level variable. */ trx->fts_next_doc_id = 0; @@ -4329,12 +5133,12 @@ innobase_commit( /*****************************************************************//** Rolls back a transaction or the latest SQL statement. -@return 0 or error number */ +@return 0 or error number */ static int innobase_rollback( /*==============*/ - handlerton* hton, /*!< in: Innodb handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ THD* thd, /*!< in: handle to the MySQL thread of the user whose transaction should be rolled back */ @@ -4342,49 +5146,67 @@ innobase_rollback( transaction FALSE - rollback the current statement only */ { - dberr_t error; - trx_t* trx; - DBUG_ENTER("innobase_rollback"); DBUG_ASSERT(hton == innodb_hton_ptr); DBUG_PRINT("trans", ("aborting transaction")); - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ + TrxInInnoDB trx_in_innodb(trx); - trx_search_latch_release_if_reserved(trx); + ut_ad(trx_in_innodb.is_aborted() + || (trx->dict_operation_lock_mode == 0 + && trx->dict_operation == TRX_DICT_OP_NONE)); innobase_srv_conc_force_exit_innodb(trx); - trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */ + /* Reset the number AUTO-INC rows required */ + + trx->n_autoinc_rows = 0; /* If we had reserved the auto-inc lock for some table (if we come here to roll back the latest SQL statement) we release it now before a possibly lengthy rollback */ - lock_unlock_table_autoinc(trx); + if (!trx_in_innodb.is_aborted()) { + lock_unlock_table_autoinc(trx); + } /* This is a statement level variable. */ + trx->fts_next_doc_id = 0; + dberr_t error; + if (rollback_trx || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { error = trx_rollback_for_mysql(trx); + + if (trx->state == TRX_STATE_FORCED_ROLLBACK) { +#ifdef UNIV_DEBUG + char buffer[1024]; + + ib::info() << "Forced rollback : " + << thd_get_error_context_description(thd, + buffer, sizeof(buffer), 512); +#endif /* UNIV_DEBUG */ + + trx->state = TRX_STATE_NOT_STARTED; + } + trx_deregister_from_2pc(trx); } else { + error = trx_rollback_last_sql_stat_for_mysql(trx); } - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); + DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd)); } /*****************************************************************//** Rolls back a transaction -@return 0 or error number */ +@return 0 or error number */ static int innobase_rollback_trx( @@ -4407,14 +5229,17 @@ innobase_rollback_trx( /* If we had reserved the auto-inc lock for some table (if we come here to roll back the latest SQL statement) we release it now before a possibly lengthy rollback */ - - lock_unlock_table_autoinc(trx); - - if (!trx->read_only) { - error = trx_rollback_for_mysql(trx); + if (!TrxInInnoDB::is_aborted(trx)) { + lock_unlock_table_autoinc(trx); } - 
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); + if (trx_is_rseg_updated(trx)) { + error = trx_rollback_for_mysql(trx); + } else { + trx->will_lock = 0; + } + + DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd)); } @@ -4493,7 +5318,7 @@ checkpoint when necessary.*/ UNIV_INTERN void innobase_mysql_log_notify( -/*===============*/ +/*======================*/ ib_uint64_t write_lsn, /*!< in: LSN written to log file */ ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */ { @@ -4566,35 +5391,31 @@ static int innobase_rollback_to_savepoint( /*===========================*/ - handlerton* hton, /*!< in: Innodb handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ THD* thd, /*!< in: handle to the MySQL thread of the user whose transaction should be rolled back to savepoint */ void* savepoint) /*!< in: savepoint data */ { - ib_int64_t mysql_binlog_cache_pos; - dberr_t error; - trx_t* trx; - char name[64]; DBUG_ENTER("innobase_rollback_to_savepoint"); DBUG_ASSERT(hton == innodb_hton_ptr); - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); + TrxInInnoDB trx_in_innodb(trx); innobase_srv_conc_force_exit_innodb(trx); /* TODO: use provided savepoint data area to store savepoint data */ + char name[64]; + longlong2str((ulint) savepoint, name, 36); - error = trx_rollback_to_savepoint_for_mysql( + int64_t mysql_binlog_cache_pos; + + dberr_t error = trx_rollback_to_savepoint_for_mysql( trx, name, &mysql_binlog_cache_pos); if (error == DB_SUCCESS && trx->fts_trx != NULL) { @@ -4619,17 +5440,17 @@ innobase_rollback_to_savepoint_can_release_mdl( of the user whose transaction should be rolled back to savepoint */ { - trx_t* trx; - DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl"); DBUG_ASSERT(hton == innodb_hton_ptr); - trx = check_trx_exists(thd); - ut_ad(trx); + trx_t* trx = check_trx_exists(thd); + + TrxInInnoDB trx_in_innodb(trx); + + /* If transaction has not acquired any locks then it is safe + to release MDL after rollback to savepoint */ + if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) { - /* If transaction has not acquired any locks then it is safe - to release MDL after rollback to savepoint */ - if (!(UT_LIST_GET_LEN(trx->lock.trx_locks))) { DBUG_RETURN(true); } @@ -4644,7 +5465,7 @@ static int innobase_release_savepoint( /*=======================*/ - handlerton* hton, /*!< in: handlerton for Innodb */ + handlerton* hton, /*!< in: handlerton for InnoDB */ THD* thd, /*!< in: handle to the MySQL thread of the user whose transaction's savepoint should be released */ @@ -4659,9 +5480,7 @@ innobase_release_savepoint( trx = check_trx_exists(thd); - if (trx->state == TRX_STATE_NOT_STARTED) { - trx_start_if_not_started(trx); - } + TrxInInnoDB trx_in_innodb(trx); /* TODO: use provided savepoint data area to store savepoint data */ @@ -4678,18 +5497,15 @@ innobase_release_savepoint( /*****************************************************************//** Sets a transaction savepoint. 
-@return always 0, that is, always succeeds */ +@return always 0, that is, always succeeds */ static int innobase_savepoint( /*===============*/ - handlerton* hton, /*!< in: handle to the Innodb handlerton */ - THD* thd, /*!< in: handle to the MySQL thread */ - void* savepoint) /*!< in: savepoint data */ + handlerton* hton, /*!< in: handle to the InnoDB handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + void* savepoint)/*!< in: savepoint data */ { - dberr_t error; - trx_t* trx; - DBUG_ENTER("innobase_savepoint"); DBUG_ASSERT(hton == innodb_hton_ptr); @@ -4697,13 +5513,9 @@ innobase_savepoint( (unless we are in sub-statement), so SQL layer ensures that this method is never called in such situation. */ - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. */ - - trx_search_latch_release_if_reserved(trx); + TrxInInnoDB trx_in_innodb(trx); innobase_srv_conc_force_exit_innodb(trx); @@ -4711,10 +5523,11 @@ innobase_savepoint( DBUG_ASSERT(trx_is_registered_for_2pc(trx)); /* TODO: use provided savepoint data area to store savepoint data */ - char name[64]; + char name[64]; + longlong2str((ulint) savepoint,name,36); - error = trx_savepoint_for_mysql(trx, name, (ib_int64_t)0); + dberr_t error = trx_savepoint_for_mysql(trx, name, 0); if (error == DB_SUCCESS && trx->fts_trx != NULL) { fts_savepoint_take(trx, trx->fts_trx, name); @@ -4725,7 +5538,7 @@ innobase_savepoint( /*****************************************************************//** Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ +@return 0 or error number */ static int innobase_close_connection( @@ -4734,55 +5547,91 @@ innobase_close_connection( THD* thd) /*!< in: handle to the MySQL thread of the user whose resources should be free'd */ { - trx_t* trx; DBUG_ENTER("innobase_close_connection"); DBUG_ASSERT(hton == innodb_hton_ptr); - trx = thd_to_trx(thd); - ut_a(trx); + trx_t* trx = thd_to_trx(thd); + bool free_trx = false; - if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { + /* During server initialization MySQL layer will try to open + some of the master-slave tables those residing in InnoDB. + After MySQL layer is done with needed checks these tables + are closed followed by invocation of close_connection on the + associated thd. - sql_print_error("Transaction not registered for MySQL 2PC, " + close_connection rolls back the trx and then frees it. + Once trx is freed thd should avoid maintaining reference to + it else it can be classified as stale reference. + + Re-invocation of innodb_close_connection on same thd should + get trx as NULL. */ + + if (trx) { + + TrxInInnoDB trx_in_innodb(trx); + + if (trx_in_innodb.is_aborted()) { + + while (trx_is_started(trx)) { + + os_thread_sleep(20); + } + } + + if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { + + sql_print_error("Transaction not registered for MySQL 2PC, " "but transaction is active"); + } + + /* Disconnect causes rollback in the following cases: + - trx is not started, or + - trx is in *not* in PREPARED state, or + - trx has not updated any persistent data. + TODO/FIXME: it does not make sense to initiate rollback + in the 1st and 3rd case. 
*/ + if (trx_is_started(trx)) { + if (trx_state_eq(trx, TRX_STATE_PREPARED)) { + if (trx_is_redo_rseg_updated(trx)) { + trx_disconnect_prepared(trx); + } else { + trx_rollback_for_mysql(trx); + trx_deregister_from_2pc(trx); + free_trx = true; + } + } else { + sql_print_warning( + "MySQL is closing a connection that has an active " + "InnoDB transaction. " TRX_ID_FMT " row modifications " + "will roll back.", + " row modifications will roll back.", + trx->undo_no); + ut_d(ib::warn() + << "trx: " << trx << " started on: " + << innobase_basename(trx->start_file) + << ":" << trx->start_line); + innobase_rollback_trx(trx); + free_trx = true; + } + } else { + innobase_rollback_trx(trx); + free_trx = true; + } } - if (trx_is_started(trx) && global_system_variables.log_warnings) { - - sql_print_warning( - "MySQL is closing a connection that has an active " - "InnoDB transaction. " TRX_ID_FMT " row modifications " - "will roll back.", - trx->undo_no); + /* Free trx only after TrxInInnoDB is deleted. */ + if (free_trx) { + trx_free_for_mysql(trx); } - innobase_rollback_trx(trx); + UT_DELETE(thd_to_innodb_session(thd)); - trx_free_for_mysql(trx); + thd_to_innodb_session(thd) = NULL; DBUG_RETURN(0); } -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. -@return 0 or error number */ -UNIV_INTERN -int -innobase_close_thd( -/*===============*/ - THD* thd) /*!< in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - trx_t* trx = thd_to_trx(thd); - - if (!trx) { - return(0); - } - - return(innobase_close_connection(innodb_hton_ptr, thd)); -} - UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock); /*****************************************************************//** @@ -4790,10 +5639,10 @@ Cancel any pending lock request associated with the current THD. 
*/ static void innobase_kill_query( -/*======================*/ - handlerton* hton, /*!< in: innobase handlerton */ - THD* thd, /*!< in: MySQL thread being killed */ - enum thd_kill_levels level) /*!< in: kill level */ +/*================*/ + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: MySQL thread being killed */ + enum thd_kill_levels level) /*!< in: kill level */ { trx_t* trx; @@ -4814,50 +5663,61 @@ innobase_kill_query( } wsrep_thd_UNLOCK(thd); #endif /* WITH_WSREP */ + trx = thd_to_trx(thd); - if (trx && trx->lock.wait_lock) { - /* In wsrep BF we have already took lock_sys and trx - mutex either on wsrep_abort_transaction() or - before wsrep_kill_victim(). In replication we - could own lock_sys mutex taken in - lock_deadlock_check_and_resolve(). */ + if (trx != NULL) { + /* Cancel a pending lock request if there are any */ + bool lock_mutex_taken = false; + bool trx_mutex_taken = false; + bool already_have_lock_mutex = false; + bool already_have_trx_mutex = false; + dberr_t err = DB_SUCCESS; - WSREP_DEBUG("Killing victim trx %p BF %d trx BF %d trx_id " TRX_ID_FMT " ABORT %d thd %p" - " current_thd %p BF %d wait_lock_modes: %s\n", - trx, wsrep_thd_is_BF(trx->mysql_thd, FALSE), - wsrep_thd_is_BF(thd, FALSE), - trx->id, trx->abort_type, - trx->mysql_thd, - current_thd, - wsrep_thd_is_BF(current_thd, FALSE), - lock_get_info(trx->lock.wait_lock).c_str()); + if (trx->lock.wait_lock) { + WSREP_DEBUG("Killing victim trx %p BF %d trx BF %d trx_id " IB_ID_FMT " ABORT %d thd %p" + " current_thd %p BF %d", + trx, wsrep_thd_is_BF(trx->mysql_thd, FALSE), + wsrep_thd_is_BF(thd, FALSE), + trx->id, trx->abort_type, + trx->mysql_thd, + current_thd, + wsrep_thd_is_BF(current_thd, FALSE)); + } if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { + trx->abort_type == TRX_SERVER_ABORT) { ut_ad(!lock_mutex_own()); lock_mutex_enter(); + lock_mutex_taken = true; + } else { + already_have_lock_mutex = true; } if 
(trx->abort_type != TRX_WSREP_ABORT) { + ut_ad(!trx_mutex_own(trx)); trx_mutex_enter(trx); + trx_mutex_taken = true; + } else { + already_have_trx_mutex = true; } - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(trx)); + err = lock_trx_handle_wait(trx, + (lock_mutex_taken || already_have_lock_mutex), + (trx_mutex_taken || already_have_trx_mutex)); - /* Cancel a pending lock request. */ - if (trx->lock.wait_lock) { - lock_cancel_waiting_and_release(trx->lock.wait_lock); + if (lock_mutex_taken) { + ut_ad(lock_mutex_own()); + lock_mutex_exit(); } - if (trx->abort_type != TRX_WSREP_ABORT) { + if (trx_mutex_taken) { + ut_ad(trx_mutex_own(trx)); trx_mutex_exit(trx); } - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { - lock_mutex_exit(); + if (err != DB_SUCCESS && err != DB_LOCK_WAIT) { + ib::warn() << "Killing connection failed " << ut_strerr(err) << "("<table) { - const ulint flags = prebuilt->table->flags; + if (m_prebuilt && m_prebuilt->table) { + const ulint flags = m_prebuilt->table->flags; switch (dict_tf_get_rec_format(flags)) { case REC_FORMAT_REDUNDANT: @@ -4896,31 +5754,46 @@ ha_innobase::get_row_type() const return(ROW_TYPE_NOT_USED); } - - /****************************************************************//** Get the table flags to use for the statement. -@return table flags */ -UNIV_INTERN +@return table flags */ + handler::Table_flags ha_innobase::table_flags() const /*============================*/ { - /* Need to use tx_isolation here since table flags is (also) - called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); + THD* thd = ha_thd(); + handler::Table_flags flags = m_int_table_flags; - if (tx_isolation <= ISO_READ_COMMITTED) { - return(int_table_flags); + /* If querying the table flags when no table_share is given, + then we must check if the table to be created/checked is partitioned. 
+ */ + if (table_share == NULL) { + /* JAN: TODO: MySQL 5.7 Partitioning && thd_get_work_part_info(thd) != NULL) { */ + /* Currently ha_innopart does not support + all InnoDB features such as GEOMETRY, FULLTEXT etc. */ + /* JAN: TODO: MySQL 5.7 + flags &= ~(HA_INNOPART_DISABLED_TABLE_FLAGS); + + */ } - return(int_table_flags | HA_BINLOG_STMT_CAPABLE); + /* Need to use tx_isolation here since table flags is (also) + called before prebuilt is inited. */ + + ulong const tx_isolation = thd_tx_isolation(thd); + + if (tx_isolation <= ISO_READ_COMMITTED) { + return(flags); + } + + return(flags | HA_BINLOG_STMT_CAPABLE); } /****************************************************************//** Returns the table type (storage engine name). -@return table type */ -UNIV_INTERN +@return table type */ + const char* ha_innobase::table_type() const /*===========================*/ @@ -4931,7 +5804,7 @@ ha_innobase::table_type() const /****************************************************************//** Returns the index type. @return index type */ -UNIV_INTERN + const char* ha_innobase::index_type( /*====================*/ @@ -4941,6 +5814,8 @@ ha_innobase::index_type( if (index && index->type & DICT_FTS) { return("FULLTEXT"); + } else if (dict_index_is_spatial(index)) { + return("SPATIAL"); } else { return("BTREE"); } @@ -4948,8 +5823,8 @@ ha_innobase::index_type( /****************************************************************//** Returns the table file name extension. -@return file extension string */ -UNIV_INTERN +@return file extension string */ + const char** ha_innobase::bas_ext() const /*========================*/ @@ -4959,8 +5834,8 @@ ha_innobase::bas_ext() const /****************************************************************//** Returns the operations supported for indexes. 
-@return flags of supported operations */ -UNIV_INTERN +@return flags of supported operations */ + ulong ha_innobase::index_flags( /*=====================*/ @@ -4968,20 +5843,35 @@ ha_innobase::index_flags( uint, bool) const { - ulong extra_flag= 0; - if (table && key == table->s->primary_key) - extra_flag= HA_CLUSTERED_INDEX; - return((table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) - ? 0 - : (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER - | HA_READ_RANGE | HA_KEYREAD_ONLY | extra_flag - | HA_DO_INDEX_COND_PUSHDOWN)); + if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) { + return(0); + } + + ulong extra_flag= 0; + + if (table && key == table->s->primary_key) { + extra_flag= HA_CLUSTERED_INDEX; + } + + ulong flags = HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER + | HA_READ_RANGE | HA_KEYREAD_ONLY + | extra_flag + | HA_DO_INDEX_COND_PUSHDOWN; + + /* For spatial index, we don't support descending scan + and ICP so far. */ + if (table_share->key_info[key].flags & HA_SPATIAL) { + flags = HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE + | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR; + } + + return(flags); } /****************************************************************//** Returns the maximum number of keys. -@return MAX_KEY */ -UNIV_INTERN +@return MAX_KEY */ + uint ha_innobase::max_supported_keys() const /*===================================*/ @@ -4991,8 +5881,8 @@ ha_innobase::max_supported_keys() const /****************************************************************//** Returns the maximum key length. -@return maximum supported key length, in bytes */ -UNIV_INTERN +@return maximum supported key length, in bytes */ + uint ha_innobase::max_supported_key_length() const /*=========================================*/ @@ -5028,8 +5918,8 @@ ha_innobase::max_supported_key_length() const /****************************************************************//** Returns the key map of keys that are usable for scanning. 
-@return key_map_full */ -UNIV_INTERN +@return key_map_full */ + const key_map* ha_innobase::keys_to_use_for_scanning() /*===================================*/ @@ -5039,8 +5929,8 @@ ha_innobase::keys_to_use_for_scanning() /****************************************************************//** Determines if table caching is supported. -@return HA_CACHE_TBL_ASKTRANSACT */ -UNIV_INTERN +@return HA_CACHE_TBL_ASKTRANSACT */ + uint8 ha_innobase::table_cache_type() /*===========================*/ @@ -5050,8 +5940,8 @@ ha_innobase::table_cache_type() /****************************************************************//** Determines if the primary key is clustered index. -@return true */ -UNIV_INTERN +@return true */ + bool ha_innobase::primary_key_is_clustered() /*===================================*/ @@ -5059,19 +5949,22 @@ ha_innobase::primary_key_is_clustered() return(true); } -/*****************************************************************//** -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. Example: test/mytable. -On Windows normalization puts both the database name and the -table name always to lower case if "set_lower_case" is set to TRUE. */ +/** Normalizes a table name string. +A normalized name consists of the database name catenated to '/' +and table name. For example: test/mytable. +On Windows, normalization puts both the database name and the +table name always to lower case if "set_lower_case" is set to TRUE. +@param[out] norm_name Normalized name, null-terminated. +@param[in] name Name to normalize. +@param[in] set_lower_case True if we also should fold to lower case. 
*/ void -normalize_table_name_low( -/*=====================*/ - char* norm_name, /*!< out: normalized name as a +normalize_table_name_c_low( +/*=======================*/ + char* norm_name, /* out: normalized name as a null-terminated string */ - const char* name, /*!< in: table name string */ - ibool set_lower_case) /*!< in: TRUE if we want to set name - to lower case */ + const char* name, /* in: table name string */ + ibool set_lower_case) /* in: TRUE if we want to set + name to lower case */ { char* name_ptr; ulint name_len; @@ -5124,6 +6017,23 @@ normalize_table_name_low( } } +/** Normalizes a table name string. +A normalized name consists of the database name catenated to '/' +and table name. For example: test/mytable. +On Windows, normalization puts both the database name and the +table name always to lower case if "set_lower_case" is set to TRUE. +@param[out] norm_name Normalized name, null-terminated. +@param[in] name Name to normalize. +@param[in] set_lower_case True if we also should fold to lower case. */ +void +create_table_info_t::normalize_table_name_low( + char* norm_name, + const char* name, + ibool set_lower_case) +{ + normalize_table_name_c_low(norm_name, name, set_lower_case); +} + #if !defined(DBUG_OFF) /********************************************************************* Test normalize_table_name_low(). */ @@ -5174,11 +6084,12 @@ test_normalize_table_name_low() }; for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) { - printf("test_normalize_table_name_low(): " - "testing \"%s\", expected \"%s\"... ", + printf("test_normalize_table_name_low():" + " testing \"%s\", expected \"%s\"... 
", test_data[i][0], test_data[i][1]); - normalize_table_name_low(norm_name, test_data[i][0], FALSE); + create_table_info_t::normalize_table_name_low( + norm_name, test_data[i][0], FALSE); if (strcmp(norm_name, test_data[i][1]) == 0) { printf("ok\n"); @@ -5200,30 +6111,27 @@ test_ut_format_name() struct { const char* name; - ibool is_table; ulint buf_size; const char* expected; } test_data[] = { - {"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""}, - {"test/t1", TRUE, 12, "\"test\".\"t1\""}, - {"test/t1", TRUE, 11, "\"test\".\"t1"}, - {"test/t1", TRUE, 10, "\"test\".\"t"}, - {"test/t1", TRUE, 9, "\"test\".\""}, - {"test/t1", TRUE, 8, "\"test\"."}, - {"test/t1", TRUE, 7, "\"test\""}, - {"test/t1", TRUE, 6, "\"test"}, - {"test/t1", TRUE, 5, "\"tes"}, - {"test/t1", TRUE, 4, "\"te"}, - {"test/t1", TRUE, 3, "\"t"}, - {"test/t1", TRUE, 2, "\""}, - {"test/t1", TRUE, 1, ""}, - {"test/t1", TRUE, 0, "BUF_NOT_CHANGED"}, - {"table", TRUE, sizeof(buf), "\"table\""}, - {"ta'le", TRUE, sizeof(buf), "\"ta'le\""}, - {"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""}, - {"ta`le", TRUE, sizeof(buf), "\"ta`le\""}, - {"index", FALSE, sizeof(buf), "\"index\""}, - {"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""}, + {"test/t1", sizeof(buf), "`test`.`t1`"}, + {"test/t1", 12, "`test`.`t1`"}, + {"test/t1", 11, "`test`.`t1"}, + {"test/t1", 10, "`test`.`t"}, + {"test/t1", 9, "`test`.`"}, + {"test/t1", 8, "`test`."}, + {"test/t1", 7, "`test`"}, + {"test/t1", 6, "`test"}, + {"test/t1", 5, "`tes"}, + {"test/t1", 4, "`te"}, + {"test/t1", 3, "`t"}, + {"test/t1", 2, "`"}, + {"test/t1", 1, ""}, + {"test/t1", 0, "BUF_NOT_CHANGED"}, + {"table", sizeof(buf), "`table`"}, + {"ta'le", sizeof(buf), "`ta'le`"}, + {"ta\"le", sizeof(buf), "`ta\"le`"}, + {"ta`le", sizeof(buf), "`ta``le`"}, }; for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) { @@ -5233,35 +6141,509 @@ test_ut_format_name() char* ret; ret = ut_format_name(test_data[i].name, - test_data[i].is_table, buf, test_data[i].buf_size); ut_a(ret == buf); if 
(strcmp(buf, test_data[i].expected) == 0) { - fprintf(stderr, - "ut_format_name(%s, %s, buf, %lu), " - "expected %s, OK\n", - test_data[i].name, - test_data[i].is_table ? "TRUE" : "FALSE", - test_data[i].buf_size, - test_data[i].expected); + ib::info() << "ut_format_name(" << test_data[i].name + << ", buf, " << test_data[i].buf_size << ")," + " expected " << test_data[i].expected + << ", OK"; } else { - fprintf(stderr, - "ut_format_name(%s, %s, buf, %lu), " - "expected %s, ERROR: got %s\n", - test_data[i].name, - test_data[i].is_table ? "TRUE" : "FALSE", - test_data[i].buf_size, - test_data[i].expected, - buf); + ib::error() << "ut_format_name(" << test_data[i].name + << ", buf, " << test_data[i].buf_size << ")," + " expected " << test_data[i].expected + << ", ERROR: got " << buf; ut_error; } } } #endif /* !DBUG_OFF */ +/** Match index columns between MySQL and InnoDB. +This function checks whether the index column information +is consistent between KEY info from mysql and that from innodb index. +@param[in] key_info Index info from mysql +@param[in] index_info Index info from InnoDB +@return true if all column types match. */ +bool +innobase_match_index_columns( + const KEY* key_info, + const dict_index_t* index_info) +{ + const KEY_PART_INFO* key_part; + const KEY_PART_INFO* key_end; + const dict_field_t* innodb_idx_fld; + const dict_field_t* innodb_idx_fld_end; + + DBUG_ENTER("innobase_match_index_columns"); + + /* Check whether user defined index column count matches */ + if (key_info->user_defined_key_parts != + index_info->n_user_defined_cols) { + DBUG_RETURN(FALSE); + } + + key_part = key_info->key_part; + key_end = key_part + key_info->user_defined_key_parts; + innodb_idx_fld = index_info->fields; + innodb_idx_fld_end = index_info->fields + index_info->n_fields; + + /* Check each index column's datatype. We do not check + column name because there exists case that index + column name got modified in mysql but such change does not + propagate to InnoDB. 
+ One hidden assumption here is that the index column sequences + are matched up between those in mysql and InnoDB. */ + for (; key_part != key_end; ++key_part) { + ulint col_type; + ibool is_unsigned; + ulint mtype = innodb_idx_fld->col->mtype; + + /* Need to translate to InnoDB column type before + comparison. */ + col_type = get_innobase_type_from_mysql_type( + &is_unsigned, key_part->field); + + /* Ignore InnoDB specific system columns. */ + while (mtype == DATA_SYS) { + innodb_idx_fld++; + + if (innodb_idx_fld >= innodb_idx_fld_end) { + DBUG_RETURN(FALSE); + } + + mtype = innodb_idx_fld->col->mtype; + } + + /* MariaDB-5.5 compatibility */ + if ((key_part->field->real_type() == MYSQL_TYPE_ENUM || + key_part->field->real_type() == MYSQL_TYPE_SET) && + mtype == DATA_FIXBINARY) { + col_type= DATA_FIXBINARY; + } + + if (col_type != mtype) { + /* If the col_type we get from mysql type is a geometry + data type, we should check if mtype is a legacy type + from 5.6, either upgraded to DATA_GEOMETRY or not. + This is indeed not an accurate check, but should be + safe, since DATA_BLOB would be upgraded once we create + spatial index on it and we intend to use DATA_GEOMETRY + for legacy GIS data types which are of var-length. 
*/ + switch (col_type) { + case DATA_POINT: + case DATA_VAR_POINT: + if (DATA_POINT_MTYPE(mtype) + || mtype == DATA_GEOMETRY + || mtype == DATA_BLOB) { + break; + } + /* Fall through */ + case DATA_GEOMETRY: + if (mtype == DATA_BLOB) { + break; + } + /* Fall through */ + default: + /* Column type mismatches */ + DBUG_RETURN(false); + } + } + + innodb_idx_fld++; + } + + DBUG_RETURN(TRUE); +} + +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Build a template for a base column for a virtual column +@param[in] table MySQL TABLE +@param[in] clust_index InnoDB clustered index +@param[in] field field in MySQL table +@param[in] col InnoDB column +@param[in,out] templ template to fill +@param[in] col_no field index for virtual col +*/ +static +void +innobase_vcol_build_templ( + const TABLE* table, + dict_index_t* clust_index, + Field* field, + const dict_col_t* col, + mysql_row_templ_t* templ, + ulint col_no) +{ + if (dict_col_is_virtual(col)) { + templ->is_virtual = true; + templ->col_no = col_no; + templ->clust_rec_field_no = ULINT_UNDEFINED; + templ->rec_field_no = col->ind; + } else { + templ->is_virtual = false; + templ->col_no = col_no; + templ->clust_rec_field_no = dict_col_get_clust_pos( + col, clust_index); + ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); + + templ->rec_field_no = templ->clust_rec_field_no; + } + + if (field->real_maybe_null()) { + templ->mysql_null_byte_offset = + field->null_offset(); + + templ->mysql_null_bit_mask = (ulint) field->null_bit; + } else { + templ->mysql_null_bit_mask = 0; + } + + templ->mysql_col_offset = static_cast( + get_field_offset(table, field)); + templ->mysql_col_len = static_cast(field->pack_length()); + templ->type = col->mtype; + templ->mysql_type = static_cast(field->type()); + + if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + templ->mysql_length_bytes = static_cast( + ((Field_varstring*) field)->length_bytes); + } + + templ->charset = dtype_get_charset_coll(col->prtype); + templ->mbminlen = dict_col_get_mbminlen(col); + 
templ->mbmaxlen = dict_col_get_mbmaxlen(col); + templ->is_unsigned = col->prtype & DATA_UNSIGNED; +} + +/** callback used by MySQL server layer to initialize +the table virtual columns' template +@param[in] table MySQL TABLE +@param[in,out] ib_table InnoDB table */ +void +innobase_build_v_templ_callback( + const TABLE* table, + void* ib_table) +{ + const dict_table_t* t_table = static_cast(ib_table); + + innobase_build_v_templ(table, t_table, t_table->vc_templ, NULL, + true, NULL); +} + +/** Build template for the virtual columns and their base columns. This +is done when the table first opened. +@param[in] table MySQL TABLE +@param[in] ib_table InnoDB dict_table_t +@param[in,out] s_templ InnoDB template structure +@param[in] add_v new virtual columns added along with + add index call +@param[in] locked true if dict_sys mutex is held +@param[in] share_tbl_name original MySQL table name */ +void +innobase_build_v_templ( + const TABLE* table, + const dict_table_t* ib_table, + dict_vcol_templ_t* s_templ, + const dict_add_v_col_t* add_v, + bool locked, + const char* share_tbl_name) +{ + ulint ncol = ib_table->n_cols - DATA_N_SYS_COLS; + ulint n_v_col = ib_table->n_v_cols; + bool marker[REC_MAX_N_FIELDS]; + + ut_ad(ncol < REC_MAX_N_FIELDS); + + if (add_v != NULL) { + n_v_col += add_v->n_v_col; + } + + ut_ad(n_v_col > 0); + + if (!locked) { + mutex_enter(&dict_sys->mutex); + } + + if (s_templ->vtempl) { + if (!locked) { + mutex_exit(&dict_sys->mutex); + } + return; + } + + memset(marker, 0, sizeof(bool) * ncol); + + s_templ->vtempl = static_cast( + ut_zalloc_nokey((ncol + n_v_col) + * sizeof *s_templ->vtempl)); + s_templ->n_col = ncol; + s_templ->n_v_col = n_v_col; + s_templ->rec_len = table->s->stored_rec_length; + // JAN: MySQL 5.6 + // s_templ->default_rec = table->s->default_values; + + s_templ->default_rec = static_cast( + ut_malloc_nokey(table->s->stored_rec_length)); + memcpy(s_templ->default_rec, table->s->default_values, + table->s->stored_rec_length); + + + /* 
Mark those columns could be base columns */ + for (ulint i = 0; i < ib_table->n_v_cols; i++) { + const dict_v_col_t* vcol = dict_table_get_nth_v_col( + ib_table, i); + + for (ulint j = 0; j < vcol->num_base; j++) { + ulint col_no = vcol->base_col[j]->ind; + marker[col_no] = true; + } + } + + if (add_v) { + for (ulint i = 0; i < add_v->n_v_col; i++) { + const dict_v_col_t* vcol = &add_v->v_col[i]; + + for (ulint j = 0; j < vcol->num_base; j++) { + ulint col_no = vcol->base_col[j]->ind; + marker[col_no] = true; + } + } + } + + ulint j = 0; + ulint z = 0; + + dict_index_t* clust_index = dict_table_get_first_index(ib_table); + + for (ulint i = 0; i < table->s->fields; i++) { + Field* field = table->field[i]; + + /* Build template for virtual columns */ + if (innobase_is_v_fld(field)) { +#ifdef UNIV_DEBUG + const char* name; + + if (z >= ib_table->n_v_def) { + name = add_v->v_col_name[z - ib_table->n_v_def]; + } else { + name = dict_table_get_v_col_name(ib_table, z); + } + + ut_ad(!ut_strcmp(name, field->field_name)); +#endif + const dict_v_col_t* vcol; + + if (z >= ib_table->n_v_def) { + vcol = &add_v->v_col[z - ib_table->n_v_def]; + } else { + vcol = dict_table_get_nth_v_col(ib_table, z); + } + + s_templ->vtempl[z + s_templ->n_col] + = static_cast( + ut_malloc_nokey( + sizeof *s_templ->vtempl[j])); + + innobase_vcol_build_templ( + table, clust_index, field, + &vcol->m_col, + s_templ->vtempl[z + s_templ->n_col], + z); + z++; + continue; + } + + ut_ad(j < ncol); + + /* Build template for base columns */ + if (marker[j]) { + dict_col_t* col = dict_table_get_nth_col( + ib_table, j); + +#ifdef UNIV_DEBUG + const char* name = dict_table_get_col_name( + ib_table, j); + + ut_ad(!ut_strcmp(name, field->field_name)); +#endif + + s_templ->vtempl[j] = static_cast< + mysql_row_templ_t*>( + ut_malloc_nokey( + sizeof *s_templ->vtempl[j])); + + innobase_vcol_build_templ( + table, clust_index, field, col, + s_templ->vtempl[j], j); + } + + j++; + } + + if (!locked) { + 
mutex_exit(&dict_sys->mutex); + } + + s_templ->db_name = table->s->db.str; + s_templ->tb_name = table->s->table_name.str; + + if (share_tbl_name) { + s_templ->share_name = share_tbl_name; + } +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + +/*******************************************************************//** +This function builds a translation table in INNOBASE_SHARE +structure for fast index location with mysql array number from its +table->key_info structure. This also provides the necessary translation +between the key order in mysql key_info and InnoDB ib_table->indexes if +they are not fully matched with each other. +Note we do not have any mutex protecting the translation table +building based on the assumption that there is no concurrent +index creation/drop and DMLs that requires index lookup. All table +handle will be closed before the index creation/drop. +@return true if index translation table built successfully */ +static +bool +innobase_build_index_translation( +/*=============================*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in InnoDB data + dictionary */ + INNOBASE_SHARE* share) /*!< in/out: share structure + where index translation table + will be constructed in. 
*/ +{ + DBUG_ENTER("innobase_build_index_translation"); + + bool ret = true; + + mutex_enter(&dict_sys->mutex); + + ulint mysql_num_index = table->s->keys; + ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes); + dict_index_t** index_mapping = share->idx_trans_tbl.index_mapping; + + /* If there exists inconsistency between MySQL and InnoDB dictionary + (metadata) information, the number of index defined in MySQL + could exceed that in InnoDB, do not build index translation + table in such case */ + if (ib_num_index < mysql_num_index) { + ret = false; + goto func_exit; + } + + /* If index entry count is non-zero, nothing has + changed since last update, directly return TRUE */ + if (share->idx_trans_tbl.index_count) { + /* Index entry count should still match mysql_num_index */ + ut_a(share->idx_trans_tbl.index_count == mysql_num_index); + goto func_exit; + } + + /* The number of index increased, rebuild the mapping table */ + if (mysql_num_index > share->idx_trans_tbl.array_size) { + + index_mapping = reinterpret_cast( + ut_realloc(index_mapping, + mysql_num_index * sizeof(*index_mapping))); + + if (index_mapping == NULL) { + /* Report an error if index_mapping continues to be + NULL and mysql_num_index is a non-zero value */ + sql_print_error("InnoDB: fail to allocate memory for" + " index translation table. Number of" + " Index:%lu, array size:%lu", + mysql_num_index, + share->idx_trans_tbl.array_size); + ret = false; + goto func_exit; + } + + share->idx_trans_tbl.array_size = mysql_num_index; + } + + /* For each index in the mysql key_info array, fetch its + corresponding InnoDB index pointer into index_mapping + array. 
*/ + for (ulint count = 0; count < mysql_num_index; count++) { + + /* Fetch index pointers into index_mapping according to mysql + index sequence */ + index_mapping[count] = dict_table_get_index_on_name( + ib_table, table->key_info[count].name); + + if (index_mapping[count] == 0) { + sql_print_error("Cannot find index %s in InnoDB" + " index dictionary.", + table->key_info[count].name); + ret = false; + goto func_exit; + } + + /* Double check fetched index has the same + column info as those in mysql key_info. */ + if (!innobase_match_index_columns(&table->key_info[count], + index_mapping[count])) { + sql_print_error("Found index %s whose column info" + " does not match that of MySQL.", + table->key_info[count].name); + ret = false; + goto func_exit; + } + } + + /* Successfully built the translation table */ + share->idx_trans_tbl.index_count = mysql_num_index; + +func_exit: + if (!ret) { + /* Build translation table failed. */ + ut_free(index_mapping); + + share->idx_trans_tbl.array_size = 0; + share->idx_trans_tbl.index_count = 0; + index_mapping = NULL; + } + + share->idx_trans_tbl.index_mapping = index_mapping; + + mutex_exit(&dict_sys->mutex); + + DBUG_RETURN(ret); +} + +/*******************************************************************//** +This function uses index translation table to quickly locate the +requested index structure. +Note we do not have mutex protection for the index translatoin table +access, it is based on the assumption that there is no concurrent +translation table rebuild (fter create/drop index) and DMLs that +require index lookup. +@return dict_index_t structure for requested index. NULL if +fail to locate the index structure. */ +static +dict_index_t* +innobase_index_lookup( +/*==================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. 
*/ + uint keynr) /*!< in: index number for the requested + index */ +{ + if (share->idx_trans_tbl.index_mapping == NULL + || keynr >= share->idx_trans_tbl.index_count) { + return(NULL); + } + + return(share->idx_trans_tbl.index_mapping[keynr]); +} /********************************************************************//** Get the upper limit of the MySQL integral and floating-point type. @return maximum allowed value for the field */ @@ -5323,238 +6705,10 @@ innobase_get_int_col_max_value( return(max_value); } - -/*******************************************************************//** -This function checks whether the index column information -is consistent between KEY info from mysql and that from innodb index. -@return TRUE if all column types match. */ -static -ibool -innobase_match_index_columns( -/*=========================*/ - const KEY* key_info, /*!< in: Index info - from mysql */ - const dict_index_t* index_info) /*!< in: Index info - from Innodb */ -{ - const KEY_PART_INFO* key_part; - const KEY_PART_INFO* key_end; - const dict_field_t* innodb_idx_fld; - const dict_field_t* innodb_idx_fld_end; - - DBUG_ENTER("innobase_match_index_columns"); - - /* Check whether user defined index column count matches */ - if (key_info->user_defined_key_parts != - index_info->n_user_defined_cols) { - DBUG_RETURN(FALSE); - } - - key_part = key_info->key_part; - key_end = key_part + key_info->user_defined_key_parts; - innodb_idx_fld = index_info->fields; - innodb_idx_fld_end = index_info->fields + index_info->n_fields; - - /* Check each index column's datatype. We do not check - column name because there exists case that index - column name got modified in mysql but such change does not - propagate to InnoDB. - One hidden assumption here is that the index column sequences - are matched up between those in mysql and Innodb. 
*/ - for (; key_part != key_end; ++key_part) { - ulint col_type; - ibool is_unsigned; - ulint mtype = innodb_idx_fld->col->mtype; - - /* Need to translate to InnoDB column type before - comparison. */ - col_type = get_innobase_type_from_mysql_type(&is_unsigned, - key_part->field); - - /* Ignore Innodb specific system columns. */ - while (mtype == DATA_SYS) { - innodb_idx_fld++; - - if (innodb_idx_fld >= innodb_idx_fld_end) { - DBUG_RETURN(FALSE); - } - - mtype = innodb_idx_fld->col->mtype; - } - - // MariaDB-5.5 compatibility - if ((key_part->field->real_type() == MYSQL_TYPE_ENUM || - key_part->field->real_type() == MYSQL_TYPE_SET) && - mtype == DATA_FIXBINARY) - col_type= DATA_FIXBINARY; - - if (col_type != mtype) { - /* Column Type mismatches */ - DBUG_RETURN(FALSE); - } - - innodb_idx_fld++; - } - - DBUG_RETURN(TRUE); -} - -/*******************************************************************//** -This function builds a translation table in INNOBASE_SHARE -structure for fast index location with mysql array number from its -table->key_info structure. This also provides the necessary translation -between the key order in mysql key_info and Innodb ib_table->indexes if -they are not fully matched with each other. -Note we do not have any mutex protecting the translation table -building based on the assumption that there is no concurrent -index creation/drop and DMLs that requires index lookup. All table -handle will be closed before the index creation/drop. -@return TRUE if index translation table built successfully */ -static -ibool -innobase_build_index_translation( -/*=============================*/ - const TABLE* table, /*!< in: table in MySQL data - dictionary */ - dict_table_t* ib_table,/*!< in: table in Innodb data - dictionary */ - INNOBASE_SHARE* share) /*!< in/out: share structure - where index translation table - will be constructed in. 
*/ -{ - ulint mysql_num_index; - ulint ib_num_index; - dict_index_t** index_mapping; - ibool ret = TRUE; - - DBUG_ENTER("innobase_build_index_translation"); - - mutex_enter(&dict_sys->mutex); - - mysql_num_index = table->s->keys; - ib_num_index = UT_LIST_GET_LEN(ib_table->indexes); - - index_mapping = share->idx_trans_tbl.index_mapping; - - /* If there exists inconsistency between MySQL and InnoDB dictionary - (metadata) information, the number of index defined in MySQL - could exceed that in InnoDB, do not build index translation - table in such case */ - if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) { - ret = FALSE; - goto func_exit; - } - - /* If index entry count is non-zero, nothing has - changed since last update, directly return TRUE */ - if (share->idx_trans_tbl.index_count) { - /* Index entry count should still match mysql_num_index */ - ut_a(share->idx_trans_tbl.index_count == mysql_num_index); - goto func_exit; - } - - /* The number of index increased, rebuild the mapping table */ - if (mysql_num_index > share->idx_trans_tbl.array_size) { - index_mapping = (dict_index_t**) my_realloc(index_mapping, - mysql_num_index * - sizeof(*index_mapping), - MYF(MY_ALLOW_ZERO_PTR)); - - if (!index_mapping) { - /* Report an error if index_mapping continues to be - NULL and mysql_num_index is a non-zero value */ - sql_print_error("InnoDB: fail to allocate memory for " - "index translation table. Number of " - "Index:%lu, array size:%lu", - mysql_num_index, - share->idx_trans_tbl.array_size); - ret = FALSE; - goto func_exit; - } - - share->idx_trans_tbl.array_size = mysql_num_index; - } - - /* For each index in the mysql key_info array, fetch its - corresponding InnoDB index pointer into index_mapping - array. 
*/ - for (ulint count = 0; count < mysql_num_index; count++) { - - /* Fetch index pointers into index_mapping according to mysql - index sequence */ - index_mapping[count] = dict_table_get_index_on_name( - ib_table, table->key_info[count].name); - - if (!index_mapping[count]) { - sql_print_error("Cannot find index %s in InnoDB " - "index dictionary.", - table->key_info[count].name); - ret = FALSE; - goto func_exit; - } - - /* Double check fetched index has the same - column info as those in mysql key_info. */ - if (!innobase_match_index_columns(&table->key_info[count], - index_mapping[count])) { - sql_print_error("Found index %s whose column info " - "does not match that of MySQL.", - table->key_info[count].name); - ret = FALSE; - goto func_exit; - } - } - - /* Successfully built the translation table */ - share->idx_trans_tbl.index_count = mysql_num_index; - -func_exit: - if (!ret) { - /* Build translation table failed. */ - my_free(index_mapping); - - share->idx_trans_tbl.array_size = 0; - share->idx_trans_tbl.index_count = 0; - index_mapping = NULL; - } - - share->idx_trans_tbl.index_mapping = index_mapping; - - mutex_exit(&dict_sys->mutex); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -This function uses index translation table to quickly locate the -requested index structure. -Note we do not have mutex protection for the index translatoin table -access, it is based on the assumption that there is no concurrent -translation table rebuild (fter create/drop index) and DMLs that -require index lookup. -@return dict_index_t structure for requested index. NULL if -fail to locate the index structure. */ -static -dict_index_t* -innobase_index_lookup( -/*==================*/ - INNOBASE_SHARE* share, /*!< in: share structure for index - translation table. 
*/ - uint keynr) /*!< in: index number for the requested - index */ -{ - if (!share->idx_trans_tbl.index_mapping - || keynr >= share->idx_trans_tbl.index_count) { - return(NULL); - } - - return(share->idx_trans_tbl.index_mapping[keynr]); -} - /************************************************************************ Set the autoinc column max value. This should only be called once from ha_innobase::open(). Therefore there's no need for a covering lock. */ -UNIV_INTERN + void ha_innobase::innobase_initialize_autoinc() /*======================================*/ @@ -5564,15 +6718,17 @@ ha_innobase::innobase_initialize_autoinc() if (field != NULL) { auto_inc = innobase_get_int_col_max_value(field); + ut_ad(!innobase_is_v_fld(field)); + + /* autoinc column cannot be virtual column */ + ut_ad(!innobase_is_v_fld(field)); } else { /* We have no idea what's been passed in to us as the autoinc column. We set it to the 0, effectively disabling updates to the table. */ auto_inc = 0; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Unable to determine the AUTOINC " - "column name\n"); + ib::info() << "Unable to determine the AUTOINC column name"; } if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { @@ -5599,9 +6755,20 @@ ha_innobase::innobase_initialize_autoinc() update_thd(ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - col_name = field->field_name; + + /* For intrinsic table, name of field has to be prefixed with + table name to maintain column-name uniqueness. 
*/ + if (m_prebuilt->table != NULL + && dict_table_is_intrinsic(m_prebuilt->table)) { + + ulint col_no = dict_col_get_no(dict_table_get_nth_col( + m_prebuilt->table, field->field_index)); + + col_name = dict_table_get_col_name( + m_prebuilt->table, col_no); + } + index = innobase_get_index(table->s->next_number_index); /* Execute SELECT MAX(col_name) FROM TABLE; */ @@ -5611,7 +6778,7 @@ ha_innobase::innobase_initialize_autoinc() case DB_SUCCESS: { ulonglong col_max_value; - col_max_value = innobase_get_int_col_max_value(field); + col_max_value = innobase_get_int_col_max_value(field); /* At the this stage we do not know the increment nor the offset, so use a default increment of 1. */ @@ -5622,20 +6789,18 @@ ha_innobase::innobase_initialize_autoinc() break; } case DB_RECORD_NOT_FOUND: - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: MySQL and InnoDB data " - "dictionaries are out of sync.\n" - "InnoDB: Unable to find the AUTOINC column " - "%s in the InnoDB table %s.\n" - "InnoDB: We set the next AUTOINC column " - "value to 0,\n" - "InnoDB: in effect disabling the AUTOINC " - "next value generation.\n" - "InnoDB: You can either set the next " - "AUTOINC value explicitly using ALTER TABLE\n" - "InnoDB: or fix the data dictionary by " - "recreating the table.\n", - col_name, index->table->name); + ib::error() << "MySQL and InnoDB data dictionaries are" + " out of sync. Unable to find the AUTOINC" + " column " << col_name << " in the InnoDB" + " table " << index->table->name << ". We set" + " the next AUTOINC column value to 0, in" + " effect disabling the AUTOINC next value" + " generation."; + + ib::info() << "You can either set the next AUTOINC" + " value explicitly using ALTER TABLE or fix" + " the data dictionary by recreating the" + " table."; /* This will disable the AUTOINC generation. 
*/ auto_inc = 0; @@ -5652,14 +6817,14 @@ ha_innobase::innobase_initialize_autoinc() } } - dict_table_autoinc_initialize(prebuilt->table, auto_inc); + dict_table_autoinc_initialize(m_prebuilt->table, auto_inc); } /*****************************************************************//** Creates and opens a handle to a table which already exists in an InnoDB database. -@return 1 if error, 0 if success */ -UNIV_INTERN +@return 1 if error, 0 if success */ + int ha_innobase::open( /*==============*/ @@ -5671,8 +6836,6 @@ ha_innobase::open( char norm_name[FN_REFLEN]; THD* thd; char* is_part = NULL; - ibool par_case_name_set = FALSE; - char par_case_name[FN_REFLEN]; dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE; DBUG_ENTER("ha_innobase::open"); @@ -5683,7 +6846,7 @@ ha_innobase::open( thd = ha_thd(); /* Under some cases MySQL seems to call this function while - holding btr_search_latch. This breaks the latching order as + holding search latch(es). This breaks the latching order as we acquire dict_sys->mutex below and leads to a deadlock. */ if (thd != NULL) { innobase_release_temporary_latches(ht, thd); @@ -5691,24 +6854,24 @@ ha_innobase::open( normalize_table_name(norm_name, name); - user_thd = NULL; + m_user_thd = NULL; - if (!(share=get_share(name))) { + if (!(m_share = get_share(name))) { DBUG_RETURN(1); } /* Will be allocated if it is needed in ::update_row() */ - upd_buf = NULL; - upd_buf_size = 0; + m_upd_buf = NULL; + m_upd_buf_size = 0; /* We look for pattern #P# to see if the table is partitioned MySQL table. */ -#ifdef __WIN__ +#ifdef _WIN32 is_part = strstr(norm_name, "#p#"); #else is_part = strstr(norm_name, "#P#"); -#endif /* __WIN__ */ +#endif /* _WIN32 */ /* Check whether FOREIGN_KEY_CHECKS is set to 0. If so, the table can be opened even if some FK indexes are missing. 
If not, the table @@ -5717,23 +6880,33 @@ ha_innobase::open( ignore_err = DICT_ERR_IGNORE_FK_NOKEY; } - /* Get pointer to a table object in InnoDB dictionary cache */ - ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE, ignore_err); + /* Get pointer to a table object in InnoDB dictionary cache. + For intrinsic table, get it from session private data */ + ib_table = thd_to_innodb_session(thd)->lookup_table_handler(norm_name); - if (ib_table + if (ib_table == NULL) { + + ib_table = open_dict_table(name, norm_name, is_part, + ignore_err); + } else { + ib_table->acquire(); + ut_ad(dict_table_is_intrinsic(ib_table)); + } + + if (ib_table != NULL && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && table->s->stored_fields != dict_table_get_n_user_cols(ib_table)) + && table->s->stored_fields != dict_table_get_n_tot_u_cols(ib_table)) || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && (table->s->fields - != dict_table_get_n_user_cols(ib_table) - 1)))) { - ib_logf(IB_LOG_LEVEL_WARN, - "table %s contains %lu user defined columns " - "in InnoDB, but %lu columns in MySQL. Please " - "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " - REFMAN "innodb-troubleshooting.html " - "for how to resolve it", - norm_name, (ulong) dict_table_get_n_user_cols(ib_table), - (ulong) table->s->fields); + && (table->s->stored_fields + != dict_table_get_n_tot_u_cols(ib_table) - 1)))) { + + ib::warn() << "Table " << norm_name << " contains " + << dict_table_get_n_user_cols(ib_table) << " user" + " defined columns in InnoDB, but " << table->s->stored_fields + << " columns in MySQL. Please check" + " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN + "innodb-troubleshooting.html for how to resolve the" + " issue."; /* Mark this table as corrupted, so the drop table or force recovery can still use it, but not others. 
*/ @@ -5743,95 +6916,45 @@ ha_innobase::open( is_part = NULL; } +#ifdef MYSQL_ENCRYPTION + /* For encrypted table, check if the encryption info in data + file can't be retrieved properly, mark it as corrupted. */ + if (ib_table != NULL + && dict_table_is_encrypted(ib_table) + && ib_table->ibd_file_missing + && !dict_table_is_discarded(ib_table)) { + + /* Mark this table as corrupted, so the drop table + or force recovery can still use it, but not others. */ + + dict_table_close(ib_table, FALSE, FALSE); + ib_table = NULL; + is_part = NULL; + + free_share(m_share); + my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0)); + + DBUG_RETURN(HA_ERR_TABLE_CORRUPT); + } +#endif + if (NULL == ib_table) { - if (is_part) { - /* MySQL partition engine hard codes the file name - separator as "#P#". The text case is fixed even if - lower_case_table_names is set to 1 or 2. This is true - for sub-partition names as well. InnoDB always - normalises file names to lower case on Windows, this - can potentially cause problems when copying/moving - tables between platforms. - - 1) If boot against an installation from Windows - platform, then its partition table name could - be in lower case in system tables. So we will - need to check lower case name when load table. - - 2) If we boot an installation from other case - sensitive platform in Windows, we might need to - check the existence of table name without lower - case in the system table. 
*/ - if (innobase_get_lower_case_table_names() == 1) { - - if (!par_case_name_set) { -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - strcpy(par_case_name, norm_name); - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, name, FALSE); -#endif - par_case_name_set = TRUE; - } - - ib_table = dict_table_open_on_name( - par_case_name, FALSE, TRUE, - ignore_err); - } - - if (ib_table) { -#ifndef __WIN__ - sql_print_warning("Partition table %s opened " - "after converting to lower " - "case. The table may have " - "been moved from a case " - "in-sensitive file system. " - "Please recreate table in " - "the current file system\n", - norm_name); -#else - sql_print_warning("Partition table %s opened " - "after skipping the step to " - "lower case the table name. " - "The table may have been " - "moved from a case sensitive " - "file system. Please " - "recreate table in the " - "current file system\n", - norm_name); -#endif - goto table_opened; - } - } if (is_part) { sql_print_error("Failed to open table %s.\n", norm_name); } - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot open table %s from the internal data " - "dictionary of InnoDB though the .frm file " - "for the table exists. See " - REFMAN "innodb-troubleshooting.html for how " - "you can resolve the problem.", norm_name); + ib::warn() << "Cannot open table " << norm_name << " from the" + " internal data dictionary of InnoDB though the .frm" + " file for the table exists. 
" << TROUBLESHOOTING_MSG; - free_share(share); - my_errno = ENOENT; + free_share(m_share); + set_my_errno(ENOENT); DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); } -table_opened: - innobase_copy_frm_flags_from_table_share(ib_table, table->s); ib_table->thd = (void*)thd; @@ -5880,15 +7003,17 @@ table_opened: } if (!thd_tablespace_op(thd) && no_tablespace) { - free_share(share); - my_errno = ENOENT; + free_share(m_share); + set_my_errno(ENOENT); int ret_err = HA_ERR_NO_SUCH_TABLE; /* If table has no talespace but it has crypt data, check is tablespace made unaccessible because encryption service or used key_id is not available. */ if (ib_table) { + bool warning_pushed = false; fil_space_crypt_t* crypt_data = ib_table->crypt_data; + if ((crypt_data && crypt_data->encryption == FIL_SPACE_ENCRYPTION_ON) || (srv_encrypt_tables && crypt_data && crypt_data->encryption == FIL_SPACE_ENCRYPTION_DEFAULT)) { @@ -5901,8 +7026,14 @@ table_opened: " Can't continue reading table.", ib_table->name, crypt_data->key_id); ret_err = HA_ERR_DECRYPTION_FAILED; + warning_pushed = true; } - } else if (ib_table->is_encrypted) { + } + + /* If table is marked as encrypted then we push + warning if it has not been already done as used + key_id might be found but it is incorrect. 
*/ + if (ib_table->is_encrypted && !warning_pushed) { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_DECRYPTION_FAILED, "Table %s is encrypted but encryption service or" @@ -5914,20 +7045,45 @@ table_opened: } dict_table_close(ib_table, FALSE, FALSE); - DBUG_RETURN(ret_err); } - prebuilt = row_create_prebuilt(ib_table, table->s->stored_rec_length); + m_prebuilt = row_create_prebuilt(ib_table, table->s->stored_rec_length); - prebuilt->default_rec = table->s->default_values; - ut_ad(prebuilt->default_rec); + m_prebuilt->default_rec = table->s->default_values; + ut_ad(m_prebuilt->default_rec); + + m_prebuilt->m_mysql_table = table; /* Looks like MySQL-3.23 sometimes has primary key number != 0 */ - primary_key = table->s->primary_key; - key_used_on_scan = primary_key; + m_primary_key = table->s->primary_key; - if (!innobase_build_index_translation(table, ib_table, share)) { + key_used_on_scan = m_primary_key; + +#ifdef MYSQL_VIRTUAL_COLUMNS + if (ib_table->n_v_cols) { + mutex_enter(&dict_sys->mutex); + if (ib_table->vc_templ == NULL) { + ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); + ib_table->vc_templ->vtempl = NULL; + } else if (ib_table->get_ref_count() == 1) { + /* Clean and refresh the template if no one else + get hold on it */ + dict_free_vc_templ(ib_table->vc_templ); + ib_table->vc_templ->vtempl = NULL; + } + + if (ib_table->vc_templ->vtempl == NULL) { + innobase_build_v_templ( + table, ib_table, ib_table->vc_templ, NULL, + true, m_share->table_name); + } + + mutex_exit(&dict_sys->mutex); + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + + if (!innobase_build_index_translation(table, ib_table, m_share)) { sql_print_error("Build InnoDB index translation table for" " Table %s failed", name); } @@ -5940,9 +7096,9 @@ table_opened: if (!row_table_got_default_clust_index(ib_table)) { - prebuilt->clust_index_was_generated = FALSE; + m_prebuilt->clust_index_was_generated = FALSE; - if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { + if (m_primary_key >= 
MAX_KEY) { ib_table->dict_frm_mismatch = DICT_FRM_NO_PK; /* This mismatch could cause further problems @@ -5952,7 +7108,7 @@ table_opened: ib_push_frm_error(thd, ib_table, table, 0, true); - /* If primary_key >= MAX_KEY, its (primary_key) + /* If m_primary_key >= MAX_KEY, its (m_primary_key) value could be out of bound if continue to index into key_info[] array. Find InnoDB primary index, and assign its key_length to ref_length. @@ -5993,10 +7149,10 @@ table_opened: save space, because all row reference buffers are allocated based on ref_length. */ - ref_length = table->key_info[primary_key].key_length; + ref_length = table->key_info[m_primary_key].key_length; } } else { - if (primary_key != MAX_KEY) { + if (m_primary_key != MAX_KEY) { ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS; @@ -6007,7 +7163,7 @@ table_opened: ib_push_frm_error(thd, ib_table, table, 0, true); } - prebuilt->clust_index_was_generated = TRUE; + m_prebuilt->clust_index_was_generated = TRUE; ref_length = DATA_ROW_ID_LEN; @@ -6021,9 +7177,9 @@ table_opened: if (key_used_on_scan != MAX_KEY) { sql_print_warning( - "Table %s key_used_on_scan is %lu even " - "though there is no primary key inside " - "InnoDB.", name, (ulong) key_used_on_scan); + "Table %s key_used_on_scan is %lu even" + " though there is no primary key inside" + " InnoDB.", name, (ulong) key_used_on_scan); } } @@ -6031,64 +7187,198 @@ table_opened: stats.block_size = UNIV_PAGE_SIZE; /* Init table lock structure */ - thr_lock_data_init(&share->lock,&lock,(void*) 0); + thr_lock_data_init(&m_share->lock, &lock, NULL); - if (prebuilt->table) { + if (m_prebuilt->table != NULL) { /* We update the highest file format in the system table space, if this table has higher file format setting. */ trx_sys_file_format_max_upgrade( (const char**) &innobase_file_format_max, - dict_table_get_format(prebuilt->table)); + dict_table_get_format(m_prebuilt->table)); } /* Only if the table has an AUTOINC column. 
*/ - if (prebuilt->table != NULL - && !prebuilt->table->ibd_file_missing + if (m_prebuilt->table != NULL + && !m_prebuilt->table->ibd_file_missing && table->found_next_number_field != NULL) { - dict_table_autoinc_lock(prebuilt->table); + dict_table_autoinc_lock(m_prebuilt->table); /* Since a table can already be "open" in InnoDB's internal data dictionary, we only init the autoinc counter once, the first time the table is loaded. We can safely reuse the autoinc value from a previous MySQL open. */ - if (dict_table_autoinc_read(prebuilt->table) == 0) { + if (dict_table_autoinc_read(m_prebuilt->table) == 0) { innobase_initialize_autoinc(); } - dict_table_autoinc_unlock(prebuilt->table); + dict_table_autoinc_unlock(m_prebuilt->table); + } + + /* Set plugin parser for fulltext index */ + for (uint i = 0; i < table->s->keys; i++) { + if (table->key_info[i].flags & HA_USES_PARSER) { + dict_index_t* index = innobase_get_index(i); + plugin_ref parser = table->key_info[i].parser; + + ut_ad(index->type & DICT_FTS); + index->parser = + static_cast( + plugin_decl(parser)->info); + + index->is_ngram = strncmp( + plugin_name(parser)->str, + FTS_NGRAM_PARSER_NAME, + plugin_name(parser)->length) == 0; + + DBUG_EXECUTE_IF("fts_instrument_use_default_parser", + index->parser = &fts_default_parser;); + } } info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); +#ifdef MYSQL_COMPRESSION + dberr_t err = fil_set_compression(m_prebuilt->table, + table->s->compress.str); + + switch (err) { + case DB_NOT_FOUND: + case DB_UNSUPPORTED: + /* We will do another check before the create + table and push the error to the client there. */ + break; + + case DB_IO_NO_PUNCH_HOLE_TABLESPACE: + /* We did the check in the 'if' above. */ + + case DB_IO_NO_PUNCH_HOLE_FS: + /* During open we can't check whether the FS supports + punch hole or not, at least on Linux. 
*/ + break; + + default: + ut_error; + + case DB_SUCCESS: + break; + } +#endif + DBUG_RETURN(0); } -UNIV_INTERN +/** Opens dictionary table object using table name. For partition, we need to +try alternative lower/upper case names to support moving data files across +platforms. +@param[in] table_name name of the table/partition +@param[in] norm_name normalized name of the table/partition +@param[in] is_partition if this is a partition of a table +@param[in] ignore_err error to ignore for loading dictionary object +@return dictionary table object or NULL if not found */ +dict_table_t* +ha_innobase::open_dict_table( + const char* table_name, + const char* norm_name, + bool is_partition, + dict_err_ignore_t ignore_err) +{ + DBUG_ENTER("ha_innobase::open_dict_table"); + dict_table_t* ib_table = dict_table_open_on_name(norm_name, FALSE, + TRUE, ignore_err); + + if (NULL == ib_table && is_partition) { + /* MySQL partition engine hard codes the file name + separator as "#P#". The text case is fixed even if + lower_case_table_names is set to 1 or 2. This is true + for sub-partition names as well. InnoDB always + normalises file names to lower case on Windows, this + can potentially cause problems when copying/moving + tables between platforms. + + 1) If boot against an installation from Windows + platform, then its partition table name could + be in lower case in system tables. So we will + need to check lower case name when load table. + + 2) If we boot an installation from other case + sensitive platform in Windows, we might need to + check the existence of table name without lower + case in the system table. 
*/ + if (innobase_get_lower_case_table_names() == 1) { + char par_case_name[FN_REFLEN]; + +#ifndef _WIN32 + /* Check for the table using lower + case name, including the partition + separator "P" */ + strcpy(par_case_name, norm_name); + innobase_casedn_str(par_case_name); +#else + /* On Windows platfrom, check + whether there exists table name in + system table whose name is + not being normalized to lower case */ + create_table_info_t:: + normalize_table_name_low( + par_case_name, + table_name, FALSE); +#endif + ib_table = dict_table_open_on_name( + par_case_name, FALSE, TRUE, + ignore_err); + } + + if (ib_table != NULL) { +#ifndef _WIN32 + sql_print_warning("Partition table %s opened" + " after converting to lower" + " case. The table may have" + " been moved from a case" + " in-sensitive file system." + " Please recreate table in" + " the current file system\n", + norm_name); +#else + sql_print_warning("Partition table %s opened" + " after skipping the step to" + " lower case the table name." + " The table may have been" + " moved from a case sensitive" + " file system. 
Please" + " recreate table in the" + " current file system\n", + norm_name); +#endif + } + } + + DBUG_RETURN(ib_table); +} + handler* ha_innobase::clone( /*===============*/ const char* name, /*!< in: table name */ MEM_ROOT* mem_root) /*!< in: memory context */ { - ha_innobase* new_handler; - DBUG_ENTER("ha_innobase::clone"); - new_handler = static_cast(handler::clone(name, - mem_root)); - if (new_handler) { - DBUG_ASSERT(new_handler->prebuilt != NULL); + ha_innobase* new_handler = static_cast( + handler::clone(name, mem_root)); - new_handler->prebuilt->select_lock_type - = prebuilt->select_lock_type; + if (new_handler != NULL) { + DBUG_ASSERT(new_handler->m_prebuilt != NULL); + + new_handler->m_prebuilt->select_lock_type + = m_prebuilt->select_lock_type; } DBUG_RETURN(new_handler); } -UNIV_INTERN + uint ha_innobase::max_supported_key_part_length() const /*==============================================*/ @@ -6102,31 +7392,30 @@ ha_innobase::max_supported_key_part_length() const /******************************************************************//** Closes a handle to an InnoDB table. -@return 0 */ -UNIV_INTERN +@return 0 */ + int ha_innobase::close() /*================*/ { - THD* thd; - DBUG_ENTER("ha_innobase::close"); - thd = ha_thd(); + THD* thd = ha_thd(); + if (thd != NULL) { innobase_release_temporary_latches(ht, thd); } - row_prebuilt_free(prebuilt, FALSE); + row_prebuilt_free(m_prebuilt, FALSE); - if (upd_buf != NULL) { - ut_ad(upd_buf_size != 0); - my_free(upd_buf); - upd_buf = NULL; - upd_buf_size = 0; + if (m_upd_buf != NULL) { + ut_ad(m_upd_buf_size != 0); + my_free(m_upd_buf); + m_upd_buf = NULL; + m_upd_buf_size = 0; } - free_share(share); + free_share(m_share); MONITOR_INC(MONITOR_TABLE_CLOSE); @@ -6140,24 +7429,24 @@ ha_innobase::close() /* The following accessor functions should really be inside MySQL code! */ -/**************************************************************//** -Gets field offset for a field in a table. 
-@return offset */ +/** Gets field offset for a field in a table. +@param[in] table MySQL table object +@param[in] field MySQL field object +@return offset */ static inline uint get_field_offset( -/*=============*/ - const TABLE* table, /*!< in: MySQL table object */ - const Field* field) /*!< in: MySQL field object */ + const TABLE* table, + const Field* field) { - return((uint) (field->ptr - table->record[0])); + return(static_cast((field->ptr - table->record[0]))); } #ifdef WITH_WSREP UNIV_INTERN int wsrep_innobase_mysql_sort( -/*===============*/ +/*======================*/ /* out: str contains sort string */ int mysql_type, /* in: MySQL type */ uint charset_number, /* in: number of the charset */ @@ -6214,8 +7503,8 @@ wsrep_innobase_mysql_sort( memcpy(tmp_str, str, str_length); tmp_length = charset->coll->strnxfrm(charset, str, str_length, - str_length, tmp_str, - tmp_length, 0); + str_length, tmp_str, + tmp_length, 0); DBUG_ASSERT(tmp_length <= str_length); if (wsrep_protocol_version < 3) { tmp_length = charset->coll->strnxfrm( @@ -6233,7 +7522,7 @@ wsrep_innobase_mysql_sort( DBUG_ASSERT(tmp_length <= buf_length); ret_length = tmp_length; } - + break; } case MYSQL_TYPE_DECIMAL : @@ -6264,172 +7553,8 @@ wsrep_innobase_mysql_sort( } #endif /* WITH_WSREP */ -/*************************************************************//** -InnoDB uses this function to compare two data fields for which the data type -is such that we must use MySQL code to compare them. NOTE that the prototype -of this function is in rem0cmp.cc in InnoDB source code! If you change this -function, remember to update the prototype there! 
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -UNIV_INTERN -int -innobase_mysql_cmp( -/*===============*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ -{ - CHARSET_INFO* charset; - enum_field_types mysql_tp; - int ret; - - DBUG_ASSERT(a_length != UNIV_SQL_NULL); - DBUG_ASSERT(b_length != UNIV_SQL_NULL); - - mysql_tp = (enum_field_types) mysql_type; - - switch (mysql_tp) { - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_VARCHAR: - /* Use the charset number to pick the right charset struct for - the comparison. Since the MySQL function get_charset may be - slow before Bar removes the mutex operation there, we first - look at 2 common charsets directly. */ - - if (charset_number == default_charset_info->number) { - charset = default_charset_info; - } else if (charset_number == my_charset_latin1.number) { - charset = &my_charset_latin1; - } else { - charset = get_charset(charset_number, MYF(MY_WME)); - - if (charset == NULL) { - sql_print_error("InnoDB needs charset %lu for doing " - "a comparison, but MySQL cannot " - "find that charset.", - (ulong) charset_number); - ut_a(0); - } - } - - /* Starting from 4.1.3, we use strnncollsp() in comparisons of - non-latin1_swedish_ci strings. NOTE that the collation order - changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users - having indexes on such data need to rebuild their tables! 
*/ - - ret = charset->coll->strnncollsp( - charset, a, a_length, b, b_length); - - if (ret < 0) { - return(-1); - } else if (ret > 0) { - return(1); - } else { - return(0); - } - default: - ut_error; - } - - return(0); -} - - -/*************************************************************//** -Get the next token from the given string and store it in *token. */ -UNIV_INTERN -CHARSET_INFO* -innobase_get_fts_charset( -/*=====================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number) /*!< in: number of the charset */ -{ - enum_field_types mysql_tp; - CHARSET_INFO* charset; - - mysql_tp = (enum_field_types) mysql_type; - - switch (mysql_tp) { - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_VARCHAR: - /* Use the charset number to pick the right charset struct for - the comparison. Since the MySQL function get_charset may be - slow before Bar removes the mutex operation there, we first - look at 2 common charsets directly. */ - - if (charset_number == default_charset_info->number) { - charset = default_charset_info; - } else if (charset_number == my_charset_latin1.number) { - charset = &my_charset_latin1; - } else { - charset = get_charset(charset_number, MYF(MY_WME)); - - if (charset == NULL) { - sql_print_error("InnoDB needs charset %lu for doing " - "a comparison, but MySQL cannot " - "find that charset.", - (ulong) charset_number); - ut_a(0); - } - } - break; - default: - ut_error; - } - - return(charset); -} - -/*************************************************************//** -InnoDB uses this function to compare two data fields for which the data type -is such that we must use MySQL code to compare them. NOTE that the prototype -of this function is in rem0cmp.c in InnoDB source code! If you change this -function, remember to update the prototype there! 
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -UNIV_INTERN -int -innobase_mysql_cmp_prefix( -/*======================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ -{ - CHARSET_INFO* charset; - int result; - - charset = innobase_get_fts_charset(mysql_type, charset_number); - - result = ha_compare_text(charset, (uchar*) a, a_length, - (uchar*) b, b_length, 1); - - return(result); -} /******************************************************************//** compare two character string according to their charset. */ -UNIV_INTERN int innobase_fts_text_cmp( /*==================*/ @@ -6445,9 +7570,9 @@ innobase_fts_text_cmp( charset, s1->f_str, static_cast(s1->f_len), s2->f_str, static_cast(s2->f_len), 0)); } + /******************************************************************//** compare two character string case insensitively according to their charset. */ -UNIV_INTERN int innobase_fts_text_case_cmp( /*=======================*/ @@ -6468,9 +7593,9 @@ innobase_fts_text_case_cmp( charset, s1->f_str, static_cast(s1->f_len), s2->f_str, static_cast(newlen), 0)); } + /******************************************************************//** Get the first character's code position for FTS index partition. */ -UNIV_INTERN ulint innobase_strnxfrm( /*==============*/ @@ -6499,7 +7624,6 @@ innobase_strnxfrm( /******************************************************************//** compare two character string according to their charset. 
*/ -UNIV_INTERN int innobase_fts_text_cmp_prefix( /*=========================*/ @@ -6523,7 +7647,6 @@ innobase_fts_text_cmp_prefix( /******************************************************************//** Makes all characters in a string lower case. */ -UNIV_INTERN size_t innobase_fts_casedn_str( /*====================*/ @@ -6552,7 +7675,6 @@ innobase_fts_casedn_str( Get the next token from the given string and store it in *token. It is mostly copied from MyISAM's doc parsing function ft_simple_get_word() @return length of string processed */ -UNIV_INTERN ulint innobase_mysql_fts_get_token( /*=========================*/ @@ -6560,10 +7682,7 @@ innobase_mysql_fts_get_token( const byte* start, /*!< in: start of text */ const byte* end, /*!< in: one character past end of text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset) /*!< out: offset to token, - measured as characters from - 'start' */ + fts_string_t* token) /*!< out: token's text */ { int mbl; const uchar* doc = start; @@ -6621,21 +7740,17 @@ innobase_mysql_fts_get_token( return(doc - start); } -/**************************************************************//** -Converts a MySQL type to an InnoDB type. Note that this function returns +/** Converts a MySQL type to an InnoDB type. Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. -@return DATA_BINARY, DATA_VARCHAR, ... */ -UNIV_INTERN +@param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type'; at least +ENUM and SET, and unsigned integer types are 'unsigned types' +@param[in] f MySQL Field +@return DATA_BINARY, DATA_VARCHAR, ... 
*/ ulint get_innobase_type_from_mysql_type( -/*==============================*/ - ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an - 'unsigned type'; - at least ENUM and SET, - and unsigned integer - types are 'unsigned types' */ - const void* f) /*!< in: MySQL Field */ + ulint* unsigned_flag, + const void* f) { const class Field* field = reinterpret_cast(f); @@ -6684,7 +7799,8 @@ get_innobase_type_from_mysql_type( return(DATA_VARMYSQL); } case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: if (field->binary()) { + case MYSQL_TYPE_STRING: + if (field->binary()) { return(DATA_FIXBINARY); } else if (strcmp(field->charset()->name, @@ -6704,13 +7820,14 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_YEAR: case MYSQL_TYPE_NEWDATE: return(DATA_INT); - case MYSQL_TYPE_TIMESTAMP: case MYSQL_TYPE_TIME: case MYSQL_TYPE_DATETIME: - if (field->key_type() == HA_KEYTYPE_BINARY) + case MYSQL_TYPE_TIMESTAMP: + if (field->key_type() == HA_KEYTYPE_BINARY) { return(DATA_FIXBINARY); - else + } else { return(DATA_INT); + } case MYSQL_TYPE_FLOAT: return(DATA_FLOAT); case MYSQL_TYPE_DOUBLE: @@ -6718,6 +7835,10 @@ get_innobase_type_from_mysql_type( case MYSQL_TYPE_DECIMAL: return(DATA_DECIMAL); case MYSQL_TYPE_GEOMETRY: + return (DATA_BLOB); + /* TODO: MySQL 5.7: Geometry + return(DATA_GEOMETRY); + */ case MYSQL_TYPE_TINY_BLOB: case MYSQL_TYPE_MEDIUM_BLOB: case MYSQL_TYPE_BLOB: @@ -6735,26 +7856,10 @@ get_innobase_type_from_mysql_type( return(0); } -/*******************************************************************//** -Writes an unsigned integer value < 64k to 2 bytes, in the little-endian -storage format. 
*/ -static inline -void -innobase_write_to_2_little_endian( -/*==============================*/ - byte* buf, /*!< in: where to store */ - ulint val) /*!< in: value to write, must be < 64k */ -{ - ut_a(val < 256 * 256); - - buf[0] = (byte)(val & 0xFF); - buf[1] = (byte)(val / 256); -} - /*******************************************************************//** Reads an unsigned integer value < 64k from 2 bytes, in the little-endian storage format. -@return value */ +@return value */ static inline uint innobase_read_from_2_little_endian( @@ -6771,7 +7876,7 @@ Stores a key value for a row to a buffer. UNIV_INTERN uint wsrep_store_key_val_for_row( -/*===============================*/ +/*=========================*/ THD* thd, TABLE* table, uint keynr, /*!< in: key number */ @@ -6801,7 +7906,7 @@ wsrep_store_key_val_for_row( if (key_part->null_bit) { if (buff_space > 0) { - if (record[key_part->null_offset] + if (record[key_part->null_offset] & key_part->null_bit) { *buff = 1; part_is_null = TRUE; @@ -6863,8 +7968,8 @@ wsrep_store_key_val_for_row( true_len = (ulint) cs->cset->well_formed_len(cs, (const char *) data, (const char *) data + len, - (uint) (key_len / - cs->mbmaxlen), + (uint) (key_len / + cs->mbmaxlen), &error); } @@ -6877,15 +7982,14 @@ wsrep_store_key_val_for_row( memcpy(sorted, data, true_len); true_len = wsrep_innobase_mysql_sort( - mysql_type, cs->number, sorted, true_len, + mysql_type, cs->number, sorted, true_len, REC_VERSION_56_MAX_INDEX_COL_LEN); - if (wsrep_protocol_version > 1) { - /* Note that we always reserve the maximum possible - length of the true VARCHAR in the key value, though - only len first bytes after the 2 length bytes contain - actual data. The rest of the space was reset to zero - in the bzero() call above. */ + /* Note that we always reserve the maximum possible + length of the true VARCHAR in the key value, though + only len first bytes after the 2 length bytes contain + actual data. 
The rest of the space was reset to zero + in the bzero() call above. */ if (true_len > buff_space) { fprintf (stderr, "WSREP: key truncated: %s\n", @@ -6893,11 +7997,11 @@ wsrep_store_key_val_for_row( true_len = buff_space; } memcpy(buff, sorted, true_len); - buff += true_len; + buff += true_len; buff_space -= true_len; - } else { - buff += key_len; - } + } else { + buff += key_len; + } } else if (mysql_type == MYSQL_TYPE_TINY_BLOB || mysql_type == MYSQL_TYPE_MEDIUM_BLOB || mysql_type == MYSQL_TYPE_BLOB @@ -6951,8 +8055,8 @@ wsrep_store_key_val_for_row( (const char *) blob_data, (const char *) blob_data + blob_len, - (uint) (key_len / - cs->mbmaxlen), + (uint) (key_len / + cs->mbmaxlen), &error); } @@ -6972,7 +8076,7 @@ wsrep_store_key_val_for_row( /* Note that we always reserve the maximum possible length of the BLOB prefix in the key value. */ - if (wsrep_protocol_version > 1) { + if (wsrep_protocol_version > 1) { if (true_len > buff_space) { fprintf (stderr, "WSREP: key truncated: %s\n", @@ -7040,8 +8144,8 @@ wsrep_store_key_val_for_row( (const char *)src_start, (const char *)src_start + key_len, - (uint) (key_len / - cs->mbmaxlen), + (uint) (key_len / + cs->mbmaxlen), &error); } memcpy(sorted, src_start, true_len); @@ -7069,285 +8173,8 @@ wsrep_store_key_val_for_row( DBUG_RETURN((uint)(buff - buff_start)); } #endif /* WITH_WSREP */ - -/*******************************************************************//** -Stores a key value for a row to a buffer. 
-@return key value length as stored in buff */ -UNIV_INTERN -uint -ha_innobase::store_key_val_for_row( -/*===============================*/ - uint keynr, /*!< in: key number */ - char* buff, /*!< in/out: buffer for the key value (in MySQL - format) */ - uint buff_len,/*!< in: buffer length */ - const uchar* record)/*!< in: row in MySQL format */ -{ - KEY* key_info = table->key_info + keynr; - KEY_PART_INFO* key_part = key_info->key_part; - KEY_PART_INFO* end = - key_part + key_info->user_defined_key_parts; - char* buff_start = buff; - enum_field_types mysql_type; - Field* field; - ibool is_null; - - DBUG_ENTER("store_key_val_for_row"); - - /* The format for storing a key field in MySQL is the following: - - 1. If the column can be NULL, then in the first byte we put 1 if the - field value is NULL, 0 otherwise. - - 2. If the column is of a BLOB type (it must be a column prefix field - in this case), then we put the length of the data in the field to the - next 2 bytes, in the little-endian format. If the field is SQL NULL, - then these 2 bytes are set to 0. Note that the length of data in the - field is <= column prefix length. - - 3. In a column prefix field, prefix_len next bytes are reserved for - data. In a normal field the max field length next bytes are reserved - for data. For a VARCHAR(n) the max field length is n. If the stored - value is the SQL NULL then these data bytes are set to 0. - - 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that - in the MySQL row format, the length is stored in 1 or 2 bytes, - depending on the maximum allowed length. But in the MySQL key value - format, the length always takes 2 bytes. - - We have to zero-fill the buffer so that MySQL is able to use a - simple memcmp to compare two key values to determine if they are - equal. MySQL does this to compare contents of two 'ref' values. 
*/ - - memset(buff, 0, buff_len); - - for (; key_part != end; key_part++) { - is_null = FALSE; - - if (key_part->null_bit) { - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff = 1; - is_null = TRUE; - } else { - *buff = 0; - } - buff++; - } - - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_VARCHAR) { - /* >= 5.0.3 true VARCHAR */ - ulint lenlen; - ulint len; - const byte* data; - ulint key_len; - ulint true_len; - const CHARSET_INFO* cs; - int error=0; - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - cs = field->charset(); - - lenlen = (ulint) - (((Field_varstring*) field)->length_bytes); - - data = row_mysql_read_true_varchar(&len, - (byte*) (record - + (ulint) get_field_offset(table, field)), - lenlen); - - true_len = len; - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char*) data, - (const char*) data + len, - (uint) (key_len / cs->mbmaxlen), - &error); - } - - /* In a column prefix index, we may need to truncate - the stored value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* The length in a key value is always stored in 2 - bytes */ - - row_mysql_store_true_var_len((byte*) buff, true_len, 2); - buff += 2; - - memcpy(buff, data, true_len); - - /* Note that we always reserve the maximum possible - length of the true VARCHAR in the key value, though - only len first bytes after the 2 length bytes contain - actual data. The rest of the space was reset to zero - in the memset() call above. */ - - buff += key_len; - - } else if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB - /* MYSQL_TYPE_GEOMETRY data is treated - as BLOB data in innodb. 
*/ - || mysql_type == MYSQL_TYPE_GEOMETRY) { - - const CHARSET_INFO* cs; - ulint key_len; - ulint true_len; - int error=0; - ulint blob_len; - const byte* blob_data; - - ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - - cs = field->charset(); - - blob_data = row_mysql_read_blob_ref(&blob_len, - (byte*) (record - + (ulint) get_field_offset(table, field)), - (ulint) field->pack_length()); - - true_len = blob_len; - - ut_a(get_field_offset(table, field) - == key_part->offset); - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (blob_len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char*) blob_data, - (const char*) blob_data - + blob_len, - (uint) (key_len / cs->mbmaxlen), - &error); - } - - /* All indexes on BLOB and TEXT are column prefix - indexes, and we may need to truncate the data to be - stored in the key value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* MySQL reserves 2 bytes for the length and the - storage of the number is little-endian */ - - innobase_write_to_2_little_endian( - (byte*) buff, true_len); - buff += 2; - - memcpy(buff, blob_data, true_len); - - /* Note that we always reserve the maximum possible - length of the BLOB prefix in the key value. */ - - buff += key_len; - } else { - /* Here we handle all other data types except the - true VARCHAR, BLOB and TEXT. Note that the column - value we store may be also in a column prefix - index. 
*/ - - const CHARSET_INFO* cs = NULL; - ulint true_len; - ulint key_len; - const uchar* src_start; - int error=0; - enum_field_types real_type; - - key_len = key_part->length; - - if (is_null) { - buff += key_len; - - continue; - } - - src_start = record + key_part->offset; - real_type = field->real_type(); - true_len = key_len; - - /* Character set for the field is defined only - to fields whose type is string and real field - type is not enum or set. For these fields check - if character set is multi byte. */ - - if (real_type != MYSQL_TYPE_ENUM - && real_type != MYSQL_TYPE_SET - && ( mysql_type == MYSQL_TYPE_VAR_STRING - || mysql_type == MYSQL_TYPE_STRING)) { - - cs = field->charset(); - - /* For multi byte character sets we need to - calculate the true length of the key */ - - if (key_len > 0 && cs->mbmaxlen > 1) { - - true_len = (ulint) - cs->cset->well_formed_len(cs, - (const char*) src_start, - (const char*) src_start - + key_len, - (uint) (key_len - / cs->mbmaxlen), - &error); - } - } - - memcpy(buff, src_start, true_len); - buff += true_len; - - /* Pad the unused space with spaces. */ - - if (true_len < key_len) { - ulint pad_len = key_len - true_len; - ut_a(cs != NULL); - ut_a(!(pad_len % cs->mbminlen)); - - cs->cset->fill(cs, buff, pad_len, - 0x20 /* space */); - buff += pad_len; - } - } - } - - ut_a(buff <= buff_start + buff_len); - - DBUG_RETURN((uint)(buff - buff_start)); -} - /**************************************************************//** -Determines if a field is needed in a prebuilt struct 'template'. +Determines if a field is needed in a m_prebuilt struct 'template'. 
@return field to use, or NULL if the field is not needed */ static const Field* @@ -7371,12 +8198,11 @@ build_template_needs_field( dict_index_t* index, /*!< in: InnoDB index to use */ const TABLE* table, /*!< in: MySQL table object */ ulint i, /*!< in: field index in InnoDB table */ - ulint sql_idx) /*!< in: field index in SQL table */ + ulint sql_idx, /*!< in: field index in SQL table */ + ulint num_v) /*!< in: num virtual column so far */ { const Field* field = table->field[sql_idx]; - ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); - if (!index_contains) { if (read_just_key) { /* If this is a 'key read', we do not need @@ -7397,8 +8223,9 @@ build_template_needs_field( return(field); } + ut_ad(i >= num_v); if (fetch_primary_key_cols - && dict_table_col_in_clustered_key(index->table, i)) { + && dict_table_col_in_clustered_key(index->table, i - num_v)) { /* This field is needed in the query */ return(field); @@ -7410,7 +8237,7 @@ build_template_needs_field( } /**************************************************************//** -Determines if a field is needed in a prebuilt struct 'template'. +Determines if a field is needed in a m_prebuilt struct 'template'. @return whether the field is needed for index condition pushdown */ inline bool @@ -7420,17 +8247,19 @@ build_template_needs_field_in_icp( const row_prebuilt_t* prebuilt,/*!< in: row fetch template */ bool contains,/*!< in: whether the index contains column i */ - ulint i) /*!< in: column number */ + ulint i, /*!< in: column number */ + bool is_virtual) + /*!< in: a virtual column or not */ { - ut_ad(contains == dict_index_contains_col_or_prefix(index, i)); + ut_ad(contains == dict_index_contains_col_or_prefix(index, i, is_virtual)); return(index == prebuilt->index ? 
contains - : dict_index_contains_col_or_prefix(prebuilt->index, i)); + : dict_index_contains_col_or_prefix(prebuilt->index, i, is_virtual)); } /**************************************************************//** -Adds a field to a prebuilt struct 'template'. +Adds a field to a m_prebuilt struct 'template'. @return the field template */ static mysql_row_templ_t* @@ -7441,35 +8270,58 @@ build_template_field( dict_index_t* index, /*!< in: InnoDB index to use */ TABLE* table, /*!< in: MySQL table object */ const Field* field, /*!< in: field in MySQL table */ - ulint i) /*!< in: field index in InnoDB table */ + ulint i, /*!< in: field index in InnoDB table */ + ulint v_no) /*!< in: field index for virtual col */ { mysql_row_templ_t* templ; const dict_col_t* col; - //ut_ad(field == table->field[i]); ut_ad(clust_index->table == index->table); - col = dict_table_get_nth_col(index->table, i); - templ = prebuilt->mysql_template + prebuilt->n_template++; UNIV_MEM_INVALID(templ, sizeof *templ); - templ->col_no = i; - templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index); - ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); - templ->rec_field_is_prefix = FALSE; - if (dict_index_is_clust(index)) { - templ->rec_field_no = templ->clust_rec_field_no; - templ->rec_prefix_field_no = ULINT_UNDEFINED; + if (innobase_is_v_fld(field)) { + templ->is_virtual = true; + col = &dict_table_get_nth_v_col(index->table, v_no)->m_col; } else { - /* If we're in a secondary index, keep track - * of the original index position even if this - * is just a prefix index; we will use this - * later to avoid a cluster index lookup in - * some cases.*/ + templ->is_virtual = false; + col = dict_table_get_nth_col(index->table, i); + } - templ->rec_field_no = dict_index_get_nth_col_pos(index, i, + if (!templ->is_virtual) { + templ->col_no = i; + templ->clust_rec_field_no = dict_col_get_clust_pos( + col, clust_index); + ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); + 
templ->rec_field_is_prefix = FALSE; + templ->rec_prefix_field_no = ULINT_UNDEFINED; + + if (dict_index_is_clust(index)) { + templ->rec_field_no = templ->clust_rec_field_no; + } else { + /* If we're in a secondary index, keep track + * of the original index position even if this + * is just a prefix index; we will use this + * later to avoid a cluster index lookup in + * some cases.*/ + + templ->rec_field_no = dict_index_get_nth_col_pos(index, i, &templ->rec_prefix_field_no); + } + } else { + templ->clust_rec_field_no = v_no; + templ->rec_prefix_field_no = ULINT_UNDEFINED; + + if (dict_index_is_clust(index)) { + templ->rec_field_no = templ->clust_rec_field_no; + } else { + templ->rec_field_no + = dict_index_get_nth_col_or_prefix_pos( + index, v_no, FALSE, true, + &templ->rec_prefix_field_no); + } + templ->icp_rec_field_no = ULINT_UNDEFINED; } if (field->real_maybe_null()) { @@ -7481,8 +8333,8 @@ build_template_field( templ->mysql_null_bit_mask = 0; } - templ->mysql_col_offset = (ulint) get_field_offset(table, field); + templ->mysql_col_offset = (ulint) get_field_offset(table, field); templ->mysql_col_len = (ulint) field->pack_length(); templ->type = col->mtype; templ->mysql_type = (ulint) field->type(); @@ -7490,6 +8342,8 @@ build_template_field( if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { templ->mysql_length_bytes = (ulint) (((Field_varstring*) field)->length_bytes); + } else { + templ->mysql_length_bytes = 0; } templ->charset = dtype_get_charset_coll(col->prtype); @@ -7509,23 +8363,34 @@ build_template_field( } } + /* For spatial index, we need to access cluster index. 
*/ + if (dict_index_is_spatial(index)) { + prebuilt->need_to_access_clustered = TRUE; + } + if (prebuilt->mysql_prefix_len < templ->mysql_col_offset + templ->mysql_col_len) { prebuilt->mysql_prefix_len = templ->mysql_col_offset + templ->mysql_col_len; } - if (templ->type == DATA_BLOB) { + if (DATA_LARGE_MTYPE(templ->type)) { prebuilt->templ_contains_blob = TRUE; } + if (templ->type == DATA_POINT) { + /* We set this only when it's DATA_POINT, but not + DATA_VAR_POINT */ + prebuilt->templ_contains_fixed_point = TRUE; + } + return(templ); } /**************************************************************//** -Builds a 'template' to the prebuilt struct. The template is used in fast +Builds a 'template' to the m_prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -UNIV_INTERN + void ha_innobase::build_template( /*========================*/ @@ -7539,20 +8404,20 @@ ha_innobase::build_template( ibool fetch_primary_key_cols = FALSE; ulint i, sql_idx; - if (prebuilt->select_lock_type == LOCK_X) { + if (m_prebuilt->select_lock_type == LOCK_X) { /* We always retrieve the whole clustered index record if we use exclusive row level locks, for example, if the read is done in an UPDATE statement. */ whole_row = true; } else if (!whole_row) { - if (prebuilt->hint_need_to_fetch_extra_cols + if (m_prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_ALL_COLS) { /* We know we must at least fetch all columns in the key, or all columns in the table */ - if (prebuilt->read_just_key) { + if (m_prebuilt->read_just_key) { /* MySQL has instructed us that it is enough to fetch the columns in the key; looks like MySQL can set this flag also when there is @@ -7564,68 +8429,80 @@ ha_innobase::build_template( } else { whole_row = true; } - } else if (prebuilt->hint_need_to_fetch_extra_cols + } else if (m_prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_PRIMARY_KEY) { /* We must at least fetch all primary key cols. 
Note that if the clustered index was internally generated by InnoDB on the row id (no primary key was defined), then row_search_for_mysql() will always retrieve the row id to a special buffer in the - prebuilt struct. */ + m_prebuilt struct. */ fetch_primary_key_cols = TRUE; } } - clust_index = dict_table_get_first_index(prebuilt->table); + clust_index = dict_table_get_first_index(m_prebuilt->table); - index = whole_row ? clust_index : prebuilt->index; + index = whole_row ? clust_index : m_prebuilt->index; - prebuilt->need_to_access_clustered = (index == clust_index); + m_prebuilt->need_to_access_clustered = (index == clust_index); - /* Either prebuilt->index should be a secondary index, or it + /* Either m_prebuilt->index should be a secondary index, or it should be the clustered index. */ ut_ad(dict_index_is_clust(index) == (index == clust_index)); /* Below we check column by column if we need to access the clustered index. */ - n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ + /* number of stored columns */ + n_stored_fields= (ulint)table->s->stored_fields; - if (!prebuilt->mysql_template) { - prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); + if (!m_prebuilt->mysql_template) { + m_prebuilt->mysql_template = (mysql_row_templ_t*) + ut_malloc_nokey(n_stored_fields * sizeof(mysql_row_templ_t)); } - prebuilt->template_type = whole_row + m_prebuilt->template_type = whole_row ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS; - prebuilt->null_bitmap_len = table->s->null_bytes; + m_prebuilt->null_bitmap_len = table->s->null_bytes; - /* Prepare to build prebuilt->mysql_template[]. */ - prebuilt->templ_contains_blob = FALSE; - prebuilt->mysql_prefix_len = 0; - prebuilt->n_template = 0; - prebuilt->idx_cond_n_cols = 0; + /* Prepare to build m_prebuilt->mysql_template[]. 
*/ + m_prebuilt->templ_contains_blob = FALSE; + m_prebuilt->templ_contains_fixed_point = FALSE; + m_prebuilt->mysql_prefix_len = 0; + m_prebuilt->n_template = 0; + m_prebuilt->idx_cond_n_cols = 0; /* Note that in InnoDB, i is the column number in the table. MySQL calls columns 'fields'. */ - if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { + if (active_index != MAX_KEY + && active_index == pushed_idx_cond_keyno) { + ulint num_v = 0; + /* Push down an index condition or an end_range check. */ for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + ibool index_contains; - while (!table->field[sql_idx]->stored_in_db()) { - sql_idx++; - } + while (!table->field[sql_idx]->stored_in_db()) { + sql_idx++; + } - const ibool index_contains - = dict_index_contains_col_or_prefix(index, i); + if (innobase_is_v_fld(table->field[sql_idx])) { + index_contains = dict_index_contains_col_or_prefix( + index, num_v, true); + } else { + index_contains = dict_index_contains_col_or_prefix( + index, i - num_v, false); + } /* Test if an end_range or an index condition refers to the field. Note that "index" and "index_contains" may refer to the clustered index. - Index condition pushdown is relative to prebuilt->index - (the index that is being looked up first). */ + Index condition pushdown is relative to + m_prebuilt->index (the index that is being + looked up first). */ /* When join_read_always_key() invokes this code via handler::ha_index_init() and @@ -7635,8 +8512,11 @@ ha_innobase::build_template( the subset field->part_of_key.is_set(active_index) which would be acceptable if end_range==NULL. */ + bool is_v = innobase_is_v_fld(table->field[sql_idx]); + if (build_template_needs_field_in_icp( - index, prebuilt, index_contains, i)) { + index, m_prebuilt, index_contains, + is_v ? 
num_v : i - num_v, is_v)) { /* Needed in ICP */ const Field* field; mysql_row_templ_t* templ; @@ -7646,33 +8526,41 @@ ha_innobase::build_template( } else { field = build_template_needs_field( index_contains, - prebuilt->read_just_key, + m_prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i, sql_idx); + index, table, i, sql_idx, num_v); if (!field) { + if (innobase_is_v_fld( + table->field[sql_idx])) { + num_v++; + } continue; } } templ = build_template_field( - prebuilt, clust_index, index, - table, field, i); - prebuilt->idx_cond_n_cols++; - ut_ad(prebuilt->idx_cond_n_cols - == prebuilt->n_template); + m_prebuilt, clust_index, index, + table, field, i - num_v, 0); - if (index == prebuilt->index) { + ut_ad(!templ->is_virtual); + + m_prebuilt->idx_cond_n_cols++; + ut_ad(m_prebuilt->idx_cond_n_cols + == m_prebuilt->n_template); + + if (index == m_prebuilt->index) { templ->icp_rec_field_no = templ->rec_field_no; } else { templ->icp_rec_field_no = dict_index_get_nth_col_pos( - prebuilt->index, i, - NULL); + m_prebuilt->index, + i - num_v, + &templ->rec_prefix_field_no); } - if (dict_index_is_clust(prebuilt->index)) { + if (dict_index_is_clust(m_prebuilt->index)) { ut_ad(templ->icp_rec_field_no != ULINT_UNDEFINED); /* If the primary key includes @@ -7683,7 +8571,7 @@ ha_innobase::build_template( off-page (externally stored) columns. */ if (templ->icp_rec_field_no - < prebuilt->index->n_uniq) { + < m_prebuilt->index->n_uniq) { /* This is a key column; all set. */ continue; @@ -7699,7 +8587,9 @@ ha_innobase::build_template( templ->icp_rec_field_no = dict_index_get_nth_col_or_prefix_pos( - prebuilt->index, i, TRUE, NULL); + m_prebuilt->index, i - num_v, + true, false, + &templ->rec_prefix_field_no); ut_ad(templ->icp_rec_field_no != ULINT_UNDEFINED); @@ -7719,29 +8609,44 @@ ha_innobase::build_template( we were unable to use an accurate condition for end_range in the "if" condition above, and the following assertion would fail. 
- ut_ad(!dict_index_is_clust(prebuilt->index) + ut_ad(!dict_index_is_clust(m_prebuilt->index) || templ->rec_field_no - < prebuilt->index->n_uniq); + < m_prebuilt->index->n_uniq); */ } + if (innobase_is_v_fld(table->field[sql_idx])) { + num_v++; + } } - ut_ad(prebuilt->idx_cond_n_cols > 0); - ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template); + ut_ad(m_prebuilt->idx_cond_n_cols > 0); + ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template); + + num_v = 0; /* Include the fields that are not needed in index condition pushdown. */ for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + mysql_row_templ_t* templ; + ibool index_contains; - while (!table->field[sql_idx]->stored_in_db()) { - sql_idx++; - } + while (!table->field[sql_idx]->stored_in_db()) { + sql_idx++; + } - const ibool index_contains - = dict_index_contains_col_or_prefix(index, i); + if (innobase_is_v_fld(table->field[sql_idx])) { + index_contains = dict_index_contains_col_or_prefix( + index, num_v, true); + } else { + index_contains = dict_index_contains_col_or_prefix( + index, i - num_v, false); + } + + bool is_v = innobase_is_v_fld(table->field[sql_idx]); if (!build_template_needs_field_in_icp( - index, prebuilt, index_contains, i)) { + index, m_prebuilt, index_contains, + is_v ? 
num_v : i - num_v, is_v)) { /* Not needed in ICP */ const Field* field; @@ -7750,60 +8655,106 @@ ha_innobase::build_template( } else { field = build_template_needs_field( index_contains, - prebuilt->read_just_key, + m_prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i, sql_idx); + index, table, i, sql_idx, num_v); if (!field) { + if (innobase_is_v_fld(table->field[sql_idx])) { + num_v++; + } continue; } } - build_template_field(prebuilt, - clust_index, index, - table, field, i); + templ = build_template_field( + m_prebuilt, clust_index, index, + table, field, i - num_v, num_v); + + if (templ->is_virtual) { + num_v++; + } } } - prebuilt->idx_cond = this; + m_prebuilt->idx_cond = this; } else { + mysql_row_templ_t* templ; + ulint num_v = 0; /* No index condition pushdown */ - prebuilt->idx_cond = NULL; + m_prebuilt->idx_cond = NULL; for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { const Field* field; - while (!table->field[sql_idx]->stored_in_db()) { - sql_idx++; - } + while (!table->field[sql_idx]->stored_in_db()) { + sql_idx++; + } if (whole_row) { + /* Even this is whole_row, if the seach is + on a virtual column, and read_just_key is + set, and field is not in this index, we + will not try to fill the value since they + are not stored in such index nor in the + cluster index. 
*/ + if (innobase_is_v_fld(table->field[sql_idx]) + && m_prebuilt->read_just_key + && !dict_index_contains_col_or_prefix( + m_prebuilt->index, num_v, true)) + { + /* Turn off ROW_MYSQL_WHOLE_ROW */ + m_prebuilt->template_type = + ROW_MYSQL_REC_FIELDS; + num_v++; + continue; + } + field = table->field[sql_idx]; } else { + ibool contain; + + if (innobase_is_v_fld(table->field[sql_idx])) { + contain = dict_index_contains_col_or_prefix( + index, num_v, true); + } else { + contain = dict_index_contains_col_or_prefix( + index, i - num_v, + false); + } + + field = build_template_needs_field( - dict_index_contains_col_or_prefix( - index, i), - prebuilt->read_just_key, + contain, + m_prebuilt->read_just_key, fetch_all_in_key, fetch_primary_key_cols, - index, table, i, sql_idx); + index, table, i, sql_idx, num_v); + if (!field) { + if (innobase_is_v_fld(table->field[sql_idx])) { + num_v++; + } continue; } } - build_template_field(prebuilt, clust_index, index, - table, field, i); + templ = build_template_field( + m_prebuilt, clust_index, index, + table, field, i - num_v, num_v); + if (templ->is_virtual) { + num_v++; + } } } - if (index != clust_index && prebuilt->need_to_access_clustered) { + if (index != clust_index && m_prebuilt->need_to_access_clustered) { /* Change rec_field_no's to correspond to the clustered index record */ - for (i = 0; i < prebuilt->n_template; i++) { + for (i = 0; i < m_prebuilt->n_template; i++) { mysql_row_templ_t* templ - = &prebuilt->mysql_template[i]; + = &m_prebuilt->mysql_template[i]; templ->rec_field_no = templ->clust_rec_field_no; } @@ -7816,21 +8767,29 @@ binlogging. We need to eliminate the non-determinism that will arise in INSERT ... SELECT type of statements, since MySQL binlog only stores the min value of the autoinc interval. Once that is fixed we can get rid of the special lock handling. 
-@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN +@return DB_SUCCESS if all OK else error code */ + dberr_t ha_innobase::innobase_lock_autoinc(void) /*====================================*/ { DBUG_ENTER("ha_innobase::innobase_lock_autoinc"); dberr_t error = DB_SUCCESS; + long lock_mode = innobase_autoinc_lock_mode; - ut_ad(!srv_read_only_mode); + ut_ad(!srv_read_only_mode + || dict_table_is_intrinsic(m_prebuilt->table)); - switch (innobase_autoinc_lock_mode) { + if (dict_table_is_intrinsic(m_prebuilt->table)) { + /* Intrinsic table are not shared accorss connection + so there is no need to AUTOINC lock the table. */ + lock_mode = AUTOINC_NO_LOCKING; + } + + switch (lock_mode) { case AUTOINC_NO_LOCKING: /* Acquire only the AUTOINC mutex. */ - dict_table_autoinc_lock(prebuilt->table); + dict_table_autoinc_lock(m_prebuilt->table); break; case AUTOINC_NEW_STYLE_LOCKING: @@ -7839,21 +8798,20 @@ ha_innobase::innobase_lock_autoinc(void) transaction has already acquired the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT etc. type of statement. */ - if (thd_sql_command(user_thd) == SQLCOM_INSERT - || thd_sql_command(user_thd) == SQLCOM_REPLACE - || thd_sql_command(user_thd) == SQLCOM_END // RBR event + if (thd_sql_command(m_user_thd) == SQLCOM_INSERT + || thd_sql_command(m_user_thd) == SQLCOM_REPLACE + || thd_sql_command(m_user_thd) == SQLCOM_END // RBR event ) { - dict_table_t* ib_table = prebuilt->table; /* Acquire the AUTOINC mutex. */ - dict_table_autoinc_lock(ib_table); + dict_table_autoinc_lock(m_prebuilt->table); /* We need to check that another transaction isn't already holding the AUTOINC lock on the table. */ - if (ib_table->n_waiting_or_granted_auto_inc_locks) { + if (m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) { /* Release the mutex to avoid deadlocks and fall back to old style locking. */ - dict_table_autoinc_unlock(ib_table); + dict_table_autoinc_unlock(m_prebuilt->table); } else { /* Do not fall back to old style locking. 
*/ break; @@ -7864,12 +8822,12 @@ ha_innobase::innobase_lock_autoinc(void) case AUTOINC_OLD_STYLE_LOCKING: DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", ut_ad(0);); - error = row_lock_table_autoinc_for_mysql(prebuilt); + error = row_lock_table_autoinc_for_mysql(m_prebuilt); if (error == DB_SUCCESS) { /* Acquire the AUTOINC mutex. */ - dict_table_autoinc_lock(prebuilt->table); + dict_table_autoinc_lock(m_prebuilt->table); } break; @@ -7880,34 +8838,11 @@ ha_innobase::innobase_lock_autoinc(void) DBUG_RETURN(error); } -/********************************************************************//** -Reset the autoinc value in the table. -@return DB_SUCCESS if all went well else error code */ -UNIV_INTERN -dberr_t -ha_innobase::innobase_reset_autoinc( -/*================================*/ - ulonglong autoinc) /*!< in: value to store */ -{ - dberr_t error; - - error = innobase_lock_autoinc(); - - if (error == DB_SUCCESS) { - - dict_table_autoinc_initialize(prebuilt->table, autoinc); - - dict_table_autoinc_unlock(prebuilt->table); - } - - return(error); -} - /********************************************************************//** Store the autoinc value in the table. The autoinc value is only set if it's greater than the existing autoinc value in the table. -@return DB_SUCCESS if all went well else error code */ -UNIV_INTERN +@return DB_SUCCESS if all went well else error code */ + dberr_t ha_innobase::innobase_set_max_autoinc( /*==================================*/ @@ -7919,49 +8854,88 @@ ha_innobase::innobase_set_max_autoinc( if (error == DB_SUCCESS) { - dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc); + dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc); - dict_table_autoinc_unlock(prebuilt->table); + dict_table_autoinc_unlock(m_prebuilt->table); } return(error); } +/** Write Row interface optimized for intrinisc table. +@param[in] record a row in MySQL format. 
+@return 0 on success or error code */ +int +ha_innobase::intrinsic_table_write_row(uchar* record) +{ + dberr_t err; + + /* No auto-increment support for intrinsic table. */ + ut_ad(!(table->next_number_field && record == table->record[0])); + + if (m_prebuilt->mysql_template == NULL + || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { + /* Build the template used in converting quickly between + the two database formats */ + build_template(true); + } + + err = row_insert_for_mysql((byte*) record, m_prebuilt); + + return(convert_error_code_to_mysql( + err, m_prebuilt->table->flags, m_user_thd)); +} + /********************************************************************//** Stores a row in an InnoDB database, to the table specified in this handle. -@return error code */ -UNIV_INTERN +@return error code */ + int ha_innobase::write_row( /*===================*/ uchar* record) /*!< in: a row in MySQL format */ { dberr_t error; - int error_result= 0; - ibool auto_inc_used= FALSE; #ifdef WITH_WSREP - ibool auto_inc_inserted= FALSE; /* if NULL was inserted */ + ibool auto_inc_inserted= FALSE; /* if NULL was inserted */ #endif ulint sql_command; - trx_t* trx = thd_to_trx(user_thd); + int error_result = 0; + bool auto_inc_used = false; DBUG_ENTER("ha_innobase::write_row"); + if (dict_table_is_intrinsic(m_prebuilt->table)) { + DBUG_RETURN(intrinsic_table_write_row(record)); + } + + trx_t* trx = thd_to_trx(m_user_thd); + TrxInInnoDB trx_in_innodb(trx); + + if (!dict_table_is_intrinsic(m_prebuilt->table) + && trx_in_innodb.is_aborted()) { + + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + + /* Step-1: Validation checks before we commence write_row operation. 
*/ if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (prebuilt->trx != trx) { - sql_print_error("The transaction object for the table handle " - "is at %p, but for the current thread it is at " - "%p", - (const void*) prebuilt->trx, (const void*) trx); + } else if (m_prebuilt->trx != trx) { - fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr); - ut_print_buf(stderr, ((const byte*) prebuilt) - 100, 200); - fputs("\n" - "InnoDB: Dump of 200 bytes around ha_data: ", - stderr); + ib::error() << "The transaction object for the table handle is" + " at " << static_cast(m_prebuilt->trx) + << ", but for the current thread it is at " + << static_cast(trx); + + fputs("InnoDB: Dump of 200 bytes around m_prebuilt: ", stderr); + ut_print_buf(stderr, ((const byte*) m_prebuilt) - 100, 200); + fputs("\nInnoDB: Dump of 200 bytes around ha_data: ", stderr); ut_print_buf(stderr, ((const byte*) trx) - 100, 200); putc('\n', stderr); ut_error; @@ -7969,25 +8943,26 @@ ha_innobase::write_row( ++trx->will_lock; } - ha_statistic_increment(&SSV::ha_write_count); - - sql_command = thd_sql_command(user_thd); + /* Step-2: Intermediate commit if original operation involves ALTER + table with algorithm = copy. Intermediate commit ease pressure on + recovery if server crashes while ALTER is active. 
*/ + sql_command = thd_sql_command(m_user_thd); if ((sql_command == SQLCOM_ALTER_TABLE || sql_command == SQLCOM_OPTIMIZE || sql_command == SQLCOM_CREATE_INDEX #ifdef WITH_WSREP - || (wsrep_on(user_thd) && wsrep_load_data_splitting && + || (wsrep_on(m_user_thd) && wsrep_load_data_splitting && sql_command == SQLCOM_LOAD && !thd_test_options( - user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + m_user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) #endif /* WITH_WSREP */ || sql_command == SQLCOM_DROP_INDEX) - && num_write_row >= 10000) { + && m_num_write_row >= 10000) { #ifdef WITH_WSREP - if (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) { - WSREP_DEBUG("forced trx split for LOAD: %s", - wsrep_thd_query(user_thd)); + if (wsrep_on(m_user_thd) && sql_command == SQLCOM_LOAD) { + WSREP_DEBUG("forced trx split for LOAD: %s", + wsrep_thd_query(m_user_thd)); } #endif /* WITH_WSREP */ /* ALTER TABLE is COMMITted at every 10000 copied rows. @@ -8002,7 +8977,7 @@ ha_innobase::write_row( dict_table_t* src_table; enum lock_mode mode; - num_write_row = 0; + m_num_write_row = 0; /* Commit the transaction. This will release the table locks, so they have to be acquired again. */ @@ -8010,28 +8985,20 @@ ha_innobase::write_row( /* Altering an InnoDB table */ /* Get the source table. 
*/ src_table = lock_get_src_table( - prebuilt->trx, prebuilt->table, &mode); + m_prebuilt->trx, m_prebuilt->table, &mode); if (!src_table) { no_commit: /* Unknown situation: do not commit */ - /* - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ALTER TABLE is holding lock" - " on %lu tables!\n", - prebuilt->trx->mysql_n_tables_locked); - */ ; - } else if (src_table == prebuilt->table) { + } else if (src_table == m_prebuilt->table) { #ifdef WITH_WSREP - if (wsrep_on(user_thd) && + if (wsrep_on(m_user_thd) && wsrep_load_data_splitting && sql_command == SQLCOM_LOAD && - !thd_test_options(user_thd, + !thd_test_options(m_user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - switch (wsrep_run_wsrep_commit(user_thd, 1)) - { + switch (wsrep_run_wsrep_commit(m_user_thd, 1)) { case WSREP_TRX_OK: break; case WSREP_TRX_SIZE_EXCEEDED: @@ -8040,71 +9007,72 @@ no_commit: DBUG_RETURN(1); } - if (binlog_hton->commit(binlog_hton, user_thd, 1)) + if (binlog_hton->commit(binlog_hton, m_user_thd, 1)) { DBUG_RETURN(1); - wsrep_post_commit(user_thd, TRUE); + } + wsrep_post_commit(m_user_thd, TRUE); } #endif /* WITH_WSREP */ /* Source table is not in InnoDB format: no need to re-acquire locks on it. */ /* Altering to InnoDB format */ - innobase_commit(ht, user_thd, 1); + innobase_commit(ht, m_user_thd, 1); /* Note that this transaction is still active. */ - trx_register_for_2pc(prebuilt->trx); + trx_register_for_2pc(m_prebuilt->trx); /* We will need an IX lock on the destination table. 
*/ - prebuilt->sql_stat_start = TRUE; + m_prebuilt->sql_stat_start = TRUE; } else { #ifdef WITH_WSREP - if (wsrep_on(user_thd) && + if (wsrep_on(m_user_thd) && wsrep_load_data_splitting && sql_command == SQLCOM_LOAD && - !thd_test_options(user_thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - { - switch (wsrep_run_wsrep_commit(user_thd, 1)) - { + !thd_test_options(m_user_thd, + OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + switch (wsrep_run_wsrep_commit(m_user_thd, 1)) { case WSREP_TRX_OK: - break; + break; case WSREP_TRX_SIZE_EXCEEDED: case WSREP_TRX_CERT_FAIL: case WSREP_TRX_ERROR: - DBUG_RETURN(1); + DBUG_RETURN(1); } - if (binlog_hton->commit(binlog_hton, user_thd, 1)) + if (binlog_hton->commit(binlog_hton, m_user_thd, 1)) { DBUG_RETURN(1); - wsrep_post_commit(user_thd, TRUE); + } + + wsrep_post_commit(m_user_thd, TRUE); } #endif /* WITH_WSREP */ /* Ensure that there are no other table locks than LOCK_IX and LOCK_AUTO_INC on the destination table. */ - if (!lock_is_table_exclusive(prebuilt->table, - prebuilt->trx)) { + if (!lock_is_table_exclusive(m_prebuilt->table, + m_prebuilt->trx)) { goto no_commit; } /* Commit the transaction. This will release the table locks, so they have to be acquired again. */ - innobase_commit(ht, user_thd, 1); + innobase_commit(ht, m_user_thd, 1); /* Note that this transaction is still active. */ - trx_register_for_2pc(prebuilt->trx); + trx_register_for_2pc(m_prebuilt->trx); /* Re-acquire the table lock on the source table. */ - row_lock_table_for_mysql(prebuilt, src_table, mode); + row_lock_table_for_mysql(m_prebuilt, src_table, mode); /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; + m_prebuilt->sql_stat_start = TRUE; } } - num_write_row++; + m_num_write_row++; - /* This is the case where the table has an auto-increment column */ + /* Step-3: Handling of Auto-Increment Columns. 
*/ if (table->next_number_field && record == table->record[0]) { /* Reset the error code before calling innobase_get_auto_increment(). */ - prebuilt->autoinc_error = DB_SUCCESS; + m_prebuilt->autoinc_error = DB_SUCCESS; #ifdef WITH_WSREP auto_inc_inserted= (table->next_number_field->val_int() == 0); @@ -8115,13 +9083,13 @@ no_commit: /* Handle the case where the AUTOINC sub-system failed during initialization. */ - if (prebuilt->autoinc_error == DB_UNSUPPORTED) { + if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) { error_result = ER_AUTOINC_READ_FAILED; /* Set the error message to report too. */ my_error(ER_AUTOINC_READ_FAILED, MYF(0)); goto func_exit; - } else if (prebuilt->autoinc_error != DB_SUCCESS) { - error = prebuilt->autoinc_error; + } else if (m_prebuilt->autoinc_error != DB_SUCCESS) { + error = m_prebuilt->autoinc_error; goto report_error; } @@ -8129,11 +9097,13 @@ no_commit: goto func_exit; } - auto_inc_used = TRUE; + auto_inc_used = true; } - if (prebuilt->mysql_template == NULL - || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { + /* Step-4: Prepare INSERT graph that will be executed for actual INSERT + (This is a one time operation) */ + if (m_prebuilt->mysql_template == NULL + || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { /* Build the template used in converting quickly between the two database formats */ @@ -8141,12 +9111,14 @@ no_commit: build_template(true); } - innobase_srv_conc_enter_innodb(prebuilt->trx); + innobase_srv_conc_enter_innodb(m_prebuilt); - error = row_insert_for_mysql((byte*) record, prebuilt); - DEBUG_SYNC(user_thd, "ib_after_row_insert"); + /* Step-5: Execute insert graph that will result in actual insert. */ + error = row_insert_for_mysql((byte*) record, m_prebuilt); - /* Handle duplicate key errors */ + DEBUG_SYNC(m_user_thd, "ib_after_row_insert"); + + /* Step-6: Handling of errors related to auto-increment. 
*/ if (auto_inc_used) { ulonglong auto_inc; ulonglong col_max_value; @@ -8154,17 +9126,16 @@ no_commit: /* Note the number of rows processed for this statement, used by get_auto_increment() to determine the number of AUTO-INC values to reserve. This is only useful for a mult-value INSERT - and is a statement level counter.*/ + and is a statement level counter. */ if (trx->n_autoinc_rows > 0) { --trx->n_autoinc_rows; } /* We need the upper limit of the col type to check for whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); + col_max_value = innobase_get_int_col_max_value(table->next_number_field); - /* Get the value that MySQL attempted to store in the table.*/ + /* Get the value that MySQL attempted to store in the table. */ auto_inc = table->next_number_field->val_int(); switch (error) { @@ -8194,27 +9165,27 @@ no_commit: WSREP_DEBUG("DUPKEY error for autoinc\n" "THD %ld, value %llu, off %llu inc %llu", - thd_get_thread_id(current_thd), + thd_get_thread_id(m_user_thd), auto_inc, - prebuilt->autoinc_offset, - prebuilt->autoinc_increment); + m_prebuilt->autoinc_offset, + m_prebuilt->autoinc_increment); - if (wsrep_on(current_thd) && + if (wsrep_on(m_user_thd) && auto_inc_inserted && wsrep_drupal_282555_workaround && - wsrep_thd_retry_counter(current_thd) == 0 && - !thd_test_options(current_thd, - OPTION_NOT_AUTOCOMMIT | + wsrep_thd_retry_counter(m_user_thd) == 0 && + !thd_test_options(m_user_thd, + OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { WSREP_DEBUG( "retrying insert: %s", - (*wsrep_thd_query(current_thd)) ? - wsrep_thd_query(current_thd) : + (*wsrep_thd_query(m_user_thd)) ? 
+ wsrep_thd_query(m_user_thd) : (char *)"void"); error= DB_SUCCESS; wsrep_thd_set_conflict_state( - current_thd, MUST_ABORT); - innobase_srv_conc_exit_innodb(prebuilt->trx); + m_user_thd, MUST_ABORT); + innobase_srv_conc_exit_innodb(m_prebuilt); /* jump straight to func exit over * later wsrep hooks */ goto func_exit; @@ -8232,21 +9203,21 @@ no_commit: /* If the actual value inserted is greater than the upper limit of the interval, then we try and update the table upper limit. Note: last_value - will be 0 if get_auto_increment() was not called.*/ + will be 0 if get_auto_increment() was not called. */ - if (auto_inc >= prebuilt->autoinc_last_value) { + if (auto_inc >= m_prebuilt->autoinc_last_value) { set_max_autoinc: /* This should filter out the negative values set explicitly by the user. */ if (auto_inc <= col_max_value) { - ut_a(prebuilt->autoinc_increment > 0); + ut_a(m_prebuilt->autoinc_increment > 0); ulonglong offset; ulonglong increment; dberr_t err; - offset = prebuilt->autoinc_offset; - increment = prebuilt->autoinc_increment; + offset = m_prebuilt->autoinc_offset; + increment = m_prebuilt->autoinc_increment; auto_inc = innobase_next_autoinc( auto_inc, @@ -8267,9 +9238,10 @@ set_max_autoinc: } } - innobase_srv_conc_exit_innodb(prebuilt->trx); + innobase_srv_conc_exit_innodb(m_prebuilt); report_error: + /* Step-7: Cleanup and exit. 
*/ if (error == DB_TABLESPACE_DELETED) { ib_senderrf( trx->mysql_thd, IB_LOG_LEVEL_ERROR, @@ -8277,18 +9249,16 @@ report_error: table->s->table_name.str); } - error_result = convert_error_code_to_mysql(error, - prebuilt->table->flags, - user_thd); + error_result = convert_error_code_to_mysql( + error, m_prebuilt->table->flags, m_user_thd); #ifdef WITH_WSREP - if (!error_result && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_consistency_check(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - if (wsrep_append_keys(user_thd, false, record, NULL)) + if (!error_result && + wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE && + wsrep_on(m_user_thd) && + !wsrep_consistency_check(m_user_thd) && + !wsrep_thd_ignore_table(m_user_thd)) { + if (wsrep_append_keys(m_user_thd, false, record, NULL)) { DBUG_PRINT("wsrep", ("row key failed")); error_result = HA_ERR_INTERNAL_ERROR; @@ -8308,16 +9278,57 @@ func_exit: DBUG_RETURN(error_result); } +/** Fill the update vector's "old_vrow" field for those non-updated, +but indexed columns. Such columns could stil present in the virtual +index rec fields even if they are not updated (some other fields updated), +so needs to be logged. 
+@param[in] prebuilt InnoDB prebuilt struct +@param[in,out] vfield field to filled +@param[in] o_len actual column length +@param[in,out] col column to be filled +@param[in] old_mysql_row_col MySQL old field ptr +@param[in] col_pack_len MySQL field col length +@param[in,out] buf buffer for a converted integer value +@return used buffer ptr from row_mysql_store_col_in_innobase_format() */ +static +byte* +innodb_fill_old_vcol_val( + row_prebuilt_t* prebuilt, + dfield_t* vfield, + ulint o_len, + dict_col_t* col, + const byte* old_mysql_row_col, + ulint col_pack_len, + byte* buf) +{ + dict_col_copy_type( + col, dfield_get_type(vfield)); + if (o_len != UNIV_SQL_NULL) { + + buf = row_mysql_store_col_in_innobase_format( + vfield, + buf, + TRUE, + old_mysql_row_col, + col_pack_len, + dict_table_is_comp(prebuilt->table)); + } else { + dfield_set_null(vfield); + } + + return(buf); +} + /**********************************************************************//** Checks which fields have changed in a row and stores information of them to an update vector. 
-@return DB_SUCCESS or error code */ +@return DB_SUCCESS or error code */ static dberr_t calc_row_difference( /*================*/ upd_t* uvect, /*!< in/out: update vector */ - uchar* old_row, /*!< in: old row in MySQL format */ + const uchar* old_row, /*!< in: old row in MySQL format */ uchar* new_row, /*!< in: new row in MySQL format */ TABLE* table, /*!< in: table in MySQL data dictionary */ @@ -8334,6 +9345,7 @@ calc_row_difference( ulint n_len; ulint col_pack_len; const byte* new_mysql_row_col; + const byte* old_mysql_row_col; const byte* o_ptr; const byte* n_ptr; byte* buf; @@ -8342,13 +9354,14 @@ calc_row_difference( ulint n_changed = 0; dfield_t dfield; dict_index_t* clust_index; - uint sql_idx, innodb_idx= 0; + uint sql_idx,i, innodb_idx= 0; ibool changes_fts_column = FALSE; ibool changes_fts_doc_col = FALSE; - trx_t* trx = thd_to_trx(thd); + trx_t* trx = thd_to_trx(thd); doc_id_t doc_id = FTS_NULL_DOC_ID; + ulint num_v = 0; - ut_ad(!srv_read_only_mode); + ut_ad(!srv_read_only_mode || dict_table_is_intrinsic(prebuilt->table)); n_fields = table->s->fields; clust_index = dict_table_get_first_index(prebuilt->table); @@ -8356,10 +9369,20 @@ calc_row_difference( /* We use upd_buff to convert changed fields */ buf = (byte*) upd_buff; - for (sql_idx = 0; sql_idx < n_fields; sql_idx++) { + for (sql_idx = 0,i=0; i < n_fields; i++, sql_idx++) { field = table->field[sql_idx]; - if (!field->stored_in_db()) - continue; + bool is_virtual = innobase_is_v_fld(field); + dict_col_t* col; + + if (!field->stored_in_db()) { + continue; + } + + if (is_virtual) { + col = &prebuilt->table->v_cols[num_v].m_col; + } else { + col = &prebuilt->table->cols[innodb_idx - num_v]; + } o_ptr = (const byte*) old_row + get_field_offset(table, field); n_ptr = (const byte*) new_row + get_field_offset(table, field); @@ -8367,6 +9390,7 @@ calc_row_difference( /* Use new_mysql_row_col and col_pack_len save the values */ new_mysql_row_col = n_ptr; + old_mysql_row_col = o_ptr; col_pack_len = 
field->pack_length(); o_len = col_pack_len; @@ -8377,11 +9401,14 @@ calc_row_difference( field_mysql_type = field->type(); - col_type = prebuilt->table->cols[innodb_idx].mtype; + col_type = col->mtype; switch (col_type) { case DATA_BLOB: + case DATA_POINT: + case DATA_VAR_POINT: + case DATA_GEOMETRY: o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); @@ -8422,7 +9449,6 @@ calc_row_difference( } } - if (field->real_maybe_null()) { if (field->is_null_in_record(old_row)) { o_len = UNIV_SQL_NULL; @@ -8433,6 +9459,64 @@ calc_row_difference( } } +#ifdef UNIV_DEBUG + bool online_ord_part = false; +#endif + + if (is_virtual) { + /* If the virtual column is not indexed, + we shall ignore it for update */ + if (!col->ord_part) { + /* Check whether there is a table-rebuilding + online ALTER TABLE in progress, and this + virtual column could be newly indexed, thus + it will be materialized. Then we will have + to log its update. + Note, we do not support online dropping virtual + column while adding new index, nor with + online alter column order while adding index, + so the virtual column sequence must not change + if it is online operation */ + if (dict_index_is_online_ddl(clust_index) + && row_log_col_is_indexed(clust_index, + num_v)) { +#ifdef UNIV_DEBUG + online_ord_part = true; +#endif + } else { + num_v++; + continue; + } + } + + if (!uvect->old_vrow) { + uvect->old_vrow = dtuple_create_with_vcol( + uvect->heap, 0, prebuilt->table->n_v_cols); + } + + ulint max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT( + prebuilt->table); + + /* for virtual columns, we only materialize + its index, and index field length would not + exceed max_field_len. 
So continue if the + first max_field_len bytes are matched up */ + if (o_len != UNIV_SQL_NULL + && n_len != UNIV_SQL_NULL + && o_len >= max_field_len + && n_len >= max_field_len + && memcmp(o_ptr, n_ptr, max_field_len) == 0) { + dfield_t* vfield = dtuple_get_nth_v_field( + uvect->old_vrow, num_v); + buf = innodb_fill_old_vcol_val( + prebuilt, vfield, o_len, + col, old_mysql_row_col, + col_pack_len, buf); + num_v++; + continue; + } + } + if (o_len != n_len || (o_len != UNIV_SQL_NULL && 0 != memcmp(o_ptr, n_ptr, o_len))) { /* The field has changed */ @@ -8443,10 +9527,18 @@ calc_row_difference( /* Let us use a dummy dfield to make the conversion from the MySQL column format to the InnoDB format */ + + /* If the length of new geometry object is 0, means + this object is invalid geometry object, we need + to block it. */ + if (DATA_GEOMETRY_MTYPE(col_type) + && o_len != 0 && n_len == 0) { + return(DB_CANT_CREATE_GEOMETRY_OBJECT); + } + if (n_len != UNIV_SQL_NULL) { - dict_col_copy_type(prebuilt->table->cols + - innodb_idx, - dfield_get_type(&dfield)); + dict_col_copy_type( + col, dfield_get_type(&dfield)); buf = row_mysql_store_col_in_innobase_format( &dfield, @@ -8457,13 +9549,57 @@ calc_row_difference( dict_table_is_comp(prebuilt->table)); dfield_copy(&ufield->new_val, &dfield); } else { + dict_col_copy_type( + col, dfield_get_type(&ufield->new_val)); dfield_set_null(&ufield->new_val); } ufield->exp = NULL; ufield->orig_len = 0; - ufield->field_no = dict_col_get_clust_pos( - &prebuilt->table->cols[innodb_idx], clust_index); + if (is_virtual) { + dfield_t* vfield = dtuple_get_nth_v_field( + uvect->old_vrow, num_v); + upd_fld_set_virtual_col(ufield); + ufield->field_no = num_v; + + ut_ad(col->ord_part || online_ord_part); + ufield->old_v_val = static_cast( + mem_heap_alloc( + uvect->heap, + sizeof *ufield->old_v_val)); + + if (!field->is_null_in_record(old_row)) { + if (n_len == UNIV_SQL_NULL) { + dict_col_copy_type( + col, dfield_get_type( + &dfield)); + } + + buf = 
row_mysql_store_col_in_innobase_format( + &dfield, + (byte*) buf, + TRUE, + old_mysql_row_col, + col_pack_len, + dict_table_is_comp( + prebuilt->table)); + dfield_copy(ufield->old_v_val, + &dfield); + dfield_copy(vfield, &dfield); + } else { + dict_col_copy_type( + col, dfield_get_type( + ufield->old_v_val)); + dfield_set_null(ufield->old_v_val); + dfield_set_null(vfield); + } + num_v++; + } else { + ufield->field_no = dict_col_get_clust_pos( + &prebuilt->table->cols[innodb_idx - num_v], + clust_index); + ufield->old_v_val = NULL; + } n_changed++; /* If an FTS indexed column was changed by this @@ -8475,8 +9611,8 @@ calc_row_difference( checking only once here. Later we will need to note which columns have been updated and do selective processing. */ - if (prebuilt->table->fts != NULL) { - ulint offset; + if (prebuilt->table->fts != NULL && !is_virtual) { + ulint offset; dict_table_t* innodb_table; innodb_table = prebuilt->table; @@ -8496,9 +9632,20 @@ calc_row_difference( innodb_table, ufield); } } + } else if (is_virtual) { + dfield_t* vfield = dtuple_get_nth_v_field( + uvect->old_vrow, num_v); + buf = innodb_fill_old_vcol_val( + prebuilt, vfield, o_len, + col, old_mysql_row_col, + col_pack_len, buf); + ut_ad(col->ord_part || online_ord_part); + num_v++; + } + + if (field->stored_in_db()) { + innodb_idx++; } - if (field->stored_in_db()) - innodb_idx++; } /* If the update changes a column with an FTS index on it, we @@ -8520,37 +9667,31 @@ calc_row_difference( Doc ID must also be updated. 
Otherwise, return error */ if (changes_fts_column && !changes_fts_doc_col) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: A new Doc ID" - " must be supplied while updating" - " FTS indexed columns.\n"); + ib::warn() << "A new Doc ID must be supplied" + " while updating FTS indexed columns."; return(DB_FTS_INVALID_DOCID); } /* Doc ID must monotonically increase */ ut_ad(innodb_table->fts->cache); if (doc_id < prebuilt->table->fts->cache->next_doc_id) { - fprintf(stderr, - "InnoDB: FTS Doc ID must be larger than" - " " IB_ID_FMT " for table", - innodb_table->fts->cache->next_doc_id - - 1); - ut_print_name(stderr, trx, - TRUE, innodb_table->name); - putc('\n', stderr); + + ib::warn() << "FTS Doc ID must be larger than " + << innodb_table->fts->cache->next_doc_id + - 1 << " for table " + << innodb_table->name; return(DB_FTS_INVALID_DOCID); } else if ((doc_id - prebuilt->table->fts->cache->next_doc_id) >= FTS_DOC_ID_MAX_STEP) { - fprintf(stderr, - "InnoDB: Doc ID " UINT64PF " is too" + + ib::warn() << "Doc ID " << doc_id << " is too" " big. Its difference with largest" - " Doc ID used " UINT64PF " cannot" - " exceed or equal to %d\n", - doc_id, - prebuilt->table->fts->cache->next_doc_id - 1, - FTS_DOC_ID_MAX_STEP); + " Doc ID used " << prebuilt->table->fts + ->cache->next_doc_id - 1 + << " cannot exceed or equal to " + << FTS_DOC_ID_MAX_STEP; } @@ -8582,6 +9723,7 @@ calc_row_difference( ut_a(buf <= (byte*) original_upd_buff + buff_len); + ut_ad(uvect->validate()); return(DB_SUCCESS); } @@ -8606,7 +9748,7 @@ wsrep_calc_row_hash( uint i; void *ctx = alloca(my_md5_context_size()); - my_md5_init(ctx); + my_md5_init(ctx); n_fields = table->s->fields; @@ -8667,80 +9809,97 @@ wsrep_calc_row_hash( return(0); } #endif /* WITH_WSREP */ -/**********************************************************************//** + +/* Updates a row given as a parameter to a new value. 
Note that we are given whole rows, not just the fields which are updated: this incurs some overhead for CPU when we check which fields are actually updated. TODO: currently InnoDB does not prevent the 'Halloween problem': in a searched update a single row can get updated several times if its index columns are updated! -@return error number or 0 */ -UNIV_INTERN +@param[in] old_row Old row contents in MySQL format +@param[out] new_row Updated row contents in MySQL format +@return error number or 0 */ + int ha_innobase::update_row( -/*====================*/ - const uchar* old_row, /*!< in: old row in MySQL format */ - uchar* new_row) /*!< in: new row in MySQL format */ + const uchar* old_row, + uchar* new_row) { - upd_t* uvect; + int err; + dberr_t error; - trx_t* trx = thd_to_trx(user_thd); + trx_t* trx = thd_to_trx(m_user_thd); DBUG_ENTER("ha_innobase::update_row"); - ut_a(prebuilt->trx == trx); + ut_a(m_prebuilt->trx == trx); - if (high_level_read_only) { + if (high_level_read_only && !dict_table_is_intrinsic(m_prebuilt->table)) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { ++trx->will_lock; } - if (upd_buf == NULL) { - ut_ad(upd_buf_size == 0); + if (m_upd_buf == NULL) { + ut_ad(m_upd_buf_size == 0); - /* Create a buffer for packing the fields of a record. Why - table->stored_rec_length did not work here? Obviously, - because char fields when packed actually became 1 byte - longer, when we also stored the string length as the first - byte. */ + /* Create a buffer for packing the fields of a record. Why + table->stored_rec_length did not work here? Obviously, + because char fields when packed actually became 1 byte + longer, when we also stored the string length as the first + byte. 
*/ - upd_buf_size = table->s->stored_rec_length + - table->s->max_key_length + MAX_REF_PARTS * 3; - upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME)); - if (upd_buf == NULL) { - upd_buf_size = 0; + m_upd_buf_size = table->s->stored_rec_length + table->s->max_key_length + + MAX_REF_PARTS * 3; + + m_upd_buf = reinterpret_cast( + my_malloc( + m_upd_buf_size, + MYF(MY_WME))); + /* JAN: TODO: MySQL 5.7: PSI_INSTRUMENT_ME,...*/ + + if (m_upd_buf == NULL) { + m_upd_buf_size = 0; DBUG_RETURN(HA_ERR_OUT_OF_MEM); } } - ha_statistic_increment(&SSV::ha_update_count); + upd_t* uvect; - if (prebuilt->upd_node) { - uvect = prebuilt->upd_node->update; + if (m_prebuilt->upd_node) { + uvect = m_prebuilt->upd_node->update; } else { - uvect = row_get_prebuilt_update_vector(prebuilt); + uvect = row_get_prebuilt_update_vector(m_prebuilt); } /* Build an update vector from the modified fields in the rows - (uses upd_buf of the handle) */ + (uses m_upd_buf of the handle) */ - error = calc_row_difference(uvect, (uchar*) old_row, new_row, table, - upd_buf, upd_buf_size, prebuilt, user_thd); + error = calc_row_difference( + uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size, + m_prebuilt, m_user_thd); if (error != DB_SUCCESS) { goto func_exit; } + if (!dict_table_is_intrinsic(m_prebuilt->table) + && TrxInInnoDB::is_aborted(trx)) { + + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + /* This is not a delete */ - prebuilt->upd_node->is_delete = FALSE; + m_prebuilt->upd_node->is_delete = FALSE; - ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); + innobase_srv_conc_enter_innodb(m_prebuilt); - innobase_srv_conc_enter_innodb(trx); - - error = row_update_for_mysql((byte*) old_row, prebuilt); + error = row_update_for_mysql((byte*) old_row, m_prebuilt); /* We need to do some special AUTOINC handling for the following case: @@ -8748,12 +9907,12 @@ ha_innobase::update_row( We need to use the AUTOINC counter 
that was actually used by MySQL in the UPDATE statement, which can be different from the - value used in the INSERT statement.*/ + value used in the INSERT statement. */ if (error == DB_SUCCESS && table->next_number_field && new_row == table->record[0] - && thd_sql_command(user_thd) == SQLCOM_INSERT + && thd_sql_command(m_user_thd) == SQLCOM_INSERT && trx->duplicates) { ulonglong auto_inc; @@ -8763,16 +9922,16 @@ ha_innobase::update_row( /* We need the upper limit of the col type to check for whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); + col_max_value = + innobase_get_int_col_max_value(table->next_number_field); if (auto_inc <= col_max_value && auto_inc != 0) { ulonglong offset; ulonglong increment; - offset = prebuilt->autoinc_offset; - increment = prebuilt->autoinc_increment; + offset = m_prebuilt->autoinc_offset; + increment = m_prebuilt->autoinc_increment; auto_inc = innobase_next_autoinc( auto_inc, 1, increment, offset, col_max_value); @@ -8781,11 +9940,12 @@ ha_innobase::update_row( } } - innobase_srv_conc_exit_innodb(trx); + innobase_srv_conc_exit_innodb(m_prebuilt); func_exit: - int err = convert_error_code_to_mysql(error, - prebuilt->table->flags, user_thd); + + err = convert_error_code_to_mysql( + error, m_prebuilt->table->flags, m_user_thd); /* If success and no columns were updated. 
*/ if (err == 0 && uvect->n_fields == 0) { @@ -8805,14 +9965,13 @@ func_exit: innobase_active_small(); #ifdef WITH_WSREP - if (error == DB_SUCCESS && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { + if (error == DB_SUCCESS && + wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE && + wsrep_on(m_user_thd) && + !wsrep_thd_ignore_table(m_user_thd)) { DBUG_PRINT("wsrep", ("update row key")); - if (wsrep_append_keys(user_thd, false, old_row, new_row)) { + if (wsrep_append_keys(m_user_thd, false, old_row, new_row)) { WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED"); DBUG_PRINT("wsrep", ("row key failed")); err = HA_ERR_INTERNAL_ERROR; @@ -8828,42 +9987,50 @@ wsrep_error: /**********************************************************************//** Deletes a row given as the parameter. -@return error number or 0 */ -UNIV_INTERN +@return error number or 0 */ + int ha_innobase::delete_row( /*====================*/ const uchar* record) /*!< in: a row in MySQL format */ { dberr_t error; - trx_t* trx = thd_to_trx(user_thd); + trx_t* trx = thd_to_trx(m_user_thd); + TrxInInnoDB trx_in_innodb(trx); DBUG_ENTER("ha_innobase::delete_row"); - ut_a(prebuilt->trx == trx); + if (!dict_table_is_intrinsic(m_prebuilt->table) + && trx_in_innodb.is_aborted()) { - if (high_level_read_only) { + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + + ut_a(m_prebuilt->trx == trx); + + if (high_level_read_only && !dict_table_is_intrinsic(m_prebuilt->table)) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { ++trx->will_lock; } - ha_statistic_increment(&SSV::ha_delete_count); - - if (!prebuilt->upd_node) { - row_get_prebuilt_update_vector(prebuilt); + if (!m_prebuilt->upd_node) { + row_get_prebuilt_update_vector(m_prebuilt); } /* This is a delete */ - prebuilt->upd_node->is_delete = 
TRUE; + m_prebuilt->upd_node->is_delete = TRUE; - innobase_srv_conc_enter_innodb(trx); + innobase_srv_conc_enter_innodb(m_prebuilt); - error = row_update_for_mysql((byte*) record, prebuilt); + error = row_update_for_mysql((byte*) record, m_prebuilt); - innobase_srv_conc_exit_innodb(trx); + innobase_srv_conc_exit_innodb(m_prebuilt); /* Tell the InnoDB server that there might be work for utility threads: */ @@ -8871,12 +10038,11 @@ ha_innobase::delete_row( innobase_active_small(); #ifdef WITH_WSREP - if (error == DB_SUCCESS && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - if (wsrep_append_keys(user_thd, false, record, NULL)) { + if (error == DB_SUCCESS && + wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE && + wsrep_on(m_user_thd) && + !wsrep_thd_ignore_table(m_user_thd)) { + if (wsrep_append_keys(m_user_thd, false, record, NULL)) { DBUG_PRINT("wsrep", ("delete fail")); error = (dberr_t) HA_ERR_INTERNAL_ERROR; goto wsrep_error; @@ -8885,14 +10051,45 @@ ha_innobase::delete_row( wsrep_error: #endif /* WITH_WSREP */ DBUG_RETURN(convert_error_code_to_mysql( - error, prebuilt->table->flags, user_thd)); + error, m_prebuilt->table->flags, m_user_thd)); +} + +/** Delete all rows from the table. +@return error number or 0 */ + +int +ha_innobase::delete_all_rows() +{ + DBUG_ENTER("ha_innobase::delete_all_rows"); + + /* Currently enabled only for intrinsic tables. 
*/ + if (!dict_table_is_intrinsic(m_prebuilt->table)) { + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (!dict_table_is_intrinsic(m_prebuilt->table) + && trx_in_innodb.is_aborted()) { + + DBUG_RETURN(innobase_rollback(ht, m_user_thd, false)); + } + + dberr_t error = row_delete_all_rows(m_prebuilt->table); + + if (error == DB_SUCCESS) { + dict_stats_update(m_prebuilt->table, DICT_STATS_EMPTY_TABLE); + } + + DBUG_RETURN(convert_error_code_to_mysql( + error, m_prebuilt->table->flags, m_user_thd)); } /**********************************************************************//** Removes a new lock set on a row, if it was not read optimistically. This can be called after a row has been read in the processing of an UPDATE or a DELETE query, if the option innodb_locks_unsafe_for_binlog is set. */ -UNIV_INTERN + void ha_innobase::unlock_row(void) /*=========================*/ @@ -8900,31 +10097,42 @@ ha_innobase::unlock_row(void) DBUG_ENTER("ha_innobase::unlock_row"); /* Consistent read does not take any locks, thus there is - nothing to unlock. */ + nothing to unlock. There is no locking for intrinsic table. */ - if (prebuilt->select_lock_type == LOCK_NONE) { + if (m_prebuilt->select_lock_type == LOCK_NONE + || dict_table_is_intrinsic(m_prebuilt->table)) { DBUG_VOID_RETURN; } + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (trx_in_innodb.is_aborted()) { + DBUG_VOID_RETURN; + } + + ut_ad(!dict_table_is_intrinsic(m_prebuilt->table)); + /* Ideally, this assert must be in the beginning of the function. But there are some calls to this function from the SQL layer when the transaction is in state TRX_STATE_NOT_STARTED. The check on - prebuilt->select_lock_type above gets around this issue. */ - ut_ad(trx_state_eq(prebuilt->trx, TRX_STATE_ACTIVE)); + m_prebuilt->select_lock_type above gets around this issue. 
*/ - switch (prebuilt->row_read_type) { + ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE) + || trx_state_eq(m_prebuilt->trx, TRX_STATE_FORCED_ROLLBACK)); + + switch (m_prebuilt->row_read_type) { case ROW_READ_WITH_LOCKS: if (!srv_locks_unsafe_for_binlog - && prebuilt->trx->isolation_level + && m_prebuilt->trx->isolation_level > TRX_ISO_READ_COMMITTED) { break; } /* fall through */ case ROW_READ_TRY_SEMI_CONSISTENT: - row_unlock_for_mysql(prebuilt, FALSE); + row_unlock_for_mysql(m_prebuilt, FALSE); break; case ROW_READ_DID_SEMI_CONSISTENT: - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; break; } @@ -8932,21 +10140,21 @@ ha_innobase::unlock_row(void) } /* See handler.h and row0mysql.h for docs on this function. */ -UNIV_INTERN + bool ha_innobase::was_semi_consistent_read(void) /*=======================================*/ { - return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); + return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); } /* See handler.h and row0mysql.h for docs on this function. 
*/ -UNIV_INTERN + void ha_innobase::try_semi_consistent_read(bool yes) /*===========================================*/ { - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); /* Row read type is set to semi consistent read if this was requested by the MySQL and either innodb_locks_unsafe_for_binlog @@ -8955,22 +10163,26 @@ ha_innobase::try_semi_consistent_read(bool yes) if (yes && (srv_locks_unsafe_for_binlog - || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + || m_prebuilt->trx->isolation_level + <= TRX_ISO_READ_COMMITTED)) { + + m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + } else { - prebuilt->row_read_type = ROW_READ_WITH_LOCKS; + m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS; } } /******************************************************************//** Initializes a handle to use an index. -@return 0 or error number */ -UNIV_INTERN +@return 0 or error number */ + int ha_innobase::index_init( /*====================*/ - uint keynr, /*!< in: key (index) number */ - bool sorted) /*!< in: 1 if result MUST be sorted according to index */ + uint keynr, /*!< in: key (index) number */ + bool sorted) /*!< in: 1 if result MUST be sorted + according to index */ { DBUG_ENTER("index_init"); @@ -8979,65 +10191,57 @@ ha_innobase::index_init( /******************************************************************//** Currently does nothing. -@return 0 */ -UNIV_INTERN +@return 0 */ + int ha_innobase::index_end(void) /*========================*/ { - int error = 0; DBUG_ENTER("index_end"); + + m_prebuilt->index->last_sel_cur->release(); + active_index = MAX_KEY; + in_range_check_pushed_down = FALSE; - ds_mrr.dsmrr_close(); - DBUG_RETURN(error); + + m_ds_mrr.dsmrr_close(); + + DBUG_RETURN(0); } /*********************************************************************//** Converts a search mode flag understood by MySQL to a flag understood by InnoDB. 
*/ -static inline -ulint +page_cur_mode_t convert_search_mode_to_innobase( /*============================*/ - enum ha_rkey_function find_flag) + ha_rkey_function find_flag) { switch (find_flag) { case HA_READ_KEY_EXACT: /* this does not require the index to be UNIQUE */ - return(PAGE_CUR_GE); case HA_READ_KEY_OR_NEXT: return(PAGE_CUR_GE); - case HA_READ_KEY_OR_PREV: - return(PAGE_CUR_LE); case HA_READ_AFTER_KEY: return(PAGE_CUR_G); case HA_READ_BEFORE_KEY: return(PAGE_CUR_L); - case HA_READ_PREFIX: - return(PAGE_CUR_GE); + case HA_READ_KEY_OR_PREV: case HA_READ_PREFIX_LAST: - return(PAGE_CUR_LE); case HA_READ_PREFIX_LAST_OR_PREV: return(PAGE_CUR_LE); - /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always - pass a complete-field prefix of a key value as the search - tuple. I.e., it is not allowed that the last field would - just contain n first bytes of the full field value. - MySQL uses a 'padding' trick to convert LIKE 'abc%' - type queries so that it can use as a search tuple - a complete-field-prefix of a key value. Thus, the InnoDB - search mode PAGE_CUR_LE_OR_EXTENDS is never used. - TODO: when/if MySQL starts to use also partial-field - prefixes, we have to deal with stripping of spaces - and comparison of non-latin1 char type fields in - innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to - work correctly. */ case HA_READ_MBR_CONTAIN: + return(PAGE_CUR_CONTAIN); case HA_READ_MBR_INTERSECT: + return(PAGE_CUR_INTERSECT); case HA_READ_MBR_WITHIN: + return(PAGE_CUR_WITHIN); case HA_READ_MBR_DISJOINT: + return(PAGE_CUR_DISJOINT); case HA_READ_MBR_EQUAL: + return(PAGE_CUR_MBR_EQUAL); + case HA_READ_PREFIX: return(PAGE_CUR_UNSUPP); /* do not use "default:" in order to produce a gcc warning: enumeration value '...' not handled in switch @@ -9057,22 +10261,22 @@ the start of a new SQL statement, and what is associated with it. For each table in the database the MySQL interpreter may have several table handle instances in use, also in a single SQL query. 
For each table -handle instance there is an InnoDB 'prebuilt' struct which contains most +handle instance there is an InnoDB 'm_prebuilt' struct which contains most of the InnoDB data associated with this table handle instance. A) if the user has not explicitly set any MySQL table level locks: 1) MySQL calls ::external_lock to set an 'intention' table level lock on the table of the handle instance. There we set -prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set +m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set true if we are taking this table handle instance to use in a new SQL statement issued by the user. We also increment trx->n_mysql_tables_in_use. - 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search -instructions to prebuilt->template of the table handle instance in + 2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search +instructions to m_prebuilt->template of the table handle instance in ::index_read. The template is used to save CPU time in large joins. - 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we + 3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we allocate a new consistent read view for the trx if it does not yet have one, or in the case of a locking read, set an InnoDB 'intention' table level lock on the table. @@ -9101,8 +10305,8 @@ start of a new SQL statement. */ /**********************************************************************//** Positions an index cursor to the index specified in the handle. Fetches the row if any. 
-@return 0, HA_ERR_KEY_NOT_FOUND, or error number */ -UNIV_INTERN +@return 0, HA_ERR_KEY_NOT_FOUND, or error number */ + int ha_innobase::index_read( /*====================*/ @@ -9120,27 +10324,20 @@ ha_innobase::index_read( uint key_len,/*!< in: key value length */ enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */ { - ulint mode; - dict_index_t* index; - ulint match_mode = 0; - int error; - dberr_t ret; - DBUG_ENTER("index_read"); DEBUG_SYNC_C("ha_innobase_index_read_begin"); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); + ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd)); ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT); - ha_statistic_increment(&SSV::ha_read_key_count); + dict_index_t* index = m_prebuilt->index; - index = prebuilt->index; - - if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) { - prebuilt->index_usable = FALSE; + if (index == NULL || dict_index_is_corrupted(index)) { + m_prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + + if (!m_prebuilt->index_usable) { DBUG_RETURN(dict_index_is_corrupted(index) ? 
HA_ERR_INDEX_CORRUPT : HA_ERR_TABLE_DEF_CHANGED); @@ -9150,105 +10347,142 @@ ha_innobase::index_read( DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); } - /* Note that if the index for which the search template is built is not - necessarily prebuilt->index, but can also be the clustered index */ + /* For R-Tree index, we will always place the page lock to + pages being searched */ + if (dict_index_is_spatial(index)) { + ++m_prebuilt->trx->will_lock; + } - if (prebuilt->sql_stat_start) { + /* Note that if the index for which the search template is built is not + necessarily m_prebuilt->index, but can also be the clustered index */ + + if (m_prebuilt->sql_stat_start) { build_template(false); } - if (key_ptr) { + if (key_ptr != NULL) { /* Convert the search key value to InnoDB format into - prebuilt->search_tuple */ + m_prebuilt->search_tuple */ row_sel_convert_mysql_key_to_innobase( - prebuilt->search_tuple, - prebuilt->srch_key_val1, - prebuilt->srch_key_val_len, + m_prebuilt->search_tuple, + m_prebuilt->srch_key_val1, + m_prebuilt->srch_key_val_len, index, (byte*) key_ptr, (ulint) key_len, - prebuilt->trx); - DBUG_ASSERT(prebuilt->search_tuple->n_fields > 0); + m_prebuilt->trx); + + DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0); } else { /* We position the cursor to the last or the first entry in the index */ - dtuple_set_n_fields(prebuilt->search_tuple, 0); + dtuple_set_n_fields(m_prebuilt->search_tuple, 0); } - mode = convert_search_mode_to_innobase(find_flag); + page_cur_mode_t mode = convert_search_mode_to_innobase(find_flag); - match_mode = 0; + ulint match_mode = 0; if (find_flag == HA_READ_KEY_EXACT) { match_mode = ROW_SEL_EXACT; - } else if (find_flag == HA_READ_PREFIX - || find_flag == HA_READ_PREFIX_LAST) { + } else if (find_flag == HA_READ_PREFIX_LAST) { match_mode = ROW_SEL_EXACT_PREFIX; } - last_match_mode = (uint) match_mode; + m_last_match_mode = (uint) match_mode; + + dberr_t ret; if (mode != PAGE_CUR_UNSUPP) { - 
innobase_srv_conc_enter_innodb(prebuilt->trx); + innobase_srv_conc_enter_innodb(m_prebuilt); - ret = row_search_for_mysql((byte*) buf, mode, prebuilt, - match_mode, 0); + if (!dict_table_is_intrinsic(m_prebuilt->table)) { - innobase_srv_conc_exit_innodb(prebuilt->trx); + if (TrxInInnoDB::is_aborted(m_prebuilt->trx)) { + + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + + m_prebuilt->ins_sel_stmt = thd_is_ins_sel_stmt( + m_user_thd); + + ret = row_search_mvcc( + buf, mode, m_prebuilt, match_mode, 0); + + } else { + m_prebuilt->session = thd_to_innodb_session(m_user_thd); + + ret = row_search_no_mvcc( + buf, mode, m_prebuilt, match_mode, 0); + } + + innobase_srv_conc_exit_innodb(m_prebuilt); } else { ret = DB_UNSUPPORTED; } + DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;); + + int error; + switch (ret) { case DB_SUCCESS: error = 0; table->status = 0; - if (prebuilt->table->is_system_db) { + if (m_prebuilt->table->is_system_db) { srv_stats.n_system_rows_read.add( - (size_t) prebuilt->trx->id, 1); + (size_t) m_prebuilt->trx->id, 1); } else { srv_stats.n_rows_read.add( - (size_t) prebuilt->trx->id, 1); + (size_t) m_prebuilt->trx->id, 1); } break; + case DB_RECORD_NOT_FOUND: error = HA_ERR_KEY_NOT_FOUND; table->status = STATUS_NOT_FOUND; break; + case DB_END_OF_INDEX: error = HA_ERR_KEY_NOT_FOUND; table->status = STATUS_NOT_FOUND; break; - case DB_TABLESPACE_DELETED: + case DB_TABLESPACE_DELETED: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, table->s->table_name.str); table->status = STATUS_NOT_FOUND; error = HA_ERR_NO_SUCH_TABLE; break; + case DB_TABLESPACE_NOT_FOUND: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_MISSING, MYF(0), + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + ER_TABLESPACE_MISSING, table->s->table_name.str); table->status = 
STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + //error = HA_ERR_TABLESPACE_MISSING; + error = HA_ERR_NO_SUCH_TABLE; break; + default: error = convert_error_code_to_mysql( - ret, prebuilt->table->flags, user_thd); + ret, m_prebuilt->table->flags, m_user_thd); table->status = STATUS_NOT_FOUND; break; @@ -9260,8 +10494,8 @@ ha_innobase::index_read( /*******************************************************************//** The following functions works like index_read, but it find the last row with the current key value or prefix. -@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */ -UNIV_INTERN +@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */ + int ha_innobase::index_read_last( /*=========================*/ @@ -9276,8 +10510,8 @@ ha_innobase::index_read_last( /********************************************************************//** Get the index for a handle. Does not change active index. -@return NULL or index instance. */ -UNIV_INTERN +@return NULL or index instance. */ + dict_index_t* ha_innobase::innobase_get_index( /*============================*/ @@ -9285,35 +10519,33 @@ ha_innobase::innobase_get_index( clustered index, even if it was internally generated by InnoDB */ { - KEY* key = 0; - dict_index_t* index = 0; + KEY* key; + dict_index_t* index; DBUG_ENTER("innobase_get_index"); if (keynr != MAX_KEY && table->s->keys > 0) { + key = table->key_info + keynr; - index = innobase_index_lookup(share, keynr); + index = innobase_index_lookup(m_share, keynr); - if (index) { + if (index != NULL) { if (!key || ut_strcmp(index->name, key->name) != 0) { - fprintf(stderr, "InnoDB: [Error] Index for key no %u" - " mysql name %s , InnoDB name %s for table %s\n", - keynr, key ? key->name : "NULL", - index->name, - prebuilt->table->name); + ib::error() << " Index for key no " << keynr + << " mysql name " << (key ? 
key->name : "NULL") + << " InnoDB name " << index->name() + << " for table " << m_prebuilt->table->name.m_name; for(ulint i=0; i < table->s->keys; i++) { - index = innobase_index_lookup(share, i); + index = innobase_index_lookup(m_share, i); key = table->key_info + keynr; if (index) { - - fprintf(stderr, "InnoDB: [Note] Index for key no %u" - " mysql name %s , InnoDB name %s for table %s\n", - keynr, key ? key->name : "NULL", - index->name, - prebuilt->table->name); + ib::info() << " Index for key no " << keynr + << " mysql name " << (key ? key->name : "NULL") + << " InnoDB name " << index->name() + << " for table " << m_prebuilt->table->name.m_name; } } @@ -9324,29 +10556,31 @@ ha_innobase::innobase_get_index( /* Can't find index with keynr in the translation table. Only print message if the index translation table exists */ - if (share->idx_trans_tbl.index_mapping) { - sql_print_warning("InnoDB could not find " - "index %s key no %u for " - "table %s through its " - "index translation table", + if (m_share->idx_trans_tbl.index_mapping != NULL) { + sql_print_warning("InnoDB could not find" + " index %s key no %u for" + " table %s through its" + " index translation table", key ? key->name : "NULL", keynr, - prebuilt->table->name); + m_prebuilt->table->name + .m_name); } - index = dict_table_get_index_on_name(prebuilt->table, - key->name); + index = dict_table_get_index_on_name( + m_prebuilt->table, key->name); } } else { - index = dict_table_get_first_index(prebuilt->table); + key = 0; + index = dict_table_get_first_index(m_prebuilt->table); } - if (!index) { + if (index == NULL) { sql_print_error( - "Innodb could not find key n:o %u with name %s " - "from dict cache for table %s", + "InnoDB could not find key no %u with name %s" + " from dict cache for table %s", keynr, key ? 
key->name : "NULL", - prebuilt->table->name); + m_prebuilt->table->name.m_name); } DBUG_RETURN(index); @@ -9354,8 +10588,8 @@ ha_innobase::innobase_get_index( /********************************************************************//** Changes the active index of a handle. -@return 0 or error code */ -UNIV_INTERN +@return 0 or error code */ + int ha_innobase::change_active_index( /*=============================*/ @@ -9365,46 +10599,63 @@ ha_innobase::change_active_index( { DBUG_ENTER("change_active_index"); - ut_ad(user_thd == ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); + ut_ad(m_user_thd == ha_thd()); + ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd)); + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (!dict_table_is_intrinsic(m_prebuilt->table) + && trx_in_innodb.is_aborted()) { + + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } active_index = keynr; - prebuilt->index = innobase_get_index(keynr); + m_prebuilt->index = innobase_get_index(keynr); - if (UNIV_UNLIKELY(!prebuilt->index)) { + if (m_prebuilt->index == NULL) { sql_print_warning("InnoDB: change_active_index(%u) failed", keynr); - prebuilt->index_usable = FALSE; + m_prebuilt->index_usable = FALSE; DBUG_RETURN(1); } - prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx, - prebuilt->index); + m_prebuilt->index_usable = row_merge_is_index_usable( + m_prebuilt->trx, m_prebuilt->index); - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - if (dict_index_is_corrupted(prebuilt->index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof index_name, - prebuilt->index->name, TRUE); + if (!m_prebuilt->index_usable) { + if (dict_index_is_corrupted(m_prebuilt->index)) { + char table_name[MAX_FULL_NAME_LEN + 1]; innobase_format_name( table_name, sizeof table_name, - prebuilt->index->table->name, FALSE); + 
m_prebuilt->index->table->name.m_name); - push_warning_printf( - user_thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s for table %s is" - " marked as corrupted", - index_name, table_name); - DBUG_RETURN(HA_ERR_INDEX_CORRUPT); + if (dict_index_is_clust(m_prebuilt->index)) { + ut_ad(m_prebuilt->index->table->corrupted); + push_warning_printf( + m_user_thd, Sql_condition::WARN_LEVEL_WARN, + ER_TABLE_CORRUPT, + "InnoDB: Table %s is corrupted.", + table_name); + DBUG_RETURN(ER_TABLE_CORRUPT); + } else { + push_warning_printf( + m_user_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_INDEX_CORRUPT, + "InnoDB: Index %s for table %s is" + " marked as corrupted", + m_prebuilt->index->name(), + table_name); + DBUG_RETURN(HA_ERR_INDEX_CORRUPT); + } } else { push_warning_printf( - user_thd, Sql_condition::WARN_LEVEL_WARN, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TABLE_DEF_CHANGED, "InnoDB: insufficient history for index %u", keynr); @@ -9413,15 +10664,41 @@ ha_innobase::change_active_index( /* The caller seems to ignore this. Thus, we must check this again in row_search_for_mysql(). */ DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY, - 0, NULL)); + 0, NULL)); } - ut_a(prebuilt->search_tuple != 0); + ut_a(m_prebuilt->search_tuple != 0); - dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); + /* Initialization of search_tuple is not needed for FT index + since FT search returns rank only. In addition engine should + be able to retrieve FTS_DOC_ID column value if necessary. 
*/ + if ((m_prebuilt->index->type & DICT_FTS)) { +#ifdef MYSQL_STORE_FTS_DOC_ID + if (table->fts_doc_id_field + && bitmap_is_set(table->read_set, + table->fts_doc_id_field->field_index + && m_prebuilt->read_just_key)) { + m_prebuilt->fts_doc_id_in_read_set = 1; + } +#endif + } else { + dtuple_set_n_fields(m_prebuilt->search_tuple, + m_prebuilt->index->n_fields); - dict_index_copy_types(prebuilt->search_tuple, prebuilt->index, - prebuilt->index->n_fields); + dict_index_copy_types( + m_prebuilt->search_tuple, m_prebuilt->index, + m_prebuilt->index->n_fields); + + /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is + always added to read_set. */ + +#ifdef MYSQL_STORE_FTS_DOC_ID + m_prebuilt->fts_doc_id_in_read_set = + (m_prebuilt->read_just_key && table->fts_doc_id_field + && m_prebuilt->in_fts_query); +#endif + + } /* MySQL changes the active index for a handle also during some queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX() @@ -9434,37 +10711,11 @@ ha_innobase::change_active_index( DBUG_RETURN(0); } -/**********************************************************************//** -Positions an index cursor to the index specified in keynr. Fetches the -row if any. -??? This is only used to read whole keys ??? -@return error number or 0 */ -UNIV_INTERN -int -ha_innobase::index_read_idx( -/*========================*/ - uchar* buf, /*!< in/out: buffer for the returned - row */ - uint keynr, /*!< in: use this index */ - const uchar* key, /*!< in: key value; if this is NULL - we position the cursor at the - start or end of index */ - uint key_len, /*!< in: key value length */ - enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */ -{ - if (change_active_index(keynr)) { - - return(1); - } - - return(index_read(buf, key, key_len, find_flag)); -} - /***********************************************************************//** Reads the next or previous row from a cursor, which must have previously been positioned using index_read. 
-@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error number */ + int ha_innobase::general_fetch( /*=======================*/ @@ -9474,35 +10725,51 @@ ha_innobase::general_fetch( uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or ROW_SEL_EXACT_PREFIX */ { - dberr_t ret; - int error; - DBUG_ENTER("general_fetch"); - /* If transaction is not startted do not continue, instead return a error code. */ - if(!(prebuilt->sql_stat_start || (prebuilt->trx && prebuilt->trx->state == 1))) { - DBUG_RETURN(HA_ERR_END_OF_FILE); + const trx_t* trx = m_prebuilt->trx; + dberr_t ret; + + ut_ad(trx == thd_to_trx(m_user_thd)); + + bool intrinsic = dict_table_is_intrinsic(m_prebuilt->table); + + if (!intrinsic && TrxInInnoDB::is_aborted(trx)) { + + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); } - ut_a(prebuilt->trx == thd_to_trx(user_thd)); + innobase_srv_conc_enter_innodb(m_prebuilt); - innobase_srv_conc_enter_innodb(prebuilt->trx); + if (!intrinsic) { - ret = row_search_for_mysql( - (byte*) buf, 0, prebuilt, match_mode, direction); + ret = row_search_mvcc( + buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, + direction); - innobase_srv_conc_exit_innodb(prebuilt->trx); + } else { + ret = row_search_no_mvcc( + buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, + direction); + } + + innobase_srv_conc_exit_innodb(m_prebuilt); + + int error; switch (ret) { case DB_SUCCESS: error = 0; table->status = 0; - if (prebuilt->table->is_system_db) { + if (m_prebuilt->table->is_system_db) { srv_stats.n_system_rows_read.add( - (size_t) prebuilt->trx->id, 1); + (size_t) m_prebuilt->trx->id, 1); } else { srv_stats.n_rows_read.add( - (size_t) prebuilt->trx->id, 1); + (size_t) m_prebuilt->trx->id, 1); } break; case DB_RECORD_NOT_FOUND: @@ -9514,9 +10781,8 @@ ha_innobase::general_fetch( table->status = STATUS_NOT_FOUND; break; case DB_TABLESPACE_DELETED: - ib_senderrf( - 
prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, table->s->table_name.str); @@ -9526,16 +10792,16 @@ ha_innobase::general_fetch( case DB_TABLESPACE_NOT_FOUND: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_MISSING, table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; default: error = convert_error_code_to_mysql( - ret, prebuilt->table->flags, user_thd); + ret, m_prebuilt->table->flags, m_user_thd); table->status = STATUS_NOT_FOUND; break; @@ -9547,23 +10813,21 @@ ha_innobase::general_fetch( /***********************************************************************//** Reads the next row from a cursor, which must have previously been positioned using index_read. -@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error number */ + int ha_innobase::index_next( /*====================*/ uchar* buf) /*!< in/out: buffer for next row in MySQL format */ { - ha_statistic_increment(&SSV::ha_read_next_count); - return(general_fetch(buf, ROW_SEL_NEXT, 0)); } /*******************************************************************//** Reads the next row matching to the key value given as the parameter. 
-@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error number */ + int ha_innobase::index_next_same( /*=========================*/ @@ -9571,42 +10835,35 @@ ha_innobase::index_next_same( const uchar* key, /*!< in: key value */ uint keylen) /*!< in: key value length */ { - ha_statistic_increment(&SSV::ha_read_next_count); - - return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode)); + return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode)); } /***********************************************************************//** Reads the previous row from a cursor, which must have previously been positioned using index_read. -@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error number */ + int ha_innobase::index_prev( /*====================*/ uchar* buf) /*!< in/out: buffer for previous row in MySQL format */ { - ha_statistic_increment(&SSV::ha_read_prev_count); - return(general_fetch(buf, ROW_SEL_PREV, 0)); } /********************************************************************//** Positions a cursor on the first record in an index and reads the corresponding row to buf. -@return 0, HA_ERR_END_OF_FILE, or error code */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error code */ + int ha_innobase::index_first( /*=====================*/ uchar* buf) /*!< in/out: buffer for the row */ { - int error; - DBUG_ENTER("index_first"); - ha_statistic_increment(&SSV::ha_read_first_count); - error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY); + int error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY); /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ @@ -9620,19 +10877,16 @@ ha_innobase::index_first( /********************************************************************//** Positions a cursor on the last record in an index and reads the corresponding row to buf. 
-@return 0, HA_ERR_END_OF_FILE, or error code */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error code */ + int ha_innobase::index_last( /*====================*/ uchar* buf) /*!< in/out: buffer for the row */ { - int error; - DBUG_ENTER("index_last"); - ha_statistic_increment(&SSV::ha_read_last_count); - error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY); + int error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY); /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ @@ -9645,22 +10899,30 @@ ha_innobase::index_last( /****************************************************************//** Initialize a table scan. -@return 0 or error number */ -UNIV_INTERN +@return 0 or error number */ + int ha_innobase::rnd_init( /*==================*/ - bool scan) /*!< in: TRUE if table/index scan FALSE otherwise */ + bool scan) /*!< in: true if table/index scan FALSE otherwise */ { + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (!dict_table_is_intrinsic(m_prebuilt->table) + && trx_in_innodb.is_aborted()) { + + return(innobase_rollback(ht, m_user_thd, false)); + } + int err; /* Store the active index value so that we can restore the original value after a scan */ - if (prebuilt->clust_index_was_generated) { + if (m_prebuilt->clust_index_was_generated) { err = change_active_index(MAX_KEY); } else { - err = change_active_index(primary_key); + err = change_active_index(m_primary_key); } /* Don't use semi-consistent read in random row reads (by position). @@ -9670,15 +10932,15 @@ ha_innobase::rnd_init( try_semi_consistent_read(0); } - start_of_scan = 1; + m_start_of_scan = true; return(err); } /*****************************************************************//** Ends a table scan. 
-@return 0 or error number */ -UNIV_INTERN +@return 0 or error number */ + int ha_innobase::rnd_end(void) /*======================*/ @@ -9689,8 +10951,8 @@ ha_innobase::rnd_end(void) /*****************************************************************//** Reads the next row in a table scan (also used to read the FIRST row in a table scan). -@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN +@return 0, HA_ERR_END_OF_FILE, or error number */ + int ha_innobase::rnd_next( /*==================*/ @@ -9700,16 +10962,15 @@ ha_innobase::rnd_next( int error; DBUG_ENTER("rnd_next"); - ha_statistic_increment(&SSV::ha_read_rnd_next_count); - if (start_of_scan) { + if (m_start_of_scan) { error = index_first(buf); if (error == HA_ERR_KEY_NOT_FOUND) { error = HA_ERR_END_OF_FILE; } - start_of_scan = 0; + m_start_of_scan = false; } else { error = general_fetch(buf, ROW_SEL_NEXT, 0); } @@ -9719,8 +10980,8 @@ ha_innobase::rnd_next( /**********************************************************************//** Fetches a row from the table based on a row reference. 
-@return 0, HA_ERR_KEY_NOT_FOUND, or error code */ -UNIV_INTERN +@return 0, HA_ERR_KEY_NOT_FOUND, or error code */ + int ha_innobase::rnd_pos( /*=================*/ @@ -9730,20 +10991,17 @@ ha_innobase::rnd_pos( index was internally generated by InnoDB; the length of data in pos has to be ref_length */ { - int error; DBUG_ENTER("rnd_pos"); DBUG_DUMP("key", pos, ref_length); - ha_statistic_increment(&SSV::ha_read_rnd_count); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); /* Note that we assume the length of the row reference is fixed for the table, and it is == ref_length */ - error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT); + int error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT); - if (error) { + if (error != 0) { DBUG_PRINT("error", ("Got error: %d", error)); } @@ -9753,7 +11011,7 @@ ha_innobase::rnd_pos( /**********************************************************************//** Initialize FT index scan @return 0 or error number */ -UNIV_INTERN + int ha_innobase::ft_init() /*==================*/ @@ -9777,7 +11035,7 @@ ha_innobase::ft_init() /**********************************************************************//** Initialize FT index scan @return FT_INFO structure if successful or NULL */ -UNIV_INTERN + FT_INFO* ha_innobase::ft_init_ext( /*=====================*/ @@ -9785,47 +11043,63 @@ ha_innobase::ft_init_ext( uint keynr, /* in: */ String* key) /* in: */ { - trx_t* trx; - dict_table_t* ft_table; - dberr_t error; - byte* query = (byte*) key->ptr(); - ulint query_len = key->length(); - const CHARSET_INFO* char_set = key->charset(); NEW_FT_INFO* fts_hdl = NULL; dict_index_t* index; fts_result_t* result; char buf_tmp[8192]; ulint buf_tmp_used; uint num_errors; + ulint query_len = key->length(); + const CHARSET_INFO* char_set = key->charset(); + const char* query = key->ptr(); if (fts_enable_diag_print) { - fprintf(stderr, "keynr=%u, '%.*s'\n", - keynr, (int) key->length(), (byte*) 
key->ptr()); + { + ib::info out; + out << "keynr=" << keynr << ", '"; + out.write(key->ptr(), key->length()); + } if (flags & FT_BOOL) { - fprintf(stderr, "BOOL search\n"); + ib::info() << "BOOL search"; } else { - fprintf(stderr, "NL search\n"); + ib::info() << "NL search"; } } /* FIXME: utf32 and utf16 are not compatible with some string function used. So to convert them to uft8 before - proceed. */ + we proceed. */ if (strcmp(char_set->csname, "utf32") == 0 || strcmp(char_set->csname, "utf16") == 0) { + buf_tmp_used = innobase_convert_string( buf_tmp, sizeof(buf_tmp) - 1, &my_charset_utf8_general_ci, query, query_len, (CHARSET_INFO*) char_set, &num_errors); - query = (byte*) buf_tmp; + buf_tmp[buf_tmp_used] = 0; + query = buf_tmp; query_len = buf_tmp_used; - query[query_len] = 0; } - trx = prebuilt->trx; + trx_t* trx = m_prebuilt->trx; + + TrxInInnoDB trx_in_innodb(trx); + + if (trx_in_innodb.is_aborted()) { + + innobase_rollback(ht, m_user_thd, false); + + int err; + err = convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd); + + my_error(err, MYF(0)); + + return(NULL); + } /* FTS queries are not treated as autocommit non-locking selects. 
This is because the FTS implementation can acquire locks behind @@ -9836,7 +11110,7 @@ ha_innobase::ft_init_ext( ++trx->will_lock; } - ft_table = prebuilt->table; + dict_table_t* ft_table = m_prebuilt->table; /* Table does not have an FTS index */ if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) { @@ -9853,12 +11127,13 @@ ha_innobase::ft_init_ext( if (keynr == NO_SUCH_KEY) { /* FIXME: Investigate the NO_SUCH_KEY usage */ - index = (dict_index_t*) ib_vector_getp(ft_table->fts->indexes, 0); + index = reinterpret_cast + (ib_vector_getp(ft_table->fts->indexes, 0)); } else { index = innobase_get_index(keynr); } - if (!index || index->type != DICT_FTS) { + if (index == NULL || index->type != DICT_FTS) { my_error(ER_TABLE_HAS_NO_FT, MYF(0)); return(NULL); } @@ -9869,28 +11144,54 @@ ha_innobase::ft_init_ext( ft_table->fts->fts_status |= ADDED_TABLE_SYNCED; } - error = fts_query(trx, index, flags, query, query_len, &result); + const byte* q = reinterpret_cast( + const_cast(query)); + + // JAN: TODO: support for ft_init_ext_with_hints(), remove the line below + m_prebuilt->m_fts_limit= ULONG_UNDEFINED; + dberr_t error = fts_query(trx, index, flags, q, query_len, &result, + m_prebuilt->m_fts_limit); if (error != DB_SUCCESS) { - my_error(convert_error_code_to_mysql(error, 0, NULL), - MYF(0)); + my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0)); return(NULL); } /* Allocate FTS handler, and instantiate it before return */ - fts_hdl = static_cast(my_malloc(sizeof(NEW_FT_INFO), - MYF(0))); + fts_hdl = reinterpret_cast( + my_malloc(sizeof(NEW_FT_INFO), MYF(0))); + /* JAN: TODO: MySQL 5.7 PSI + my_malloc(PSI_INSTRUMENT_ME, sizeof(NEW_FT_INFO), MYF(0))); + */ fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result); fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result); - fts_hdl->ft_prebuilt = prebuilt; + fts_hdl->ft_prebuilt = m_prebuilt; fts_hdl->ft_result = result; - /* FIXME: Re-evluate the condition when Bug 14469540 - is resolved */ - 
prebuilt->in_fts_query = true; + /* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */ + m_prebuilt->in_fts_query = true; - return((FT_INFO*) fts_hdl); + return(reinterpret_cast(fts_hdl)); +} + +/**********************************************************************//** +Initialize FT index scan +@return FT_INFO structure if successful or NULL */ + +FT_INFO* +ha_innobase::ft_init_ext_with_hints( +/*================================*/ + uint keynr, /* in: key num */ + String* key, /* in: key */ + void* hints) /* in: hints */ +{ + /* TODO Implement function properly working with FT hint. */ +#ifdef MYSQL_FT_INIT_EXT + return(ft_init_ext(hints->get_flags(), keynr, key)); +#else + return NULL; +#endif } /*****************************************************************//** @@ -9901,7 +11202,7 @@ static void innobase_fts_create_doc_id_key( /*===========================*/ - dtuple_t* tuple, /* in/out: prebuilt->search_tuple */ + dtuple_t* tuple, /* in/out: m_prebuilt->search_tuple */ const dict_index_t* index, /* in: index (FTS_DOC_ID_INDEX) */ doc_id_t* doc_id) /* in/out: doc id to search, value @@ -9921,7 +11222,7 @@ innobase_fts_create_doc_id_key( dict_field_t* field = dict_index_get_nth_field(index, 0); ut_a(field->col->mtype == DATA_INT); ut_ad(sizeof(*doc_id) == field->fixed_len); - ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0); + ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME)); #endif /* UNIV_DEBUG */ /* Convert to storage byte order */ @@ -9940,21 +11241,31 @@ innobase_fts_create_doc_id_key( /**********************************************************************//** Fetch next result from the FT result set @return error code */ -UNIV_INTERN + int ha_innobase::ft_read( /*=================*/ uchar* buf) /*!< in/out: buf contain result row */ { - fts_result_t* result; - int error; + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (trx_in_innodb.is_aborted()) { + + innobase_rollback(ht, m_user_thd, false); + + 
return(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + row_prebuilt_t* ft_prebuilt; - ft_prebuilt = ((NEW_FT_INFO*) ft_handler)->ft_prebuilt; + ft_prebuilt = reinterpret_cast(ft_handler)->ft_prebuilt; - ut_a(ft_prebuilt == prebuilt); + ut_a(ft_prebuilt == m_prebuilt); - result = ((NEW_FT_INFO*) ft_handler)->ft_result; + fts_result_t* result; + + result = reinterpret_cast(ft_handler)->ft_result; if (result->current == NULL) { /* This is the case where the FTS query did not @@ -9979,25 +11290,34 @@ ha_innobase::ft_read( next_record: if (result->current != NULL) { - dict_index_t* index; - dtuple_t* tuple = prebuilt->search_tuple; doc_id_t search_doc_id; + dtuple_t* tuple = m_prebuilt->search_tuple; /* If we only need information from result we can return without fetching the table row */ if (ft_prebuilt->read_just_key) { +#ifdef MYSQL_STORE_FTS_DOC_ID + if (m_prebuilt->fts_doc_id_in_read_set) { + fts_ranking_t* ranking; + ranking = rbt_value(fts_ranking_t, + result->current); + innobase_fts_store_docid( + table, ranking->doc_id); + } +#endif table->status= 0; return(0); } - index = dict_table_get_index_on_name( - prebuilt->table, FTS_DOC_ID_INDEX_NAME); + dict_index_t* index; + + index = m_prebuilt->table->fts_doc_id_index; /* Must find the index */ - ut_a(index); + ut_a(index != NULL); /* Switch to the FTS doc id index */ - prebuilt->index = index; + m_prebuilt->index = index; fts_ranking_t* ranking = rbt_value( fts_ranking_t, result->current); @@ -10009,12 +11329,14 @@ next_record: tuple. 
*/ innobase_fts_create_doc_id_key(tuple, index, &search_doc_id); - innobase_srv_conc_enter_innodb(prebuilt->trx); + innobase_srv_conc_enter_innodb(m_prebuilt); dberr_t ret = row_search_for_mysql( - (byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0); + (byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0); - innobase_srv_conc_exit_innodb(prebuilt->trx); + innobase_srv_conc_exit_innodb(m_prebuilt); + + int error; switch (ret) { case DB_SUCCESS: @@ -10044,7 +11366,7 @@ next_record: case DB_TABLESPACE_DELETED: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, table->s->table_name.str); @@ -10054,16 +11376,16 @@ next_record: case DB_TABLESPACE_NOT_FOUND: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_MISSING, table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; default: error = convert_error_code_to_mysql( - ret, 0, user_thd); + ret, 0, m_user_thd); table->status = STATUS_NOT_FOUND; break; @@ -10081,13 +11403,14 @@ next_record: void ha_innobase::ft_end() { - fprintf(stderr, "ft_end()\n"); + ib::info() << "ft_end()"; rnd_end(); } #ifdef WITH_WSREP extern dict_index_t* wsrep_dict_foreign_find_index( +/*==========================*/ dict_table_t* table, const char** col_names, const char** columns, @@ -10111,16 +11434,16 @@ wsrep_append_foreign_key( THD* thd = (THD*)trx->mysql_thd; ulint rcode = DB_SUCCESS; char cache_key[513] = {'\0'}; - int cache_key_len; - bool const copy = true; + int cache_key_len=0; + bool const copy = true; if (!wsrep_on(trx->mysql_thd) || - wsrep_thd_exec_mode(thd) != LOCAL_STATE) + wsrep_thd_exec_mode(thd) != LOCAL_STATE) { return DB_SUCCESS; + } if (!thd || !foreign || - (!foreign->referenced_table && !foreign->foreign_table)) - { + (!foreign->referenced_table && !foreign->foreign_table)) { 
WSREP_INFO("FK: %s missing in: %s", (!thd) ? "thread" : ((!foreign) ? "constraint" : @@ -10132,40 +11455,36 @@ wsrep_append_foreign_key( } if ( !((referenced) ? - foreign->referenced_table : foreign->foreign_table)) - { + foreign->referenced_table : foreign->foreign_table)) { WSREP_DEBUG("pulling %s table into cache", (referenced) ? "referenced" : "foreign"); mutex_enter(&(dict_sys->mutex)); - if (referenced) - { + + if (referenced) { foreign->referenced_table = dict_table_get_low( foreign->referenced_table_name_lookup); - if (foreign->referenced_table) - { + if (foreign->referenced_table) { foreign->referenced_index = wsrep_dict_foreign_find_index( foreign->referenced_table, NULL, foreign->referenced_col_names, - foreign->n_fields, + foreign->n_fields, foreign->foreign_index, TRUE, FALSE); } - } - else - { + } else { foreign->foreign_table = dict_table_get_low( foreign->foreign_table_name_lookup); - if (foreign->foreign_table) - { + + if (foreign->foreign_table) { foreign->foreign_index = wsrep_dict_foreign_find_index( foreign->foreign_table, NULL, foreign->foreign_col_names, foreign->n_fields, - foreign->referenced_index, + foreign->referenced_index, TRUE, FALSE); } } @@ -10173,8 +11492,7 @@ wsrep_append_foreign_key( } if ( !((referenced) ? - foreign->referenced_table : foreign->foreign_table)) - { + foreign->referenced_table : foreign->foreign_table)) { WSREP_WARN("FK: %s missing in query: %s", (!foreign->referenced_table) ? 
"referenced table" : "foreign table", @@ -10182,6 +11500,7 @@ wsrep_append_foreign_key( wsrep_thd_query(thd) : "void"); return DB_ERROR; } + byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH; @@ -10191,36 +11510,40 @@ wsrep_append_foreign_key( UT_LIST_GET_FIRST(foreign->referenced_table->indexes) : UT_LIST_GET_FIRST(foreign->foreign_table->indexes); int i = 0; + while (idx != NULL && idx != idx_target) { if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) { i++; } idx = UT_LIST_GET_NEXT(indexes, idx); } + ut_a(idx); key[0] = (char)i; rcode = wsrep_rec_get_foreign_key( &key[1], &len, rec, index, idx, wsrep_protocol_version > 1); + if (rcode != DB_SUCCESS) { WSREP_ERROR( "FK key set failed: %lu (%lu %lu), index: %s %s, %s", rcode, referenced, shared, - (index && index->name) ? index->name : - "void index", - (index && index->table_name) ? index->table_name : + (index) ? index->name() : "void index", + (index && index->table) ? index->table->name.m_name : "void table", wsrep_thd_query(thd)); return DB_ERROR; } + strncpy(cache_key, (wsrep_protocol_version > 1) ? ((referenced) ? 
- foreign->referenced_table->name : - foreign->foreign_table->name) : - foreign->foreign_table->name, sizeof(cache_key) - 1); + foreign->referenced_table->name.m_name : + foreign->foreign_table->name.m_name) : + foreign->foreign_table->name.m_name, sizeof(cache_key) - 1); cache_key_len = strlen(cache_key); + #ifdef WSREP_DEBUG_PRINT ulint j; fprintf(stderr, "FK parent key, table: %s %s len: %lu ", @@ -10231,16 +11554,18 @@ wsrep_append_foreign_key( fprintf(stderr, "\n"); #endif char *p = strchr(cache_key, '/'); + if (p) { *p = '\0'; } else { WSREP_WARN("unexpected foreign key table %s %s", - foreign->referenced_table->name, - foreign->foreign_table->name); + foreign->referenced_table->name.m_name, + foreign->foreign_table->name.m_name); } wsrep_buf_t wkey_part[3]; wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key( (const uchar*)cache_key, cache_key_len + 1, @@ -10252,7 +11577,9 @@ wsrep_append_foreign_key( wsrep_thd_query(thd) : "void"); return DB_ERROR; } + wsrep_t *wsrep= get_wsrep(); + rcode = (int)wsrep->append_key( wsrep, wsrep_ws_handle(thd, trx), @@ -10260,6 +11587,7 @@ wsrep_append_foreign_key( 1, shared ? 
WSREP_KEY_SHARED : WSREP_KEY_EXCLUSIVE, copy); + if (rcode) { DBUG_PRINT("wsrep", ("row key failed: %lu", rcode)); WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu", @@ -10273,7 +11601,7 @@ wsrep_append_foreign_key( static int wsrep_append_key( -/*==================*/ +/*=============*/ THD *thd, trx_t *trx, TABLE_SHARE *table_share, @@ -10297,6 +11625,7 @@ wsrep_append_key( #endif wsrep_buf_t wkey_part[3]; wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key( (const uchar*)table_share->table_cache_key.str, table_share->table_cache_key.length, @@ -10310,6 +11639,7 @@ wsrep_append_key( } wsrep_t *wsrep= get_wsrep(); + int rcode = (int)wsrep->append_key( wsrep, wsrep_ws_handle(thd, trx), @@ -10324,21 +11654,26 @@ wsrep_append_key( wsrep_thd_query(thd) : "void", rcode); DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } + DBUG_RETURN(0); } static bool -referenced_by_foreign_key2(dict_table_t* table, - dict_index_t* index) { - ut_ad(table != NULL); - ut_ad(index != NULL); +referenced_by_foreign_key2( +/*=======================*/ + dict_table_t* table, + dict_index_t* index) +{ + ut_ad(table != NULL); + ut_ad(index != NULL); - const dict_foreign_set* fks = &table->referenced_set; - for (dict_foreign_set::const_iterator it = fks->begin(); + const dict_foreign_set* fks = &table->referenced_set; + + for (dict_foreign_set::const_iterator it = fks->begin(); it != fks->end(); - ++it) - { + ++it) { dict_foreign_t* foreign = *it; + if (foreign->referenced_index != index) { continue; } @@ -10350,7 +11685,7 @@ referenced_by_foreign_key2(dict_table_t* table, int ha_innobase::wsrep_append_keys( -/*==================*/ +/*===========================*/ THD *thd, bool shared, const uchar* record0, /* in: row in MySQL format */ @@ -10363,10 +11698,10 @@ ha_innobase::wsrep_append_keys( trx_t *trx = thd_to_trx(thd); if (table_share && table_share->tmp_table != NO_TMP_TABLE) { - WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s", + WSREP_DEBUG("skipping tmp table DML: THD: 
%lu tmp: %d SQL: %s", thd_get_thread_id(thd), table_share->tmp_table, - (wsrep_thd_query(thd)) ? + (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); DBUG_RETURN(0); } @@ -10383,13 +11718,14 @@ ha_innobase::wsrep_append_keys( if (!is_null) { rcode = wsrep_append_key( - thd, trx, table_share, table, keyval, + thd, trx, table_share, table, keyval, len, shared); - if (rcode) DBUG_RETURN(rcode); - } - else - { - WSREP_DEBUG("NULL key skipped (proto 0): %s", + + if (rcode) { + DBUG_RETURN(rcode); + } + } else { + WSREP_DEBUG("NULL key skipped (proto 0): %s", wsrep_thd_query(thd)); } } else { @@ -10421,7 +11757,7 @@ ha_innobase::wsrep_append_keys( if (!tab) { WSREP_WARN("MySQL-InnoDB key mismatch %s %s", - table->s->table_name.str, + table->s->table_name.str, key_info->name); } /* !hasPK == table with no PK, must append all non-unique keys */ @@ -10431,32 +11767,35 @@ ha_innobase::wsrep_append_keys( (!tab && referenced_by_foreign_key()))) { len = wsrep_store_key_val_for_row( - thd, table, i, key0, - WSREP_MAX_SUPPORTED_KEY_LENGTH, + thd, table, i, key0, + WSREP_MAX_SUPPORTED_KEY_LENGTH, record0, &is_null); if (!is_null) { rcode = wsrep_append_key( - thd, trx, table_share, table, + thd, trx, table_share, table, keyval0, len+1, shared); - if (rcode) DBUG_RETURN(rcode); + + if (rcode) { + DBUG_RETURN(rcode); + } if (key_info->flags & HA_NOSAME || shared) key_appended = true; - } - else - { - WSREP_DEBUG("NULL key skipped: %s", + } else { + WSREP_DEBUG("NULL key skipped: %s", wsrep_thd_query(thd)); } + if (record1) { len = wsrep_store_key_val_for_row( - thd, table, i, key1, + thd, table, i, key1, WSREP_MAX_SUPPORTED_KEY_LENGTH, record1, &is_null); + if (!is_null && memcmp(key0, key1, len)) { rcode = wsrep_append_key( - thd, trx, table_share, - table, + thd, trx, table_share, + table, keyval1, len+1, shared); if (rcode) DBUG_RETURN(rcode); } @@ -10470,19 +11809,20 @@ ha_innobase::wsrep_append_keys( uchar digest[16]; int rcode; - wsrep_calc_row_hash(digest, record0, 
table, prebuilt, thd); - if ((rcode = wsrep_append_key(thd, trx, table_share, table, - (const char*) digest, 16, + wsrep_calc_row_hash(digest, record0, table, m_prebuilt, thd); + + if ((rcode = wsrep_append_key(thd, trx, table_share, table, + (const char*) digest, 16, shared))) { DBUG_RETURN(rcode); } if (record1) { wsrep_calc_row_hash( - digest, record1, table, prebuilt, thd); - if ((rcode = wsrep_append_key(thd, trx, table_share, + digest, record1, table, m_prebuilt, thd); + if ((rcode = wsrep_append_key(thd, trx, table_share, table, - (const char*) digest, + (const char*) digest, 16, shared))) { DBUG_RETURN(rcode); } @@ -10502,7 +11842,7 @@ is the current 'position' of the handle, because if row ref is actually the row id internally generated in InnoDB, then 'record' does not contain it. We just guess that the row id must be for the record where the handle was positioned the last time. */ -UNIV_INTERN + void ha_innobase::position( /*==================*/ @@ -10510,9 +11850,9 @@ ha_innobase::position( { uint len; - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); - if (prebuilt->clust_index_was_generated) { + if (m_prebuilt->clust_index_was_generated) { /* No primary key was defined for the table and we generated the clustered index from row id: the row reference will be the row id, not any key value @@ -10520,18 +11860,21 @@ ha_innobase::position( len = DATA_ROW_ID_LEN; - memcpy(ref, prebuilt->row_id, len); + memcpy(ref, m_prebuilt->row_id, len); } else { - len = store_key_val_for_row(primary_key, (char*) ref, - ref_length, record); + + /* Copy primary key as the row reference */ + KEY* key_info = table->key_info + m_primary_key; + key_copy(ref, (uchar*)record, key_info, key_info->key_length); + len = key_info->key_length; } /* We assume that the 'ref' value len is always fixed for the same table. 
*/ if (len != ref_length) { - sql_print_error("Stored ref len is %lu, but table ref len is " - "%lu", (ulong) len, (ulong) ref_length); + sql_print_error("Stored ref len is %lu, but table ref len is" + " %lu", (ulong) len, (ulong) ref_length); } } @@ -10558,9 +11901,12 @@ create_table_check_doc_id_col( ulint unsigned_type; field = form->field[i]; + if (!field->stored_in_db()) { + continue; + } - col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + col_type = get_innobase_type_from_mysql_type( + &unsigned_type, field); col_len = field->pack_length(); @@ -10580,9 +11926,9 @@ create_table_check_doc_id_col( trx->mysql_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: FTS_DOC_ID column must be " - "of BIGINT NOT NULL type, and named " - "in all capitalized characters"); + "InnoDB: FTS_DOC_ID column must be" + " of BIGINT NOT NULL type, and named" + " in all capitalized characters"); my_error(ER_WRONG_COLUMN_NAME, MYF(0), field->field_name); *doc_id_col = ULINT_UNDEFINED; @@ -10595,34 +11941,95 @@ create_table_check_doc_id_col( return(false); } -/*****************************************************************//** -Creates a table definition to an InnoDB database. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -int -create_table_def( -/*=============*/ - trx_t* trx, /*!< in: InnoDB transaction handle */ - const TABLE* form, /*!< in: information on table - columns and indexes */ - const char* table_name, /*!< in: table name */ - const char* temp_path, /*!< in: if this is a table explicitly - created by the user with the - TEMPORARY keyword, then this - parameter is the dir path where the - table should be placed if we create - an .ibd file for it (no .ibd extension - in the path, though). 
Otherwise this - is a zero length-string */ - const char* remote_path, /*!< in: Remote path or zero length-string */ - ulint flags, /*!< in: table flags */ - ulint flags2, /*!< in: table flags2 */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Set up base columns for virtual column +@param[in] table InnoDB table +@param[in] field MySQL field +@param[in,out] v_col virtual column */ +void +innodb_base_col_setup( + dict_table_t* table, + const Field* field, + dict_v_col_t* v_col) +{ + int n = 0; + + for (uint i= 0; i < field->table->s->fields; ++i) { + const Field* base_field = field->table->field[i]; + if (!base_field->is_virtual_gcol() + && bitmap_is_set(&field->gcol_info->base_columns_map, i)) { + ulint z; + + for (z = 0; z < table->n_cols; z++) { + const char* name = dict_table_get_col_name(table, z); + if (!innobase_strcasecmp(name, + base_field->field_name)) { + break; + } + } + + ut_ad(z != table->n_cols); + + v_col->base_col[n] = dict_table_get_nth_col(table, z); + ut_ad(v_col->base_col[n]->ind == z); + n++; + } + } +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Set up base columns for stored column +@param[in] table InnoDB table +@param[in] field MySQL field +@param[in,out] s_col stored column */ +void +innodb_base_col_setup_for_stored( + const dict_table_t* table, + const Field* field, + dict_s_col_t* s_col) +{ + ulint n = 0; + + for (uint i= 0; i < field->table->s->fields; ++i) { + const Field* base_field = field->table->field[i]; + + if (!innobase_is_s_fld(base_field) + && !innobase_is_v_fld(base_field) + && bitmap_is_set(&field->gcol_info->base_columns_map, + i)) { + ulint z; + for (z = 0; z < table->n_cols; z++) { + const char* name = dict_table_get_col_name( + table, z); + if (!innobase_strcasecmp( + name, base_field->field_name)) { + break; + } + } + + ut_ad(z != table->n_cols); + + s_col->base_col[n] = dict_table_get_nth_col(table, z); + 
n++; + + if (n == s_col->num_base) { + break; + } + } + } +} +#endif + +/** Create a table definition to an InnoDB database. +@return ER_* level error */ +inline MY_ATTRIBUTE((warn_unused_result)) +int +create_table_info_t::create_table_def() { - THD* thd = trx->mysql_thd; dict_table_t* table; ulint n_cols; - dberr_t err; + ulint s_cols; ulint col_type; ulint col_len; ulint nulls_allowed; @@ -10634,41 +12041,59 @@ create_table_def( ulint doc_id_col = 0; ibool has_doc_id_col = FALSE; mem_heap_t* heap; + ulint num_v = 0; + ulint space_id = 0; + ulint actual_n_cols; + ha_table_option_struct *options= m_form->s->option_struct; + dberr_t err = DB_SUCCESS; DBUG_ENTER("create_table_def"); - DBUG_PRINT("enter", ("table_name: %s", table_name)); + DBUG_PRINT("enter", ("table_name: %s", m_table_name)); - DBUG_ASSERT(thd != NULL); + DBUG_ASSERT(m_trx->mysql_thd == m_thd); /* MySQL does the name length check. But we do additional check on the name length here */ - const size_t table_name_len = strlen(table_name); + const size_t table_name_len = strlen(m_table_name); if (table_name_len > MAX_FULL_NAME_LEN) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_TABLE_NAME, "InnoDB: Table Name or Database Name is too long"); DBUG_RETURN(ER_TABLE_NAME); } - if (table_name[table_name_len - 1] == '/') { + if (m_table_name[table_name_len - 1] == '/') { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_TABLE_NAME, "InnoDB: Table name is empty"); DBUG_RETURN(ER_WRONG_TABLE_NAME); } - n_cols = form->s->fields; + n_cols = m_form->s->fields; + s_cols = m_form->s->stored_fields; + +#ifdef MYSQL_VIRTUAL_COLUMNS + /* Find out any virtual column */ + for (i = 0; i < n_cols; i++) { + Field* field = m_form->field[i]; + + if (innobase_is_v_fld(field)) { + num_v++; + } + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + + ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED)); /* Check whether there already 
exists a FTS_DOC_ID column */ - if (create_table_check_doc_id_col(trx, form, &doc_id_col)){ + if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){ /* Raise error if the Doc ID column is of wrong type or name */ if (doc_id_col == ULINT_UNDEFINED) { - trx_commit_for_mysql(trx); err = DB_ERROR; goto error_ret; @@ -10677,59 +12102,95 @@ create_table_def( } } - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ + /* For single-table tablespaces, we pass 0 as the space id, and then + determine the actual space id when the tablespace is created. */ + if (DICT_TF_HAS_SHARED_SPACE(m_flags)) { + ut_ad(m_tablespace != NULL && m_tablespace[0] != '\0'); - if (flags2 & DICT_TF2_FTS) { - /* Adjust for the FTS hidden field */ - if (!has_doc_id_col) { - table = dict_mem_table_create(table_name, 0, form->s->stored_fields + 1, - flags, flags2); - - /* Set the hidden doc_id column. */ - table->fts->doc_col = form->s->stored_fields; - } else { - table = dict_mem_table_create(table_name, 0, form->s->stored_fields, - flags, flags2); - table->fts->doc_col = doc_id_col; - } - } else { - table = dict_mem_table_create(table_name, 0, form->s->stored_fields, - flags, flags2); + space_id = fil_space_get_id_by_name(m_tablespace); } - if (flags2 & DICT_TF2_TEMPORARY) { - ut_a(strlen(temp_path)); + /* Adjust the number of columns for the FTS hidden field */ + actual_n_cols = m_form->s->stored_fields; + + if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) { + actual_n_cols += 1; + } + + table = dict_mem_table_create(m_table_name, space_id, + actual_n_cols, num_v, m_flags, m_flags2); + + /* Set the hidden doc_id column. */ + if (m_flags2 & DICT_TF2_FTS) { + table->fts->doc_col = has_doc_id_col + ? 
doc_id_col : s_cols; + } + + if (strlen(m_temp_path) != 0) { table->dir_path_of_temp_table = - mem_heap_strdup(table->heap, temp_path); + mem_heap_strdup(table->heap, m_temp_path); } - if (DICT_TF_HAS_DATA_DIR(flags)) { - ut_a(strlen(remote_path)); - table->data_dir_path = mem_heap_strdup(table->heap, remote_path); + if (DICT_TF_HAS_DATA_DIR(m_flags)) { + ut_a(strlen(m_remote_path)); + + table->data_dir_path = mem_heap_strdup( + table->heap, m_remote_path); + } else { table->data_dir_path = NULL; } + + if (DICT_TF_HAS_SHARED_SPACE(m_flags)) { + ut_ad(strlen(m_tablespace)); + table->tablespace = mem_heap_strdup(table->heap, m_tablespace); + } else { + table->tablespace = NULL; + } + heap = mem_heap_create(1000); for (i = 0; i < n_cols; i++) { - Field* field = form->field[i]; - if (!field->stored_in_db()) - continue; + ulint is_virtual; + bool is_stored MY_ATTRIBUTE((unused)); + Field* field = m_form->field[i]; - col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); + if (!field->stored_in_db()) { + continue; + } + + /* Generate a unique column name by pre-pending table-name for + intrinsic tables. For other tables (including normal + temporary) column names are unique. If not, MySQL layer will + block such statement. + This is work-around fix till Optimizer can handle this issue + (probably 5.7.4+). */ + char field_name[MAX_FULL_NAME_LEN + 2 + 10]; + + if (dict_table_is_intrinsic(table) && field->orig_table) { + + ut_snprintf(field_name, sizeof(field_name), + "%s_%s_%lu", field->orig_table->alias.c_ptr(), + field->field_name, i); + + } else { + ut_snprintf(field_name, sizeof(field_name), + "%s", field->field_name); + } + + col_type = get_innobase_type_from_mysql_type( + &unsigned_type, field); if (!col_type) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_CANT_CREATE_TABLE, - "Error creating table '%s' with " - "column '%s'. 
Please check its " - "column type and try to re-create " - "the table with an appropriate " - "column type.", - table->name, field->field_name); + "Error creating table '%s' with" + " column '%s'. Please check its" + " column type and try to re-create" + " the table with an appropriate" + " column type.", + table->name.m_name, field->field_name); goto err_col; } @@ -10742,24 +12203,30 @@ create_table_def( charset_no = (ulint) field->charset()->number; - if (UNIV_UNLIKELY(charset_no > MAX_CHAR_COLL_NUM)) { + DBUG_EXECUTE_IF("simulate_max_char_col", + charset_no = MAX_CHAR_COLL_NUM + 1; + ); + + if (charset_no > MAX_CHAR_COLL_NUM) { /* in data0type.h we assume that the number fits in one byte in prtype */ push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_CANT_CREATE_TABLE, "In InnoDB, charset-collation codes" " must be below 256." " Unsupported code %lu.", (ulong) charset_no); mem_heap_free(heap); + dict_mem_table_free(table); + + ut_ad(trx_state_eq( + m_trx, TRX_STATE_NOT_STARTED)); + DBUG_RETURN(ER_CANT_CREATE_TABLE); } } - /* we assume in dtype_form_prtype() that this fits in - two bytes */ - ut_a(static_cast(field->type()) <= MAX_CHAR_COLL_NUM); col_len = field->pack_length(); /* The MySQL pack length contains 1 or 2 bytes length field @@ -10777,37 +12244,266 @@ create_table_def( } } + if (col_type == DATA_POINT) { + col_len = DATA_POINT_LEN; + } + + is_virtual = (innobase_is_v_fld(field)) ? DATA_VIRTUAL : 0; + is_stored = innobase_is_s_fld(field); + /* First check whether the column to be added has a system reserved name. 
*/ - if (dict_col_name_is_reserved(field->field_name)){ + if (dict_col_name_is_reserved(field_name)){ my_error(ER_WRONG_COLUMN_NAME, MYF(0), - field->field_name); + field_name); err_col: dict_mem_table_free(table); mem_heap_free(heap); - trx_commit_for_mysql(trx); + ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED)); err = DB_ERROR; goto error_ret; } - dict_mem_table_add_col(table, heap, - field->field_name, - col_type, - dtype_form_prtype( - (ulint) field->type() - | nulls_allowed | unsigned_type - | binary_type | long_true_varchar, - charset_no), - col_len); + if (!is_virtual) { + dict_mem_table_add_col(table, heap, + field_name, col_type, + dtype_form_prtype( + (ulint) field->type() + | nulls_allowed | unsigned_type + | binary_type | long_true_varchar, + charset_no), + col_len); + } else { +#ifdef MYSQL_VIRTUAL_COLUMNS + dict_mem_table_add_v_col(table, heap, + field_name, col_type, + dtype_form_prtype( + (ulint) field->type() + | nulls_allowed | unsigned_type + | binary_type | long_true_varchar + | is_virtual, + charset_no), + col_len, i, + 0); + + field->gcol_info->non_virtual_base_columns()); +#endif } +#ifdef MYSQL_VIRTUAL_COLUMNS + if (is_stored) { + ut_ad(!is_virtual); + /* Added stored column in m_s_cols list. */ + dict_mem_table_add_s_col( + table, + field->gcol_info->non_virtual_base_columns()); + } +#endif + } +#ifdef MYSQL_VIRTUAL_COLUMNS + if (num_v) { + ulint j = 0; + for (i = 0; i < n_cols; i++) { + dict_v_col_t* v_col; + + Field* field = m_form->field[i]; + + if (!innobase_is_v_fld(field)) { + continue; + } + + v_col = dict_table_get_nth_v_col(table, j); + + j++; + + innodb_base_col_setup(table, field, v_col); + } + } + + /** Fill base columns for the stored column present in the list. 
*/ + if (table->s_cols && table->s_cols->size()) { + + for (i = 0; i < n_cols; i++) { + Field* field = m_form->field[i]; + + if (!innobase_is_s_fld(field)) { + continue; + } + + dict_s_col_list::iterator it; + for (it = table->s_cols->begin(); + it != table->s_cols->end(); ++it) { + dict_s_col_t s_col = *it; + + if (s_col.s_pos == i) { + innodb_base_col_setup_for_stored( + table, field, &s_col); + break; + } + } + } + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /* Add the FTS doc_id hidden column. */ - if (flags2 & DICT_TF2_FTS && !has_doc_id_col) { + if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) { fts_add_doc_id_column(table, heap); } - err = row_create_table_for_mysql(table, trx, false, mode, key_id); + ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED)); + + /* If temp table, then we avoid creation of entries in SYSTEM TABLES. + Given that temp table lifetime is limited to connection/server lifetime + on re-start we don't need to restore temp-table and so no entry is + needed in SYSTEM tables. */ + if (dict_table_is_temporary(table)) { +#ifdef MYSQL_COMPRESSION + if (m_create_info->compress.length > 0) { + push_warning_printf( + m_thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: Compression not supported for " + "temporary tables"); + + err = DB_UNSUPPORTED; + + dict_mem_table_free(table); + } else if (m_create_info->encrypt_type.length > 0 + && !Encryption::is_none( + m_create_info->encrypt_type.str)) { + + my_error(ER_TABLESPACE_CANNOT_ENCRYPT, MYF(0)); + err = DB_UNSUPPORTED; + dict_mem_table_free(table); + */ + } else { +#endif /* MYSQL_COMPRESSION */ + /* Get a new table ID */ + dict_table_assign_new_id(table, m_trx); + + /* Create temp tablespace if configured. */ + err = dict_build_tablespace_for_table(table, NULL); + + if (err == DB_SUCCESS) { + /* Temp-table are maintained in memory and so + can_be_evicted is FALSE. 
*/ + mem_heap_t* temp_table_heap; + + temp_table_heap = mem_heap_create(256); + + /* For intrinsic table (given that they are + not shared beyond session scope), add + it to session specific THD structure + instead of adding it to dictionary cache. */ + if (dict_table_is_intrinsic(table)) { + add_table_to_thread_cache( + table, temp_table_heap, m_thd); + + } else { + dict_table_add_to_cache( + table, FALSE, temp_table_heap); + } + + DBUG_EXECUTE_IF("ib_ddl_crash_during_create2", + DBUG_SUICIDE();); + + mem_heap_free(temp_table_heap); + } +#ifdef MYSQL_COMPRESSION + } +#endif + + } else { + const char* algorithm = NULL; + +#if MYSQL_COMPRESSION + const char* algorithm = m_create_info->compress.str; + + if (!(m_flags2 & DICT_TF2_USE_FILE_PER_TABLE) + && m_create_info->compress.length > 0 + && !Compression::is_none(algorithm)) { + + push_warning_printf( + m_thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: Compression not supported for " + "shared tablespaces"); + + algorithm = NULL; + + err = DB_UNSUPPORTED; + dict_mem_table_free(table); + + } else if (Compression::validate(algorithm) != DB_SUCCESS + || m_form->s->row_type == ROW_TYPE_COMPRESSED + || m_create_info->key_block_size > 0) { + + algorithm = NULL; + } + + const char* encrypt = m_create_info->encrypt_type.str; + + if (!(m_flags2 & DICT_TF2_USE_FILE_PER_TABLE) + && m_create_info->encrypt_type.length > 0 + && !Encryption::is_none(encrypt)) { + + my_error(ER_TABLESPACE_CANNOT_ENCRYPT, MYF(0)); + err = DB_UNSUPPORTED; + dict_mem_table_free(table); + + } else if (!Encryption::is_none(encrypt)) { + /* Set the encryption flag. */ + byte* master_key = NULL; + ulint master_key_id; + Encryption::Version version; + + /* Check if keyring is ready. 
*/ + Encryption::get_master_key(&master_key_id, + &master_key, + &version); + + if (master_key == NULL) { + my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, + MYF(0)); + err = DB_UNSUPPORTED; + dict_mem_table_free(table); + } else { + my_free(master_key); + DICT_TF2_FLAG_SET(table, + DICT_TF2_ENCRYPTION); + } + } +#endif /* MYSQL_COMPRESSION */ + + if (err == DB_SUCCESS) { + err = row_create_table_for_mysql( + table, algorithm, m_trx, false, + (fil_encryption_t)options->encryption, + options->encryption_key_id); + + } + + if (err == DB_IO_NO_PUNCH_HOLE_FS) { + + ut_ad(!dict_table_in_shared_tablespace(table)); + + push_warning_printf( + m_thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: Punch hole not supported by the " + "file system or the tablespace page size " + "is not large enough. Compression disabled"); + + err = DB_SUCCESS; + } + + DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption", + DBUG_SUICIDE();); + } mem_heap_free(heap); @@ -10818,8 +12514,8 @@ err_col: char display_name[FN_REFLEN]; char* buf_end = innobase_convert_identifier( display_name, sizeof(display_name) - 1, - table_name, strlen(table_name), - thd, TRUE); + m_table_name, strlen(m_table_name), + m_thd); *buf_end = '\0'; @@ -10828,17 +12524,17 @@ err_col: : ER_TABLESPACE_EXISTS, MYF(0), display_name); } - if (err == DB_SUCCESS && (flags2 & DICT_TF2_FTS)) { + if (err == DB_SUCCESS && (m_flags2 & DICT_TF2_FTS)) { fts_optimize_add_table(table); } error_ret: - DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd)); + DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd)); } /*****************************************************************//** Creates an index in an InnoDB database. 
*/ -static +inline int create_index( /*=========*/ @@ -10862,20 +12558,36 @@ create_index( /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */ ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0); - if (key->flags & HA_FULLTEXT) { + ind_type = 0; + if (key->flags & HA_SPATIAL) { + ind_type = DICT_SPATIAL; + } else if (key->flags & HA_FULLTEXT) { + ind_type = DICT_FTS; + } + + if (ind_type != 0) + { index = dict_mem_index_create(table_name, key->name, 0, - DICT_FTS, + ind_type, key->user_defined_key_parts); for (ulint i = 0; i < key->user_defined_key_parts; i++) { KEY_PART_INFO* key_part = key->key_part + i; + + /* We do not support special (Fulltext or Spatial) + index on virtual columns */ + if (innobase_is_v_fld(key_part->field)) { + ut_ad(0); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + dict_mem_index_add_field( index, key_part->field->field_name, 0); } DBUG_RETURN(convert_error_code_to_mysql( row_create_index_for_mysql( - index, trx, NULL), + index, trx, NULL, NULL), flags, NULL)); } @@ -10890,6 +12602,12 @@ create_index( ind_type |= DICT_UNIQUE; } + /* JAN: TODO: MySQL 5.7 PSI + field_lengths = (ulint*) my_malloc(PSI_INSTRUMENT_ME, + key->user_defined_key_parts * sizeof * + field_lengths, MYF(MY_FAE)); + */ + field_lengths = (ulint*) my_malloc( key->user_defined_key_parts * sizeof * field_lengths, MYF(MY_FAE)); @@ -10900,6 +12618,26 @@ create_index( index = dict_mem_index_create(table_name, key->name, 0, ind_type, key->user_defined_key_parts); + innodb_session_t*& priv = thd_to_innodb_session(trx->mysql_thd); + dict_table_t* handler = priv->lookup_table_handler(table_name); + + if (handler != NULL) { + /* This setting will enforce SQL NULL == SQL NULL. + For now this is turned-on for intrinsic tables + only but can be turned on for other tables if needed arises. */ + index->nulls_equal = + (key->flags & HA_NULL_ARE_EQUAL) ? 
true : false; + + /* Disable use of AHI for intrinsic table indexes as AHI + validates the predicated entry using index-id which has to be + system-wide unique that is not the case with indexes of + intrinsic table for performance reason. + Also given the lifetime of these tables and frequent delete + and update AHI would not help on performance front as it does + with normal tables. */ + index->disable_ahi = true; + } + for (ulint i = 0; i < key->user_defined_key_parts; i++) { KEY_PART_INFO* key_part = key->key_part + i; ulint prefix_len; @@ -10912,29 +12650,29 @@ create_index( specified number of first bytes of the column to the index field.) The flag does not seem to be properly set by MySQL. Let us fall back on testing - the length of the key part versus the column. */ + the length of the key part versus the column. + We first reach to the table's column; if the index is on a + prefix, key_part->field is not the table's column (it's a + "fake" field forged in open_table_from_share() with length + equal to the length of the prefix); so we have to go to + form->fied. 
*/ + Field* field= form->field[key_part->field->field_index]; + if (field == NULL) + ut_error; - Field* field = NULL; + const char* field_name = key_part->field->field_name; + if (handler != NULL && dict_table_is_intrinsic(handler)) { - for (ulint j = 0; j < form->s->fields; j++) { - - field = form->field[j]; - - if (0 == innobase_strcasecmp( - field->field_name, - key_part->field->field_name)) { - /* Found the corresponding column */ - - goto found; - } + ut_ad(!innobase_is_v_fld(key_part->field)); + ulint col_no = dict_col_get_no(dict_table_get_nth_col( + handler, key_part->field->field_index)); + field_name = dict_table_get_col_name(handler, col_no); } - ut_error; -found: col_type = get_innobase_type_from_mysql_type( &is_unsigned, key_part->field); - if (DATA_BLOB == col_type + if (DATA_LARGE_MTYPE(col_type) || (key_part->length < field->pack_length() && field->type() != MYSQL_TYPE_VARCHAR) || (field->type() == MYSQL_TYPE_VARCHAR @@ -10950,10 +12688,10 @@ found: case DATA_DOUBLE: case DATA_DECIMAL: sql_print_error( - "MySQL is trying to create a column " - "prefix index field, on an " - "inappropriate data type. Table " - "name %s, column name %s.", + "MySQL is trying to create a column" + " prefix index field, on an" + " inappropriate data type. Table" + " name %s, column name %s.", table_name, key_part->field->field_name); @@ -10965,8 +12703,11 @@ found: field_lengths[i] = key_part->length; - dict_mem_index_add_field( - index, key_part->field->field_name, prefix_len); + if (innobase_is_v_fld(key_part->field)) { + index->type |= DICT_VIRTUAL; + } + + dict_mem_index_add_field(index, field_name, prefix_len); } ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS)); @@ -10976,9 +12717,13 @@ found: sure we don't create too long indexes. 
*/ error = convert_error_code_to_mysql( - row_create_index_for_mysql(index, trx, field_lengths), + row_create_index_for_mysql(index, trx, field_lengths, handler), flags, NULL); + if (error && handler != NULL) { + priv->unregister_table_handler(table_name); + } + my_free(field_lengths); DBUG_RETURN(error); @@ -10987,7 +12732,7 @@ found: /*****************************************************************//** Creates an index to an InnoDB table when the user has defined no primary index. */ -static +inline int create_clustered_index_when_no_primary( /*===================================*/ @@ -11004,19 +12749,36 @@ create_clustered_index_when_no_primary( innobase_index_reserve_name, 0, DICT_CLUSTERED, 0); - error = row_create_index_for_mysql(index, trx, NULL); + innodb_session_t*& priv = thd_to_innodb_session(trx->mysql_thd); + + dict_table_t* handler = priv->lookup_table_handler(table_name); + + if (handler != NULL) { + /* Disable use of AHI for intrinsic table indexes as AHI + validates the predicated entry using index-id which has to be + system-wide unique that is not the case with indexes of + intrinsic table for performance reason. + Also given the lifetime of these tables and frequent delete + and update AHI would not help on performance front as it does + with normal tables. 
*/ + index->disable_ahi = true; + } + + error = row_create_index_for_mysql(index, trx, NULL, handler); + + if (error != DB_SUCCESS && handler != NULL) { + priv->unregister_table_handler(table_name); + } return(convert_error_code_to_mysql(error, flags, NULL)); } -/*****************************************************************//** -Return a display name for the row format +/** Return a display name for the row format +@param[in] row_format Row Format @return row format name */ -UNIV_INTERN const char* get_row_format_name( -/*================*/ - enum row_type row_format) /*!< in: Row Format */ + enum row_type row_format) { switch (row_format) { case ROW_TYPE_COMPACT: @@ -11038,75 +12800,376 @@ get_row_format_name( return("NOT USED"); } -/** If file-per-table is missing, issue warning and set ret false */ -#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace)\ - if (!use_tablespace) { \ - push_warning_printf( \ - thd, Sql_condition::WARN_LEVEL_WARN, \ - ER_ILLEGAL_HA_CREATE_OPTION, \ - "InnoDB: ROW_FORMAT=%s requires" \ - " innodb_file_per_table.", \ - get_row_format_name(row_format)); \ - ret = "ROW_FORMAT"; \ - } - -/** If file-format is Antelope, issue warning and set ret false */ -#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \ - if (srv_file_format < UNIV_FORMAT_B) { \ - push_warning_printf( \ - thd, Sql_condition::WARN_LEVEL_WARN, \ - ER_ILLEGAL_HA_CREATE_OPTION, \ - "InnoDB: ROW_FORMAT=%s requires" \ - " innodb_file_format > Antelope.", \ - get_row_format_name(row_format)); \ - ret = "ROW_FORMAT"; \ - } - - -/*****************************************************************//** -Validates the create options. We may build on this function -in future. For now, it checks two specifiers: -KEY_BLOCK_SIZE and ROW_FORMAT -If innodb_strict_mode is not set then this function is a no-op -@return NULL if valid, string if not. */ -UNIV_INTERN -const char* -create_options_are_invalid( -/*=======================*/ - THD* thd, /*!< in: connection thread. 
*/ - TABLE* form, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info, /*!< in: create info. */ - bool use_tablespace) /*!< in: srv_file_per_table */ +/** Validate DATA DIRECTORY option. +@return true if valid, false if not. */ +bool +create_table_info_t::create_option_data_directory_is_valid() { - ibool kbs_specified = FALSE; - const char* ret = NULL; - enum row_type row_format = form->s->row_type; + bool is_valid = true; - ut_ad(thd != NULL); + ut_ad(m_create_info->data_file_name + && m_create_info->data_file_name[0] != '\0'); - /* If innodb_strict_mode is not set don't do any validation. */ - if (!(THDVAR(thd, strict_mode))) { + /* Use DATA DIRECTORY only with file-per-table. */ + if (!m_use_shared_space && !m_allow_file_per_table) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: DATA DIRECTORY requires" + " innodb_file_per_table."); + is_valid = false; + } + + /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */ + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: DATA DIRECTORY cannot be used" + " for TEMPORARY tables."); + is_valid = false; + } + + /* We check for a DATA DIRECTORY mixed with TABLESPACE in + create_option_tablespace_is_valid(), no need to here. */ + + return(is_valid); +} + + +#define IDENT_NAME_OK 0 +static int check_tablespace_name(const char *name) +{ + CHARSET_INFO *cs= system_charset_info; + return cs->cset->numchars(cs, name, name + strlen(name)) > NAME_CHAR_LEN; +} + + +/** Validate the tablespace name provided for a tablespace DDL +@param[in] name A proposed tablespace name +@param[in] for_table Caller is putting a table here +@return MySQL handler error code like HA_... */ +static +int +validate_tablespace_name( + const char* name, + bool for_table) +{ + int err = 0; + + /* This prefix is reserved by InnoDB for use in internal tablespace names. 
*/ + const char reserved_space_name_prefix[] = "innodb_"; + + if (check_tablespace_name(name) != IDENT_NAME_OK) { + err = HA_WRONG_CREATE_OPTION; + } + + /* The tablespace name cannot start with `innodb_`. */ + if (strlen(name) >= sizeof(reserved_space_name_prefix) - 1 + && 0 == memcmp(name, reserved_space_name_prefix, + sizeof(reserved_space_name_prefix) - 1)) { + + /* Use a different message for reserved names */ + if (0 == strcmp(name, reserved_file_per_table_space_name) + || 0 == strcmp(name, reserved_system_space_name) + || 0 == strcmp(name, reserved_temporary_space_name)) { + /* Allow these names if the caller is putting a + table into one of these by CREATE/ALTER TABLE */ + if (!for_table) { + my_printf_error( + ER_WRONG_TABLESPACE_NAME, + "InnoDB: `%s` is a reserved" + " tablespace name.", + MYF(0), name); + err = HA_WRONG_CREATE_OPTION; + } + } else { + my_printf_error( + ER_WRONG_TABLESPACE_NAME, + "InnoDB: A general tablespace" + " name cannot start with `%s`.", + MYF(0), reserved_space_name_prefix); + err = HA_WRONG_CREATE_OPTION; + } + } + + /* The tablespace name cannot contain a '/'. */ + if (memchr(name, '/', strlen(name)) != NULL) { + my_printf_error(ER_WRONG_TABLESPACE_NAME, + "InnoDB: A general tablespace name cannot" + " contain '/'.", MYF(0)); + err = HA_WRONG_CREATE_OPTION; + } + + return(err); +} + +/** Validate TABLESPACE option. +@return true if valid, false if not. */ +bool +create_table_info_t::create_option_tablespace_is_valid() +{ + if (!m_use_shared_space) { + return(true); + } + + if (0 != validate_tablespace_name(m_create_info->tablespace, true)) { + return(false); + } + + /* Look up the tablespace name in the fil_system. 
*/ + ulint space_id = fil_space_get_id_by_name( + m_create_info->tablespace); + + if (space_id == ULINT_UNDEFINED) { + my_printf_error(ER_TABLESPACE_MISSING, + "InnoDB: A general tablespace named" + " `%s` cannot be found.", MYF(0), + m_create_info->tablespace); + return(false); + } + + /* Cannot add a second table to a file-per-table tablespace. */ + ulint fsp_flags = fil_space_get_flags(space_id); + if (fsp_is_file_per_table(space_id, fsp_flags)) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Tablespace `%s` is file-per-table so no" + " other table can be added to it.", MYF(0), + m_create_info->tablespace); + return(false); + } + + /* If TABLESPACE=innodb_file_per_table this function is not called + since tablespace_is_shared_space() will return false. Any other + tablespace is incompatible with the DATA DIRECTORY phrase. + On any ALTER TABLE that contains a DATA DIRECTORY, MySQL will issue + a warning like " option ignored." The check below is + needed for CREATE TABLE only. ALTER TABLE may be moving remote + file-per-table table to a general tablespace, in which case the + create_info->data_file_name is not null. */ + bool is_create_table = (thd_sql_command(m_thd) == SQLCOM_CREATE_TABLE); + if (is_create_table + && m_create_info->data_file_name != NULL + && m_create_info->data_file_name[0] != '\0') { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: DATA DIRECTORY cannot be used" + " with a TABLESPACE assignment.", MYF(0)); + return(false); + } + + /* Temp tables only belong in temp tablespaces. */ + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) { + if (!FSP_FLAGS_GET_TEMPORARY(fsp_flags)) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Tablespace `%s` cannot contain" + " TEMPORARY tables.", MYF(0), + m_create_info->tablespace); + return(false); + } + + /* Restrict Compressed Temporary General tablespaces. 
*/ + if (m_create_info->key_block_size + || m_create_info->row_type == ROW_TYPE_COMPRESSED) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Temporary tablespace `%s` cannot" + " contain COMPRESSED tables.", MYF(0), + m_create_info->tablespace); + return(false); + } + } else if (FSP_FLAGS_GET_TEMPORARY(fsp_flags)) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Tablespace `%s` can only contain" + " TEMPORARY tables.", MYF(0), + m_create_info->tablespace); + return(false); + } + + /* Make sure the physical page size of the table matches the + file block size of the tablespace. */ + ulint block_size_needed; + bool table_is_compressed; + if (m_create_info->key_block_size) { + block_size_needed = m_create_info->key_block_size * 1024; + table_is_compressed = true; + } else if (m_create_info->row_type == ROW_TYPE_COMPRESSED) { + block_size_needed = ut_min( + UNIV_PAGE_SIZE / 2, + static_cast(UNIV_ZIP_SIZE_MAX)); + table_is_compressed = true; + } else { + block_size_needed = UNIV_PAGE_SIZE; + table_is_compressed = false; + } + + const page_size_t page_size(fsp_flags); + + /* The compression code needs some work in order for a general + tablespace to contain both compressed and non-compressed tables + together in the same tablespace. The problem seems to be that + each page is either compressed or not based on the fsp flags, + which is shared by all tables in that general tablespace. 
*/ + if (table_is_compressed && page_size.physical() == UNIV_PAGE_SIZE) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Tablespace `%s` cannot contain a" + " COMPRESSED table", MYF(0), + m_create_info->tablespace); + return(false); + } + + if (block_size_needed != page_size.physical()) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Tablespace `%s` uses block size " ULINTPF + " and cannot contain a table with physical" + " page size " ULINTPF, MYF(0), + m_create_info->tablespace, + page_size.physical(), + block_size_needed); + return(false); + } + + return(true); +} + +#ifdef MYSQL_COMPRESSION +/** Validate the COPMRESSION option. +@return true if valid, false if not. */ +bool +create_table_info_t::create_option_compression_is_valid() +{ + dberr_t err; + Compression compression; + + if (m_create_info->compress.length == 0) { + return(true); + } + + err = Compression::check(m_create_info->compress.str, &compression); + + if (err == DB_UNSUPPORTED) { + push_warning_printf( + m_thd, + Sql_condition::WARN_LEVEL_WARN, + ER_UNSUPPORTED_EXTENSION, + "InnoDB: Unsupported compression algorithm '%s'", + m_create_info->compress.str); + return(false); + } + + /* Allow Compression=NONE on any tablespace or row format. 
*/ + if (compression.m_type == Compression::NONE) { + return(true); + } + + static char intro[] = "InnoDB: Page Compression is not supported"; + + if (m_create_info->key_block_size != 0 + || m_create_info->row_type == ROW_TYPE_COMPRESSED) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNSUPPORTED_EXTENSION, + "%s with row_format=compressed or" + " key_block_size > 0", intro); + return(false); + } + + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "%s for temporary tables", intro); + return(false); + } + + if (tablespace_is_general_space(m_create_info)) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "%s for shared general tablespaces", intro); + return(false); + } + + /* The only non-file-per-table tablespace left is the system space. */ + if (!m_use_file_per_table) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "%s for the system tablespace", intro); + return(false); + } + + return(true); +} +#endif /* MYSQL_COMPRESSION */ + +/** Validate the create options. Check that the options KEY_BLOCK_SIZE, +ROW_FORMAT, DATA DIRECTORY, TEMPORARY & TABLESPACE are compatible with +each other and other settings. These CREATE OPTIONS are not validated +here unless innodb_strict_mode is on. With strict mode, this function +will report each problem it finds using a custom message with error +code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message. +@return NULL if valid, string name of bad option if not. */ +const char* +create_table_info_t::create_options_are_invalid() +{ + bool has_key_block_size = (m_create_info->key_block_size != 0); + + const char* ret = NULL; + enum row_type row_format = m_create_info->row_type; + + ut_ad(m_thd != NULL); + ut_ad(m_create_info != NULL); + + /* The TABLESPACE designation on a CREATE TABLE is not subject to + non-strict-mode. 
If it is incorrect or is incompatible with other + options, then we will return an error. Make sure the tablespace exists + and is compatible with this table */ + if (!create_option_tablespace_is_valid()) { + return("TABLESPACE"); + } + + /* If innodb_strict_mode is not set don't do any more validation. + Also, if this table is being put into a shared general tablespace + we ALWAYS act like strict mode is ON. */ + if (!m_use_shared_space && !(THDVAR(m_thd, strict_mode))) { return(NULL); } - ut_ad(form != NULL); - ut_ad(create_info != NULL); - - /* First check if a non-zero KEY_BLOCK_SIZE was specified. */ - if (create_info->key_block_size) { - kbs_specified = TRUE; - switch (create_info->key_block_size) { + /* Check if a non-zero KEY_BLOCK_SIZE was specified. */ + if (has_key_block_size) { + switch (m_create_info->key_block_size) { ulint kbs_max; case 1: case 2: case 4: case 8: case 16: + /* The maximum KEY_BLOCK_SIZE (KBS) is + UNIV_PAGE_SIZE_MAX. But if UNIV_PAGE_SIZE is + smaller than UNIV_PAGE_SIZE_MAX, the maximum + KBS is also smaller. */ + kbs_max = ut_min( + 1 << (UNIV_PAGE_SSIZE_MAX - 1), + 1 << (PAGE_ZIP_SSIZE_MAX - 1)); + if (m_create_info->key_block_size > kbs_max) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: KEY_BLOCK_SIZE=%ld" + " cannot be larger than %ld.", + m_create_info->key_block_size, + kbs_max); + ret = "KEY_BLOCK_SIZE"; + } + + /* The following checks do not appy to shared tablespaces */ + if (m_use_shared_space) { + break; + } + /* Valid KEY_BLOCK_SIZE, check its dependencies. 
*/ - if (!use_tablespace) { + if (!m_allow_file_per_table) { push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: KEY_BLOCK_SIZE requires" " innodb_file_per_table."); @@ -11114,58 +13177,77 @@ create_options_are_invalid( } if (srv_file_format < UNIV_FORMAT_B) { push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: KEY_BLOCK_SIZE requires" " innodb_file_format > Antelope."); ret = "KEY_BLOCK_SIZE"; } - - /* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if - UNIV_PAGE_SIZE is smaller than 16k, the maximum - KBS is also smaller. */ - kbs_max = ut_min( - 1 << (UNIV_PAGE_SSIZE_MAX - 1), - 1 << (PAGE_ZIP_SSIZE_MAX - 1)); - if (create_info->key_block_size > kbs_max) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE=%ld" - " cannot be larger than %ld.", - create_info->key_block_size, - kbs_max); - ret = "KEY_BLOCK_SIZE"; - } break; default: push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: invalid KEY_BLOCK_SIZE = %lu." " Valid values are [1, 2, 4, 8, 16]", - create_info->key_block_size); + m_create_info->key_block_size); ret = "KEY_BLOCK_SIZE"; break; } } - /* Check for a valid Innodb ROW_FORMAT specifier and + /* Check for a valid InnoDB ROW_FORMAT specifier and other incompatibilities. 
*/ switch (row_format) { case ROW_TYPE_COMPRESSED: - CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace); - CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; + if (!m_use_shared_space) { + if (!m_allow_file_per_table) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_per_table.", + get_row_format_name(row_format)); + ret = "ROW_FORMAT"; + } + if (srv_file_format < UNIV_FORMAT_B) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_format > Antelope.", + get_row_format_name(row_format)); + ret = "ROW_FORMAT"; + } + } break; case ROW_TYPE_DYNAMIC: - CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace); - CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; - /* fall through since dynamic also shuns KBS */ + if (!m_use_shared_space) { + if (!m_allow_file_per_table) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_per_table.", + get_row_format_name(row_format)); + ret = "ROW_FORMAT"; + } + if (srv_file_format < UNIV_FORMAT_B) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s requires" + " innodb_file_format > Antelope.", + get_row_format_name(row_format)); + ret = "ROW_FORMAT"; + } + } + /* FALLTRHOUGH */ case ROW_TYPE_COMPACT: case ROW_TYPE_REDUNDANT: - if (kbs_specified) { + if (has_key_block_size) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: cannot specify ROW_FORMAT = %s" " with KEY_BLOCK_SIZE.", @@ -11179,86 +13261,259 @@ create_options_are_invalid( case ROW_TYPE_PAGE: case ROW_TYPE_NOT_USED: push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, \ + m_thd, Sql_condition::WARN_LEVEL_WARN, + 
ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: invalid ROW_FORMAT specifier."); ret = "ROW_TYPE"; break; } - /* Use DATA DIRECTORY only with file-per-table. */ - if (create_info->data_file_name && !use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY requires" - " innodb_file_per_table."); - ret = "DATA DIRECTORY"; - } - - /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */ - if (create_info->data_file_name - && create_info->options & HA_LEX_CREATE_TMP_TABLE) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY cannot be used" - " for TEMPORARY tables."); + if (m_create_info->data_file_name + && m_create_info->data_file_name[0] != '\0' + && !create_option_data_directory_is_valid()) { ret = "DATA DIRECTORY"; } /* Do not allow INDEX_DIRECTORY */ - if (create_info->index_file_name) { + if (m_create_info->index_file_name) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: INDEX DIRECTORY is not supported"); ret = "INDEX DIRECTORY"; } - if ((kbs_specified || row_format == ROW_TYPE_COMPRESSED) - && UNIV_PAGE_SIZE > (1<<14)) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: Cannot create a COMPRESSED table" - " when innodb_page_size > 16k."); + /* Don't support compressed table when page size > 16k. */ + if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED) + && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) { + push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Cannot create a COMPRESSED table" + " when innodb_page_size > 16k."); - if (kbs_specified) { + if (has_key_block_size) { ret = "KEY_BLOCK_SIZE"; } else { ret = "ROW_TYPE"; } } +#ifdef MYSQL_COMPRESSION + /* Validate the page compression parameter. 
*/ + if (!create_option_compression_is_valid()) { + return("COMPRESSION"); + } + + /* Check the encryption option. */ + if (ret == NULL && m_create_info->encrypt_type.length > 0) { + dberr_t err; + + err = Encryption::validate(m_create_info->encrypt_type.str); + + if (err == DB_UNSUPPORTED) { + my_error(ER_INVALID_ENCRYPTION_OPTION, MYF(0)); + ret = "ENCRYPTION"; + } + } +#endif + return(ret); } +/*****************************************************************//** +Check engine specific table options not handled by SQL-parser. +@return NULL if valid, string if not */ +const char* +create_table_info_t::check_table_options() +{ + enum row_type row_format = m_form->s->row_type; + ha_table_option_struct *options= m_form->s->option_struct; + atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + fil_encryption_t encrypt = (fil_encryption_t)options->encryption; + + if (encrypt != FIL_SPACE_ENCRYPTION_DEFAULT && !m_allow_file_per_table) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ENCRYPTED requires innodb_file_per_table"); + return "ENCRYPTED"; + } + + if (encrypt == FIL_SPACE_ENCRYPTION_OFF && srv_encrypt_tables == 2) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ENCRYPTED=OFF cannot be used when innodb_encrypt_tables=FORCE"); + return "ENCRYPTED"; + } + + /* Check page compression requirements */ + if (options->page_compressed) { + + if (row_format == ROW_TYPE_COMPRESSED) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=COMPRESSED"); + return "PAGE_COMPRESSED"; + } + + if (row_format == ROW_TYPE_REDUNDANT) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=REDUNDANT"); + return "PAGE_COMPRESSED"; + } + + if (!m_allow_file_per_table) { + push_warning( + m_thd, 
Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_per_table."); + return "PAGE_COMPRESSED"; + } + + if (srv_file_format < UNIV_FORMAT_B) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_format > Antelope."); + return "PAGE_COMPRESSED"; + } + + if (m_create_info->key_block_size) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " key_block_size"); + return "PAGE_COMPRESSED"; + } + } + + /* Check page compression level requirements, some of them are + already checked above */ + if (options->page_compression_level != 0) { + if (options->page_compressed == false) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSION_LEVEL requires" + " PAGE_COMPRESSED"); + return "PAGE_COMPRESSION_LEVEL"; + } + + if (options->page_compression_level < 1 || options->page_compression_level > 9) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." 
+ " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]", + options->page_compression_level); + return "PAGE_COMPRESSION_LEVEL"; + } + } + + /* If encryption is set up make sure that used key_id is found */ + if (encrypt == FIL_SPACE_ENCRYPTION_ON || + (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && srv_encrypt_tables)) { + if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ENCRYPTION_KEY_ID %u not available", + (uint)options->encryption_key_id + ); + return "ENCRYPTION_KEY_ID"; + } + } + + /* Ignore nondefault key_id if encryption is set off */ + if (encrypt == FIL_SPACE_ENCRYPTION_OFF && + options->encryption_key_id != THDVAR(m_thd, default_encryption_key_id)) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: Ignored ENCRYPTION_KEY_ID %u when encryption is disabled", + (uint)options->encryption_key_id + ); + options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; + } + + /* If default encryption is used make sure that used kay is found + from key file. 
*/ + if (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && + !srv_encrypt_tables && + options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { + if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ENCRYPTION_KEY_ID %u not available", + (uint)options->encryption_key_id + ); + return "ENCRYPTION_KEY_ID"; + + } + } + + /* Check atomic writes requirements */ + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!m_allow_file_per_table) { + push_warning( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ATOMIC_WRITES requires" + " innodb_file_per_table."); + return "ATOMIC_WRITES"; + } + } + + return NULL; +} + /*****************************************************************//** Update create_info. Used in SHOW CREATE TABLE et al. */ -UNIV_INTERN + void ha_innobase::update_create_info( /*============================*/ HA_CREATE_INFO* create_info) /*!< in/out: create info */ { if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { - ha_innobase::info(HA_STATUS_AUTO); + info(HA_STATUS_AUTO); create_info->auto_increment_value = stats.auto_increment_value; } /* Update the DATA DIRECTORY name from SYS_DATAFILES. */ - dict_get_and_save_data_dir_path(prebuilt->table, false); + dict_get_and_save_data_dir_path(m_prebuilt->table, false); - if (prebuilt->table->data_dir_path) { - create_info->data_file_name = prebuilt->table->data_dir_path; + if (m_prebuilt->table->data_dir_path) { + create_info->data_file_name = m_prebuilt->table->data_dir_path; + } + + /* Update the TABLESPACE name from the Data Dictionary. */ + dict_get_and_save_space_name(m_prebuilt->table, false); + + /* Put this tablespace name into the create_info structure so that + SHOW CREATE TABLE will display TABLESPACE=name. 
This also affects + an ALTER TABLE which must know the current TABLESPACE so that the + table will stay there. */ + if (m_prebuilt->table->tablespace != NULL + && create_info->tablespace == NULL) { + create_info->tablespace = m_prebuilt->table->tablespace; } } /*****************************************************************//** Initialize the table FTS stopword list @return TRUE if success */ -UNIV_INTERN ibool innobase_fts_load_stopword( /*=======================*/ @@ -11272,29 +13527,17 @@ innobase_fts_load_stopword( THDVAR(thd, ft_enable_stopword), FALSE)); } -/*****************************************************************//** -Parses the table name into normal name and either temp path or remote path +/** Parse the table name into normal name and either temp path or remote path if needed. -@return 0 if successful, otherwise, error number */ -UNIV_INTERN +@param[in] name Table name (db/table or full path). +@return 0 if successful, otherwise, error number */ int -ha_innobase::parse_table_name( -/*==========================*/ - const char* name, /*!< in/out: table name provided*/ - HA_CREATE_INFO* create_info, /*!< in: more information of the - created table, contains also the - create statement string */ - ulint flags, /*!< in: flags*/ - ulint flags2, /*!< in: flags2*/ - char* norm_name, /*!< out: normalized table name */ - char* temp_path, /*!< out: absolute path of table */ - char* remote_path) /*!< out: remote path of table */ +create_table_info_t::parse_table_name( + const char* name) { - THD* thd = ha_thd(); - bool use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE; - DBUG_ENTER("ha_innobase::parse_table_name"); + DBUG_ENTER("parse_table_name"); -#ifdef __WIN__ +#ifdef _WIN32 /* Names passed in from server are in two formats: 1. /: for normal table creation 2. full path: for temp table creation, or DATA DIRECTORY. @@ -11306,9 +13549,9 @@ ha_innobase::parse_table_name( returns error if it is in full path format, but not creating a temp. table. 
Currently InnoDB does not support symbolic link on Windows. */ - if (use_tablespace + if (m_innodb_file_per_table && !mysqld_embedded - && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { + && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE)) { if ((name[1] == ':') || (name[0] == '\\' && name[1] == '\\')) { @@ -11318,83 +13561,77 @@ ha_innobase::parse_table_name( } #endif - normalize_table_name(norm_name, name); - temp_path[0] = '\0'; - remote_path[0] = '\0'; + m_temp_path[0] = '\0'; + m_remote_path[0] = '\0'; + m_tablespace[0] = '\0'; - /* A full path is used for TEMPORARY TABLE and DATA DIRECTORY. - In the case of; - CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ; - We ignore the DATA DIRECTORY. */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - strncpy(temp_path, name, FN_REFLEN - 1); + /* A full path is provided by the server for TEMPORARY tables not + targeted for a tablespace or when DATA DIRECTORY is given. + So these two are not compatible. Likewise, DATA DIRECTORY is not + compatible with a TABLESPACE assignment. */ + if ((m_create_info->options & HA_LEX_CREATE_TMP_TABLE) + && !m_use_shared_space) { + strncpy(m_temp_path, name, FN_REFLEN - 1); } - if (create_info->data_file_name) { - bool ignore = false; - - /* Use DATA DIRECTORY only with file-per-table. */ - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY requires" - " innodb_file_per_table."); - ignore = true; - } - - /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY cannot be" - " used for TEMPORARY tables."); - ignore = true; - } - - if (ignore) { - my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING, + /* Make sure DATA DIRECTORY is compatible with other options + and set the remote path. 
In the case of either; + CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ; + CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ; + we ignore the DATA DIRECTORY. */ + if (m_create_info->data_file_name + && m_create_info->data_file_name[0] != '\0') { + if (!create_option_data_directory_is_valid()) { + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + WARN_OPTION_IGNORED, + ER_DEFAULT(WARN_OPTION_IGNORED), "DATA DIRECTORY"); + + m_flags &= ~DICT_TF_MASK_DATA_DIR; } else { - strncpy(remote_path, create_info->data_file_name, + strncpy(m_remote_path, + m_create_info->data_file_name, FN_REFLEN - 1); } } - if (create_info->index_file_name) { + if (m_create_info->index_file_name) { my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING, "INDEX DIRECTORY"); } + /* The TABLESPACE designation has already been validated by + create_option_tablespace_is_valid() irregardless of strict-mode. + So it only needs to be copied now. */ + if (m_use_shared_space) { + strncpy(m_tablespace, m_create_info->tablespace, + NAME_LEN - 1); + } + DBUG_RETURN(0); } -/*****************************************************************//** -Determines InnoDB table flags. +/** Determine InnoDB table flags. +If strict_mode=OFF, this will adjust the flags to what should be assumed. +However, if an existing general tablespace is being targeted, we will NOT +assume anything or adjust these flags. 
@retval true if successful, false if error */ -UNIV_INTERN bool -innobase_table_flags( -/*=================*/ - const TABLE* form, /*!< in: table */ - const HA_CREATE_INFO* create_info, /*!< in: information - on table columns and indexes */ - THD* thd, /*!< in: connection */ - bool use_tablespace, /*!< in: whether to create - outside system tablespace */ - ulint* flags, /*!< out: DICT_TF flags */ - ulint* flags2) /*!< out: DICT_TF2 flags */ +create_table_info_t::innobase_table_flags() { DBUG_ENTER("innobase_table_flags"); const char* fts_doc_id_index_bad = NULL; bool zip_allowed = true; ulint zip_ssize = 0; - enum row_type row_format; - rec_format_t innodb_row_format = REC_FORMAT_COMPACT; - bool use_data_dir; - ha_table_option_struct *options= form->s->option_struct; + enum row_type row_type; + rec_format_t innodb_row_format = + get_row_format(innodb_default_row_format); + + const ulint zip_ssize_max = + ut_min(static_cast(UNIV_PAGE_SSIZE_MAX), + static_cast(PAGE_ZIP_SSIZE_MAX)); /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ @@ -11404,37 +13641,69 @@ innobase_table_flags( modified by another thread while the table is being created. */ const ulint default_compression_level = page_zip_level; - *flags = 0; - *flags2 = 0; + ha_table_option_struct *options= m_form->s->option_struct; + + m_flags = 0; + m_flags2 = 0; + +#ifdef MYSQL_COMPRESSION + /* Validate the page compression parameter. */ + if (!create_option_compression_is_valid()) { + /* No need to do anything. Warnings were issued. + The compresion setting will be ignored later. + If inodb_strict_mode=ON, this is called twice unless + there was a problem before. + If inodb_strict_mode=OFF, this is the only call. */ + } +#endif + +#ifdef MYSQL_ENCRYPTION + /* Validate the page encryption parameter. 
*/ + if (m_create_info->encrypt_type.length > 0) { + + const char* encryption = m_create_info->encrypt_type.str; + + if (Encryption::validate(encryption) != DB_SUCCESS) { + /* Incorrect encryption option */ + my_error(ER_INVALID_ENCRYPTION_OPTION, MYF(0)); + DBUG_RETURN(false); + } + + if (m_use_shared_space + || (m_create_info->options & HA_LEX_CREATE_TMP_TABLE)) { + if (!Encryption::is_none(encryption)) { + /* Can't encrypt shared tablespace */ + my_error(ER_TABLESPACE_CANNOT_ENCRYPT, MYF(0)); + DBUG_RETURN(false); + } + } + } +#endif /* MYSQL_ENCRYPTION */ /* Check if there are any FTS indexes defined on this table. */ - for (uint i = 0; i < form->s->keys; i++) { - const KEY* key = &form->key_info[i]; + for (uint i = 0; i < m_form->s->keys; i++) { + const KEY* key = &m_form->key_info[i]; if (key->flags & HA_FULLTEXT) { - *flags2 |= DICT_TF2_FTS; + m_flags2 |= DICT_TF2_FTS; /* We don't support FTS indexes in temporary tables. */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) { my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0)); DBUG_RETURN(false); } - if (key->flags & HA_USES_PARSER) { - my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0)); - DBUG_RETURN(false); - } - - if (key->flags & HA_USES_PARSER) { - my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0)); - DBUG_RETURN(false); - } - if (fts_doc_id_index_bad) { goto index_bad; } + } else if (key->flags & HA_SPATIAL) { + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE + && !m_use_file_per_table) { + my_error(ER_TABLE_CANT_HANDLE_SPKEYS, MYF(0)); + DBUG_RETURN(false); + } } if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) { @@ -11449,7 +13718,7 @@ innobase_table_flags( fts_doc_id_index_bad = key->name; } - if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) { + if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) { index_bad: my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0), fts_doc_id_index_bad); @@ -11457,61 +13726,62 @@ index_bad: } } - row_format 
= form->s->row_type; + //rec_format_t row_format = m_form->s->row_type; - if (create_info->key_block_size) { + if (m_create_info->key_block_size > 0) { /* The requested compressed page size (key_block_size) is given in kilobytes. If it is a valid number, store that value as the number of log2 shifts from 512 in zip_ssize. Zero means it is not compressed. */ - ulint zssize; /* Zip Shift Size */ - ulint kbsize; /* Key Block Size */ + ulint zssize; /* Zip Shift Size */ + ulint kbsize; /* Key Block Size */ for (zssize = kbsize = 1; - zssize <= ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX); + zssize <= zip_ssize_max; zssize++, kbsize <<= 1) { - if (kbsize == create_info->key_block_size) { + if (kbsize == m_create_info->key_block_size) { zip_ssize = zssize; break; } } /* Make sure compressed row format is allowed. */ - if (!use_tablespace) { + if (!m_allow_file_per_table && !m_use_shared_space) { push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: KEY_BLOCK_SIZE requires" " innodb_file_per_table."); - zip_allowed = FALSE; + zip_allowed = false; } - if (file_format_allowed < UNIV_FORMAT_B) { + if (file_format_allowed < UNIV_FORMAT_B + && !m_use_shared_space) { push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: KEY_BLOCK_SIZE requires" " innodb_file_format > Antelope."); - zip_allowed = FALSE; + zip_allowed = false; } if (!zip_allowed - || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX)) { + || zssize > zip_ssize_max) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.", - create_info->key_block_size); + m_create_info->key_block_size); } } + row_type = m_form->s->row_type; + if (zip_ssize && zip_allowed) { /* if ROW_FORMAT is set to default, - automatically change it to COMPRESSED.*/ 
- if (row_format == ROW_TYPE_DEFAULT) { - row_format = ROW_TYPE_COMPRESSED; - } else if (row_format != ROW_TYPE_COMPRESSED) { + automatically change it to COMPRESSED. */ + if (row_type == ROW_TYPE_DEFAULT) { + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { /* ROW_FORMAT other than COMPRESSED ignores KEY_BLOCK_SIZE. It does not make sense to reject conflicting @@ -11519,84 +13789,89 @@ index_bad: such combinations can be obtained with ALTER TABLE anyway. */ push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - zip_allowed = FALSE; + m_create_info->key_block_size); + zip_allowed = false; } } else { - /* zip_ssize == 0 means no KEY_BLOCK_SIZE.*/ - if (row_format == ROW_TYPE_COMPRESSED && zip_allowed) { + /* zip_ssize == 0 means no KEY_BLOCK_SIZE. */ + if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) { /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE implies half the maximum KEY_BLOCK_SIZE(*1k) or UNIV_PAGE_SIZE, whichever is less. */ - zip_ssize = ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX) - 1; + zip_ssize = zip_ssize_max - 1; } } /* Validate the row format. Correct it if necessary */ - switch (row_format) { + + switch (row_type) { case ROW_TYPE_REDUNDANT: innodb_row_format = REC_FORMAT_REDUNDANT; break; + case ROW_TYPE_COMPACT: + innodb_row_format = REC_FORMAT_COMPACT; + break; case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - if (!use_tablespace) { + /* ROW_FORMAT=COMPRESSED requires file_per_table and + file_format=Barracuda unless there is a target tablespace. 
*/ + if (!m_allow_file_per_table + && !m_use_shared_space) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s requires" - " innodb_file_per_table.", - get_row_format_name(row_format)); - } else if (file_format_allowed == UNIV_FORMAT_A) { + "InnoDB: ROW_FORMAT=COMPRESSED requires" + " innodb_file_per_table."); + + } else if (file_format_allowed == UNIV_FORMAT_A + && !m_use_shared_space) { push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s requires" - " innodb_file_format > Antelope.", - get_row_format_name(row_format)); + "InnoDB: ROW_FORMAT=COMPRESSED requires" + " innodb_file_format > Antelope."); + } else { - switch(row_format) { - case ROW_TYPE_COMPRESSED: - innodb_row_format = REC_FORMAT_COMPRESSED; - break; - case ROW_TYPE_DYNAMIC: - innodb_row_format = REC_FORMAT_DYNAMIC; - break; - default: - /* Not possible, avoid compiler warning */ - break; + switch(row_type) { + case ROW_TYPE_COMPRESSED: + innodb_row_format = REC_FORMAT_COMPRESSED; + break; + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; + break; + default: + /* Not possible, avoid compiler warning */ + break; } break; /* Correct row_format */ } - zip_allowed = FALSE; - /* fall through to set row_format = COMPACT */ + zip_allowed = false; + /* fall through to set row_type = DYNAMIC */ case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: case ROW_TYPE_PAGE: push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - /* If we fell through, set row format to Compact. 
*/ - row_format = ROW_TYPE_COMPACT; - case ROW_TYPE_COMPACT: + "InnoDB: assuming ROW_FORMAT=DYNAMIC."); + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; break; + case ROW_TYPE_DEFAULT: + ; } /* Don't support compressed table when page size > 16k. */ if (zip_allowed && zip_ssize && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: Cannot create a COMPRESSED table" - " when innodb_page_size > 16k." - " Assuming ROW_FORMAT=COMPACT."); - zip_allowed = FALSE; + push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Cannot create a COMPRESSED table" + " when innodb_page_size > 16k." + " Assuming ROW_FORMAT=DYNAMIC."); + zip_allowed = false; } /* Set the table flags */ @@ -11604,386 +13879,409 @@ index_bad: zip_ssize = 0; } - use_data_dir = use_tablespace - && ((create_info->data_file_name != NULL) - && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); + if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) { + m_flags2 |= DICT_TF2_TEMPORARY; + /* Intrinsic tables reside only in the shared temporary + tablespace and we will always use ROW_FORMAT=DYNAMIC. */ - /* Set up table dictionary flags */ - dict_tf_set(flags, - innodb_row_format, - zip_ssize, - use_data_dir, - options->page_compressed, - options->page_compression_level == 0 ? - default_compression_level : options->page_compression_level, - options->atomic_writes); +#ifdef MYSQL_COMPRESSION + if ((m_create_info->options & HA_LEX_CREATE_INTERNAL_TMP_TABLE) + && !m_use_file_per_table) { + + /* We do not allow compressed instrinsic + temporary tables. 
*/ + + ut_ad(zip_ssize == 0); + m_flags2 |= DICT_TF2_INTRINSIC; + innodb_row_format = REC_FORMAT_DYNAMIC; + } +#endif - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - *flags2 |= DICT_TF2_TEMPORARY; } - if (use_tablespace) { - *flags2 |= DICT_TF2_USE_TABLESPACE; + /* Set the table flags */ + dict_tf_set(&m_flags, innodb_row_format, zip_ssize, + m_use_data_dir, m_use_shared_space, + options->page_compressed, + options->page_compression_level == 0 ? + default_compression_level : options->page_compression_level, + options->atomic_writes); + + if (m_use_file_per_table) { + ut_ad(!m_use_shared_space); + m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE; } /* Set the flags2 when create table or alter tables */ - *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); DBUG_RETURN(true); } - -/*****************************************************************//** -Check engine specific table options not handled by SQL-parser. -@return NULL if valid, string if not */ -UNIV_INTERN -const char* -ha_innobase::check_table_options( - THD *thd, /*!< in: thread handle */ - TABLE* table, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info, /*!< in: more information of the - created table, contains also the - create statement string */ - const bool use_tablespace, /*!< in: use file par table */ - const ulint file_format) +/** Parse MERGE_THRESHOLD value from the string. +@param[in] thd connection +@param[in] str string which might include 'MERGE_THRESHOLD=' +@return value parsed. 0 means not found or invalid value. 
*/ +static +ulint +innobase_parse_merge_threshold( + THD* thd, + const char* str) { - enum row_type row_format = table->s->row_type; - ha_table_option_struct *options= table->s->option_struct; - atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; - fil_encryption_t encrypt = (fil_encryption_t)options->encryption; + static const char* label = "MERGE_THRESHOLD="; + static const size_t label_len = strlen(label); + const char* pos = str; - if (encrypt != FIL_SPACE_ENCRYPTION_DEFAULT && !use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTED requires innodb_file_per_table"); - return "ENCRYPTED"; - } + pos = strstr(str, label); - if (encrypt == FIL_SPACE_ENCRYPTION_OFF && srv_encrypt_tables == 2) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTED=OFF cannot be used when innodb_encrypt_tables=FORCE"); - return "ENCRYPTED"; + if (pos == NULL) { + return(0); } - /* Check page compression requirements */ - if (options->page_compressed) { + pos += label_len; - if (row_format == ROW_TYPE_COMPRESSED) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " ROW_TYPE=COMPRESSED"); - return "PAGE_COMPRESSED"; - } + lint ret = atoi(pos); - if (row_format == ROW_TYPE_REDUNDANT) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " ROW_TYPE=REDUNDANT"); - return "PAGE_COMPRESSED"; - } - - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - " innodb_file_per_table."); - return "PAGE_COMPRESSED"; - } - - if (file_format < UNIV_FORMAT_B) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - " innodb_file_format > Antelope."); - return 
"PAGE_COMPRESSED"; - } - - if (create_info->key_block_size) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " key_block_size"); - return "PAGE_COMPRESSED"; - } + if (ret > 0 && ret <= 50) { + return(static_cast(ret)); } - /* Check page compression level requirements, some of them are - already checked above */ - if (options->page_compression_level != 0) { - if (options->page_compressed == false) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSION_LEVEL requires" - " PAGE_COMPRESSED"); - return "PAGE_COMPRESSION_LEVEL"; - } + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE" + " statement. The value is ignored."); - if (options->page_compression_level < 1 || options->page_compression_level > 9) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." 
- " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]", - options->page_compression_level); - return "PAGE_COMPRESSION_LEVEL"; - } - } - - /* If encryption is set up make sure that used key_id is found */ - if (encrypt == FIL_SPACE_ENCRYPTION_ON || - (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && srv_encrypt_tables)) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id - ); - return "ENCRYPTION_KEY_ID"; - } - } - - /* Ignore nondefault key_id if encryption is set off */ - if (encrypt == FIL_SPACE_ENCRYPTION_OFF && - options->encryption_key_id != THDVAR(thd, default_encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: Ignored ENCRYPTION_KEY_ID %u when encryption is disabled", - (uint)options->encryption_key_id - ); - options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; - } - - /* If default encryption is used make sure that used kay is found - from key file. 
*/ - if (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id - ); - return "ENCRYPTION_KEY_ID"; - - } - } - - /* Check atomic writes requirements */ - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ATOMIC_WRITES requires" - " innodb_file_per_table."); - return "ATOMIC_WRITES"; - } - } - - return 0; + return(0); } -/*****************************************************************//** -Creates a new table to an InnoDB database. -@return error number */ -UNIV_INTERN -int -ha_innobase::create( -/*================*/ - const char* name, /*!< in: table name */ - TABLE* form, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info) /*!< in: more information of the - created table, contains also the - create statement string */ +/** Parse hint for table and its indexes, and update the information +in dictionary. 
+@param[in] thd connection +@param[in,out] table target table +@param[in] table_share table definition */ +void +innobase_parse_hint_from_comment( + THD* thd, + dict_table_t* table, + const TABLE_SHARE* table_share) +{ + ulint merge_threshold_table; + ulint merge_threshold_index[MAX_KEY]; + bool is_found[MAX_KEY]; + + if (table_share->comment.str != NULL) { + merge_threshold_table + = innobase_parse_merge_threshold( + thd, table_share->comment.str); + } else { + merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; + } + + if (merge_threshold_table == 0) { + merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; + } + + for (uint i = 0; i < table_share->keys; i++) { + KEY* key_info = &table_share->key_info[i]; + + ut_ad(i < sizeof(merge_threshold_index) + / sizeof(merge_threshold_index[0])); + + if (key_info->flags & HA_USES_COMMENT + && key_info->comment.str != NULL) { + merge_threshold_index[i] + = innobase_parse_merge_threshold( + thd, key_info->comment.str); + } else { + merge_threshold_index[i] = merge_threshold_table; + } + + if (merge_threshold_index[i] == 0) { + merge_threshold_index[i] = merge_threshold_table; + } + } + + /* update SYS_INDEX table */ + if (!dict_table_is_temporary(table)) { + for (uint i = 0; i < table_share->keys; i++) { + is_found[i] = false; + } + + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + if (dict_index_is_auto_gen_clust(index)) { + + /* GEN_CLUST_INDEX should use + merge_threshold_table */ + dict_index_set_merge_threshold( + index, merge_threshold_table); + continue; + } + + for (uint i = 0; i < table_share->keys; i++) { + if (is_found[i]) { + continue; + } + + KEY* key_info = &table_share->key_info[i]; + + if (innobase_strcasecmp( + index->name, key_info->name) == 0) { + + dict_index_set_merge_threshold( + index, + merge_threshold_index[i]); + is_found[i] = true; + break; + } + } + } + } + + for (uint i = 0; i < table_share->keys; i++) 
{ + is_found[i] = false; + } + + /* update in memory */ + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + if (dict_index_is_auto_gen_clust(index)) { + + /* GEN_CLUST_INDEX should use merge_threshold_table */ + + /* x-lock index is needed to exclude concurrent + pessimistic tree operations */ + rw_lock_x_lock(dict_index_get_lock(index)); + index->merge_threshold = merge_threshold_table; + rw_lock_x_unlock(dict_index_get_lock(index)); + + continue; + } + + for (uint i = 0; i < table_share->keys; i++) { + if (is_found[i]) { + continue; + } + + KEY* key_info = &table_share->key_info[i]; + + if (innobase_strcasecmp( + index->name, key_info->name) == 0) { + + /* x-lock index is needed to exclude concurrent + pessimistic tree operations */ + rw_lock_x_lock(dict_index_get_lock(index)); + index->merge_threshold + = merge_threshold_index[i]; + rw_lock_x_unlock(dict_index_get_lock(index)); + is_found[i] = true; + + break; + } + } + } +} + +/** Set m_use_* flags. */ +void +create_table_info_t::set_tablespace_type( + bool table_being_altered_is_file_per_table) +{ + /* Note whether this table will be created using a shared, + general or system tablespace. */ + m_use_shared_space = tablespace_is_shared_space(m_create_info); + + /** Allow file_per_table for this table either because: + 1) the setting innodb_file_per_table=on, + 2) the table being altered is currently file_per_table + 3) explicitly requested by tablespace=innodb_file_per_table. */ + m_allow_file_per_table = + m_innodb_file_per_table + || table_being_altered_is_file_per_table + || tablespace_is_file_per_table(m_create_info); + + /* All noncompresed temporary tables will be put into the + system temporary tablespace. 
*/ + bool is_noncompressed_temporary = + m_create_info->options & HA_LEX_CREATE_TMP_TABLE + && !(m_create_info->row_type == ROW_TYPE_COMPRESSED + || m_create_info->key_block_size > 0); + + /* Ignore the current innodb-file-per-table setting if we are + creating a temporary, non-compressed table or if the + TABLESPACE= phrase is using an existing shared tablespace. */ + m_use_file_per_table = + m_allow_file_per_table + && !is_noncompressed_temporary + && !m_use_shared_space; + + /* DATA DIRECTORY must have m_use_file_per_table but cannot be + used with TEMPORARY tables. */ + m_use_data_dir = + m_use_file_per_table + && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE) + && (m_create_info->data_file_name != NULL) + && (m_create_info->data_file_name[0] != '\0'); + ut_ad(!(m_use_shared_space && m_use_data_dir)); +} + +/** Initialize the create_table_info_t object. +@return error number */ +int +create_table_info_t::initialize() { - int error; trx_t* parent_trx; - trx_t* trx; - int primary_key_no; - uint i; - char norm_name[FN_REFLEN]; /* {database}/{tablename} */ - char temp_path[FN_REFLEN]; /* absolute path of temp frm */ - char remote_path[FN_REFLEN]; /* absolute path of table */ - THD* thd = ha_thd(); - ib_int64_t auto_inc_value; - /* Cache the global variable "srv_file_per_table" to a local - variable before using it. Note that "srv_file_per_table" - is not under dict_sys mutex protection, and could be changed - while creating the table. So we read the current value here - and make all further decisions based on this. 
*/ - bool use_tablespace = srv_file_per_table; - const ulint file_format = srv_file_format; + DBUG_ENTER("create_table_info_t::initialize"); - /* Zip Shift Size - log2 - 9 of compressed page size, - zero for uncompressed */ - ulint flags; - ulint flags2; - dict_table_t* innobase_table = NULL; + ut_ad(m_thd != NULL); + ut_ad(m_create_info != NULL); - const char* stmt; - size_t stmt_len; - /* Cache table options */ - ha_table_option_struct *options= form->s->option_struct; - fil_encryption_t encrypt = (fil_encryption_t)options->encryption; - uint key_id = (uint)options->encryption_key_id; - - DBUG_ENTER("ha_innobase::create"); - - DBUG_ASSERT(thd != NULL); - DBUG_ASSERT(create_info != NULL); - - if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) { + if (m_form->s->fields > REC_MAX_N_USER_FIELDS) { DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); } - /* Create the table definition in InnoDB */ - - /* Validate table options not handled by the SQL-parser */ - if(check_table_options(thd, form, create_info, use_tablespace, - file_format)) { - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - - /* Validate create options if innodb_strict_mode is set. */ - if (create_options_are_invalid( - thd, form, create_info, use_tablespace)) { - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - - if (!innobase_table_flags(form, create_info, - thd, use_tablespace, - &flags, &flags2)) { - DBUG_RETURN(-1); - } - - error = parse_table_name(name, create_info, flags, flags2, - norm_name, temp_path, remote_path); - if (error) { - DBUG_RETURN(error); - } - - /* Look for a primary key */ - primary_key_no = (form->s->primary_key != MAX_KEY ? - (int) form->s->primary_key : - -1); - - /* Our function innobase_get_mysql_key_number_for_index assumes - the primary key is always number 0, if it exists */ - ut_a(primary_key_no == -1 || primary_key_no == 0); - /* Check for name conflicts (with reserved name) for any user indices to be created. 
*/ - if (innobase_index_name_is_reserved(thd, form->key_info, - form->s->keys)) { - DBUG_RETURN(-1); + if (innobase_index_name_is_reserved(m_thd, m_form->key_info, + m_form->s->keys)) { + DBUG_RETURN(HA_ERR_WRONG_INDEX); } - if (row_is_magic_monitor_table(norm_name)) { - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_COMMAND, - "Using the table name %s to enable " - "diagnostic output is deprecated " - "and may be removed in future releases. " - "Use INFORMATION_SCHEMA or " - "PERFORMANCE_SCHEMA tables or " - "SET GLOBAL innodb_status_output=ON.", - dict_remove_db_name(norm_name)); - - /* Limit innodb monitor access to users with PROCESS privilege. - See http://bugs.mysql.com/32710 why we chose PROCESS. */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(HA_ERR_GENERIC); - } - } + ut_ad(m_form->s->row_type == m_create_info->row_type); /* Get the transaction associated with the current thd, or create one if not yet created */ - parent_trx = check_trx_exists(thd); + parent_trx = check_trx_exists(m_thd); /* In case MySQL calls this in the middle of a SELECT query, release possible adaptive hash latch to avoid deadlocks of threads */ trx_search_latch_release_if_reserved(parent_trx); + DBUG_RETURN(0); +} - trx = innobase_trx_allocate(thd); - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during a table create operation. - Drop table etc. do this latching in row0mysql.cc. */ +/** Prepare to create a new table to an InnoDB database. 
+@param[in] name Table name +@return error number */ +int +create_table_info_t::prepare_create_table( + const char* name) +{ + DBUG_ENTER("prepare_create_table"); - row_mysql_lock_data_dictionary(trx); + ut_ad(m_thd != NULL); + ut_ad(m_create_info != NULL); + + ut_ad(m_form->s->row_type == m_create_info->row_type); + + set_tablespace_type(false); + + normalize_table_name(m_table_name, name); + + /* Validate table options not handled by the SQL-parser */ + if (check_table_options()) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + + /* Validate the create options if innodb_strict_mode is set. + Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION + because InnoDB might actually support the option, but not under + the current conditions. The messages revealing the specific + problems are reported inside this function. */ + if (create_options_are_invalid()) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + + /* Create the table flags and flags2 */ + if (!innobase_table_flags()) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + + if (high_level_read_only && !is_intrinsic_temp_table()) { + DBUG_RETURN(HA_ERR_TABLE_READONLY); + } + + DBUG_RETURN(parse_table_name(name)); +} + +/** Create a new table to an InnoDB database. +@return error number */ +int +create_table_info_t::create_table() +{ + int error; + int primary_key_no; + uint i; + dict_table_t* innobase_table = NULL; + const char* stmt; + size_t stmt_len; + + DBUG_ENTER("create_table"); + + /* Look for a primary key */ + primary_key_no = (m_form->s->primary_key != MAX_KEY ? 
+ (int) m_form->s->primary_key : -1); + + /* Our function innobase_get_mysql_key_number_for_index assumes + the primary key is always number 0, if it exists */ + ut_a(primary_key_no == -1 || primary_key_no == 0); + + error = create_table_def(); - error = create_table_def(trx, form, norm_name, temp_path, - remote_path, flags, flags2, encrypt, key_id); if (error) { - goto cleanup; + DBUG_RETURN(error); } /* Create the keys */ - if (form->s->keys == 0 || primary_key_no == -1) { + if (m_form->s->keys == 0 || primary_key_no == -1) { /* Create an index which is used as the clustered index; order the rows by their row id which is internally generated by InnoDB */ error = create_clustered_index_when_no_primary( - trx, flags, norm_name); + m_trx, m_flags, m_table_name); if (error) { - goto cleanup; + DBUG_RETURN(error); } } if (primary_key_no != -1) { /* In InnoDB the clustered index must always be created first */ - if ((error = create_index(trx, form, flags, norm_name, + if ((error = create_index(m_trx, m_form, m_flags, m_table_name, (uint) primary_key_no))) { - goto cleanup; + DBUG_RETURN(error); } } /* Create the ancillary tables that are common to all FTS indexes on this table. 
*/ - if (flags2 & DICT_TF2_FTS) { - enum fts_doc_id_index_enum ret; + if (m_flags2 & DICT_TF2_FTS) { + fts_doc_id_index_enum ret; innobase_table = dict_table_open_on_name( - norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); + m_table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); ut_a(innobase_table); /* Check whether there already exists FTS_DOC_ID_INDEX */ ret = innobase_fts_check_doc_id_index_in_def( - form->s->keys, form->key_info); + m_form->s->keys, m_form->key_info); switch (ret) { case FTS_INCORRECT_DOC_ID_INDEX: - push_warning_printf(thd, + push_warning_printf(m_thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_NAME_FOR_INDEX, " InnoDB: Index name %s is reserved" @@ -11995,7 +14293,7 @@ ha_innobase::create( " make sure it is of correct" " type\n", FTS_DOC_ID_INDEX_NAME, - innobase_table->name); + innobase_table->name.m_name); if (innobase_table->fts) { fts_free(innobase_table); @@ -12005,14 +14303,14 @@ ha_innobase::create( my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), FTS_DOC_ID_INDEX_NAME); error = -1; - goto cleanup; + DBUG_RETURN(error); case FTS_EXIST_DOC_ID_INDEX: case FTS_NOT_EXIST_DOC_ID_INDEX: break; } dberr_t err = fts_create_common_tables( - trx, innobase_table, norm_name, + m_trx, innobase_table, m_table_name, (ret == FTS_EXIST_DOC_ID_INDEX)); error = convert_error_code_to_mysql(err, 0, NULL); @@ -12020,24 +14318,30 @@ ha_innobase::create( dict_table_close(innobase_table, TRUE, FALSE); if (error) { - goto cleanup; + trx_rollback_to_savepoint(m_trx, NULL); + m_trx->error_state = DB_SUCCESS; + + row_drop_table_for_mysql(m_table_name, m_trx, true, FALSE); + + m_trx->error_state = DB_SUCCESS; + DBUG_RETURN(error); } } - for (i = 0; i < form->s->keys; i++) { + for (i = 0; i < m_form->s->keys; i++) { if (i != static_cast(primary_key_no)) { - if ((error = create_index(trx, form, flags, - norm_name, i))) { - goto cleanup; + if ((error = create_index(m_trx, m_form, m_flags, + m_table_name, i))) { + DBUG_RETURN(error); } } } /* Cache all the FTS indexes on this table in 
the FTS specific structure. They are used for FTS indexed column update handling. */ - if (flags2 & DICT_TF2_FTS) { + if (m_flags2 & DICT_TF2_FTS) { fts_t* fts = innobase_table->fts; ut_a(fts != NULL); @@ -12045,61 +14349,126 @@ ha_innobase::create( dict_table_get_all_fts_indexes(innobase_table, fts->indexes); } - stmt = innobase_get_stmt(thd, &stmt_len); + stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len); + + innodb_session_t*& priv = + thd_to_innodb_session(m_trx->mysql_thd); + dict_table_t* handler = + priv->lookup_table_handler(m_table_name); + + ut_ad(handler == NULL + || (handler != NULL && dict_table_is_intrinsic(handler))); + + /* There is no concept of foreign key for intrinsic tables. */ + if (stmt && (handler == NULL)) { - if (stmt) { dberr_t err = row_table_add_foreign_constraints( - trx, stmt, stmt_len, norm_name, - create_info->options & HA_LEX_CREATE_TMP_TABLE); + m_trx, stmt, stmt_len, m_table_name, + m_create_info->options & HA_LEX_CREATE_TMP_TABLE); switch (err) { case DB_PARENT_NO_INDEX: push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_CANNOT_ADD_FOREIGN, "Create table '%s' with foreign key constraint" " failed. There is no index in the referenced" " table where the referenced columns appear" - " as the first columns.\n", norm_name); + " as the first columns.\n", m_table_name); break; case DB_CHILD_NO_INDEX: push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, + m_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_CANNOT_ADD_FOREIGN, "Create table '%s' with foreign key constraint" " failed. There is no index in the referencing" " table where referencing columns appear" - " as the first columns.\n", norm_name); + " as the first columns.\n", m_table_name); break; +#ifdef MYSQL_VIRTUAL_COLUMNS + case DB_NO_FK_ON_V_BASE_COL: + push_warning_printf( + m_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_CANNOT_ADD_FOREIGN, + "Create table '%s' with foreign key constraint" + " failed. 
Cannot add foreign key constraint" + " placed on the base column of indexed" + " virtual column, or constraint placed" + " on columns being part of virtual index.\n", + m_table_name); + break; +#endif default: break; } - error = convert_error_code_to_mysql(err, flags, NULL); + error = convert_error_code_to_mysql(err, m_flags, NULL); if (error) { - goto cleanup; + if (handler != NULL) { + priv->unregister_table_handler(m_table_name); + } + DBUG_RETURN(error); } } - innobase_commit_low(trx); + if (!is_intrinsic_temp_table()) { + innobase_table = dict_table_open_on_name( + m_table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); - row_mysql_unlock_data_dictionary(trx); + if (innobase_table != NULL) { + dict_table_close(innobase_table, TRUE, FALSE); + } - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ + } else { + innobase_table = NULL; + } - log_buffer_flush_to_disk(); + DBUG_RETURN(0); +} - innobase_table = dict_table_open_on_name( - norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); +/** Update a new table in an InnoDB database. 
+@return error number */ +int +create_table_info_t::create_table_update_dict() +{ + dict_table_t* innobase_table; + + DBUG_ENTER("create_table_update_dict"); + + innobase_table = thd_to_innodb_session(m_thd)->lookup_table_handler( + m_table_name); + + + if (innobase_table == NULL) { + innobase_table = dict_table_open_on_name( + m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); + } else { + innobase_table->acquire(); + ut_ad(dict_table_is_intrinsic(innobase_table)); + } DBUG_ASSERT(innobase_table != 0); + if (innobase_table->fts != NULL) { + if (innobase_table->fts_doc_id_index == NULL) { + innobase_table->fts_doc_id_index + = dict_table_get_index_on_name( + innobase_table, FTS_DOC_ID_INDEX_NAME); + DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL); + } else { + DBUG_ASSERT(innobase_table->fts_doc_id_index + == dict_table_get_index_on_name( + innobase_table, + FTS_DOC_ID_INDEX_NAME)); + } + } - innobase_copy_frm_flags_from_create_info(innobase_table, create_info); + DBUG_ASSERT((innobase_table->fts == NULL) + == (innobase_table->fts_doc_id_index == NULL)); + + innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info); dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE); @@ -12113,16 +14482,16 @@ ha_innobase::create( } /* Load server stopword into FTS cache */ - if (flags2 & DICT_TF2_FTS) { - if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) { + if (m_flags2 & DICT_TF2_FTS) { + if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) { dict_table_close(innobase_table, FALSE, FALSE); srv_active_wake_master_thread(); - trx_free_for_mysql(trx); + trx_free_for_mysql(m_trx); DBUG_RETURN(-1); } } - /* Note: We can't call update_thd() as prebuilt will not be + /* Note: We can't call update_thd() as m_prebuilt will not be setup at this stage and so we use thd. 
*/ /* We need to copy the AUTOINC value from the old table if @@ -12139,13 +14508,14 @@ ha_innobase::create( value to the auto increment field if the value is greater than the maximum value in the column. */ - if (((create_info->used_fields & HA_CREATE_USED_AUTO) - || thd_sql_command(thd) == SQLCOM_ALTER_TABLE - || thd_sql_command(thd) == SQLCOM_OPTIMIZE - || thd_sql_command(thd) == SQLCOM_CREATE_INDEX) - && create_info->auto_increment_value > 0) { + if (((m_create_info->used_fields & HA_CREATE_USED_AUTO) + || thd_sql_command(m_thd) == SQLCOM_ALTER_TABLE + || thd_sql_command(m_thd) == SQLCOM_OPTIMIZE + || thd_sql_command(m_thd) == SQLCOM_CREATE_INDEX) + && m_create_info->auto_increment_value > 0) { + ib_uint64_t auto_inc_value; - auto_inc_value = create_info->auto_increment_value; + auto_inc_value = m_create_info->auto_increment_value; dict_table_autoinc_lock(innobase_table); dict_table_autoinc_initialize(innobase_table, auto_inc_value); @@ -12154,6 +14524,89 @@ ha_innobase::create( dict_table_close(innobase_table, FALSE, FALSE); + innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s); + + DBUG_RETURN(0); +} + +/** Allocate a new trx. */ +void +create_table_info_t::allocate_trx() +{ + m_trx = innobase_trx_allocate(m_thd); + + m_trx->will_lock++; + m_trx->ddl = true; +} + +/** Create a new table to an InnoDB database. +@param[in] name Table name, format: "db/table_name". +@param[in] form Table format; columns and index information. +@param[in] create_info Create info (including create statement string). +@return 0 if success else error number. 
*/ +int +ha_innobase::create( + const char* name, + TABLE* form, + HA_CREATE_INFO* create_info) +{ + int error; + char norm_name[FN_REFLEN]; /* {database}/{tablename} */ + char temp_path[FN_REFLEN]; /* Absolute path of temp frm */ + char remote_path[FN_REFLEN]; /* Absolute path of table */ + char tablespace[NAME_LEN]; /* Tablespace name identifier */ + trx_t* trx; + DBUG_ENTER("ha_innobase::create"); + + create_table_info_t info(ha_thd(), + form, + create_info, + norm_name, + temp_path, + remote_path, + tablespace); + + /* Initialize the object. */ + if ((error = info.initialize())) { + DBUG_RETURN(error); + } + + /* Prepare for create and validate options. */ + if ((error = info.prepare_create_table(name))) { + DBUG_RETURN(error); + } + + info.allocate_trx(); + + trx = info.trx(); + + /* Latch the InnoDB data dictionary exclusively so that no deadlocks + or lock waits can happen in it during a table create operation. + Drop table etc. do this latching in row0mysql.cc. + Avoid locking dictionary if table is intrinsic. + Table Object for such table is cached in THD instead of storing it + to dictionary. 
*/ + if (!info.is_intrinsic_temp_table()) { + row_mysql_lock_data_dictionary(trx); + } + + if ((error = info.create_table())) { + goto cleanup; + } + + innobase_commit_low(trx); + + if (!info.is_intrinsic_temp_table()) { + ut_ad(!srv_read_only_mode); + row_mysql_unlock_data_dictionary(trx); + /* Flush the log to reduce probability that the .frm files and + the InnoDB data dictionary get out-of-sync if the user runs + with innodb_flush_log_at_trx_commit = 0 */ + log_buffer_flush_to_disk(); + } + + error = info.create_table_update_dict(); + /* Tell the InnoDB server that there might be work for utility threads: */ @@ -12161,12 +14614,41 @@ ha_innobase::create( trx_free_for_mysql(trx); - DBUG_RETURN(0); + DBUG_RETURN(error); cleanup: trx_rollback_for_mysql(trx); - row_mysql_unlock_data_dictionary(trx); + if (!info.is_intrinsic_temp_table()) { + row_mysql_unlock_data_dictionary(trx); + } else { + THD* thd = info.thd(); + + dict_table_t* intrinsic_table = + thd_to_innodb_session(thd)->lookup_table_handler( + info.table_name()); + + if (intrinsic_table != NULL) { + thd_to_innodb_session(thd)->unregister_table_handler( + info.table_name()); + + for (;;) { + dict_index_t* index; + index = UT_LIST_GET_FIRST( + intrinsic_table->indexes); + if (index == NULL) { + break; + } + rw_lock_free(&index->lock); + UT_LIST_REMOVE(intrinsic_table->indexes, index); + dict_mem_index_free(index); + index = NULL; + } + + dict_mem_table_free(intrinsic_table); + intrinsic_table = NULL; + } + } trx_free_for_mysql(trx); @@ -12175,47 +14657,68 @@ cleanup: /*****************************************************************//** Discards or imports an InnoDB tablespace. 
-@return 0 == success, -1 == error */ -UNIV_INTERN +@return 0 == success, -1 == error */ + int ha_innobase::discard_or_import_tablespace( /*======================================*/ - my_bool discard) /*!< in: TRUE if discard, else import */ + my_bool discard) /*!< in: TRUE if discard, else import */ { - dberr_t err; - dict_table_t* dict_table; DBUG_ENTER("ha_innobase::discard_or_import_tablespace"); - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); + ut_a(m_prebuilt->trx != NULL); + ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } - dict_table = prebuilt->table; + dict_table_t* dict_table = m_prebuilt->table; - if (dict_table->space == TRX_SYS_SPACE) { + if (dict_table_is_temporary(dict_table)) { ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_IN_SYSTEM_TABLESPACE, - table->s->table_name.str); + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + ER_CANNOT_DISCARD_TEMPORARY_TABLE); DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE); } - trx_start_if_not_started(prebuilt->trx); + if (dict_table->space == srv_sys_space.space_id()) { + ib_senderrf( + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + ER_TABLE_IN_SYSTEM_TABLESPACE, + dict_table->name.m_name); - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads. */ - trx_search_latch_release_if_reserved(prebuilt->trx); + DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE); + } + + if (DICT_TF_HAS_SHARED_SPACE(dict_table->flags)) { + my_printf_error(ER_NOT_ALLOWED_COMMAND, + "InnoDB: Cannot %s table `%s` because it is in" + " a general tablespace. It must be file-per-table.", + MYF(0), discard ? 
"discard" : "import", + dict_table->name.m_name); + + DBUG_RETURN(HA_ERR_NOT_ALLOWED_COMMAND); + } + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (trx_in_innodb.is_aborted()) { + innobase_rollback(ht, m_user_thd, false); + + DBUG_RETURN(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, m_user_thd)); + } + + trx_start_if_not_started(m_prebuilt->trx, true); /* Obtain an exclusive lock on the table. */ - err = row_mysql_lock_table( - prebuilt->trx, dict_table, LOCK_X, + dberr_t err = row_mysql_lock_table( + m_prebuilt->trx, dict_table, LOCK_X, discard ? "setting table lock for DISCARD TABLESPACE" : "setting table lock for IMPORT TABLESPACE"); @@ -12230,35 +14733,33 @@ ha_innobase::discard_or_import_tablespace( if (dict_table->ibd_file_missing) { ib_senderrf( - prebuilt->trx->mysql_thd, + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING, - table->s->table_name.str); + dict_table->name.m_name); } err = row_discard_tablespace_for_mysql( - dict_table->name, prebuilt->trx); + dict_table->name.m_name, m_prebuilt->trx); } else if (!dict_table->ibd_file_missing) { /* Commit the transaction in order to release the table lock. */ - trx_commit_for_mysql(prebuilt->trx); + trx_commit_for_mysql(m_prebuilt->trx); + ib::error() << "Unable to import tablespace " + << dict_table->name << " because it already" + " exists. 
Please DISCARD the tablespace" + " before IMPORT."; ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_EXISTS, table->s->table_name.str); + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + ER_TABLESPACE_EXISTS, dict_table->name.m_name); DBUG_RETURN(HA_ERR_TABLE_EXIST); } else { - err = row_import_for_mysql(dict_table, prebuilt); + err = row_import_for_mysql(dict_table, m_prebuilt); if (err == DB_SUCCESS) { - if (table->found_next_number_field) { - dict_table_autoinc_lock(dict_table); - innobase_initialize_autoinc(); - dict_table_autoinc_unlock(dict_table); - } - info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE @@ -12269,7 +14770,7 @@ ha_innobase::discard_or_import_tablespace( } /* Commit the transaction in order to release the table lock. */ - trx_commit_for_mysql(prebuilt->trx); + trx_commit_for_mysql(m_prebuilt->trx); if (err == DB_SUCCESS && !discard && dict_stats_is_persistent_enabled(dict_table)) { @@ -12286,7 +14787,7 @@ ha_innobase::discard_or_import_tablespace( ER_ALTER_INFO, "Error updating stats for table '%s'" " after table rebuild: %s", - dict_table->name, ut_strerr(ret)); + dict_table->name.m_name, ut_strerr(ret)); } } @@ -12295,53 +14796,62 @@ ha_innobase::discard_or_import_tablespace( /*****************************************************************//** Deletes all rows of an InnoDB table. -@return error number */ -UNIV_INTERN +@return error number */ + int ha_innobase::truncate() /*===================*/ { - dberr_t err; - int error; - DBUG_ENTER("ha_innobase::truncate"); + /* Truncate of intrinsic table is not allowed truncate for now. 
*/ + if (dict_table_is_intrinsic(m_prebuilt->table)) { + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } /* Get the transaction associated with the current thd, or create one - if not yet created, and update prebuilt->trx */ + if not yet created, and update m_prebuilt->trx */ update_thd(ha_thd()); - if (!trx_is_started(prebuilt->trx)) { - ++prebuilt->trx->will_lock; - } - /* Truncate the table in InnoDB */ + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); - err = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx); + if (!trx_is_started(m_prebuilt->trx)) { + ++m_prebuilt->trx->will_lock; + } + + dberr_t err; + + /* Truncate the table in InnoDB */ + err = row_truncate_table_for_mysql(m_prebuilt->table, m_prebuilt->trx); + + int error; switch (err) { - case DB_TABLESPACE_DELETED: case DB_TABLESPACE_NOT_FOUND: ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, (err == DB_TABLESPACE_DELETED ? ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING), table->s->table_name.str); table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; + error = HA_ERR_TABLESPACE_MISSING; break; default: error = convert_error_code_to_mysql( - err, prebuilt->table->flags, - prebuilt->trx->mysql_thd); + err, m_prebuilt->table->flags, + m_prebuilt->trx->mysql_thd); + table->status = STATUS_NOT_FOUND; break; } + DBUG_RETURN(error); } @@ -12351,17 +14861,14 @@ MySQL calls innobase_commit to commit the transaction of the current user. Then the current user cannot have locks set on the table. Drop table operation inside InnoDB will remove all locks any user has on the table inside InnoDB. 
-@return error number */ -UNIV_INTERN +@return error number */ + int ha_innobase::delete_table( /*======================*/ const char* name) /*!< in: table name */ { - ulint name_len; dberr_t err; - trx_t* parent_trx; - trx_t* trx; THD* thd = ha_thd(); char norm_name[FN_REFLEN]; @@ -12380,23 +14887,47 @@ ha_innobase::delete_table( extension, in contrast to ::create */ normalize_table_name(norm_name, name); - if (srv_read_only_mode) { + innodb_session_t*& priv = thd_to_innodb_session(thd); + dict_table_t* handler = priv->lookup_table_handler(norm_name); + + if (handler != NULL) { + for (dict_index_t* index = UT_LIST_GET_FIRST(handler->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + index->last_ins_cur->release(); + index->last_sel_cur->release(); + } + } else if (srv_read_only_mode) { DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (row_is_magic_monitor_table(norm_name) - && check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(HA_ERR_GENERIC); } - parent_trx = check_trx_exists(thd); + trx_t* parent_trx = check_trx_exists(thd); - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ + TrxInInnoDB trx_in_innodb(parent_trx); - trx_search_latch_release_if_reserved(parent_trx); + /* Remove the to-be-dropped table from the list of modified tables + by parent_trx. Otherwise we may end up with an orphaned pointer to + the table object from parent_trx::mod_tables. 
This could happen in: + SET AUTOCOMMIT=0; + CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION + ALL SELECT 1 AS a; */ + trx_mod_tables_t::const_iterator iter; - trx = innobase_trx_allocate(thd); + for (iter = parent_trx->mod_tables.begin(); + iter != parent_trx->mod_tables.end(); + ++iter) { - name_len = strlen(name); + dict_table_t* table_to_drop = *iter; + + if (strcmp(norm_name, table_to_drop->name.m_name) == 0) { + parent_trx->mod_tables.erase(table_to_drop); + break; + } + } + + trx_t* trx = innobase_trx_allocate(thd); + + ulint name_len = strlen(name); ut_a(name_len < 1000); @@ -12407,13 +14938,12 @@ ha_innobase::delete_table( /* We are doing a DDL operation. */ ++trx->will_lock; - trx->ddl = true; /* Drop the table in InnoDB */ + err = row_drop_table_for_mysql( norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB, - FALSE); - + false, handler); if (err == DB_TABLE_NOT_FOUND && innobase_get_lower_case_table_names() == 1) { @@ -12438,7 +14968,7 @@ ha_innobase::delete_table( whether there exists table name in system table whose name is not being normalized to lower case */ - normalize_table_name_low( + normalize_table_name_c_low( par_case_name, name, FALSE); #endif err = row_drop_table_for_mysql( @@ -12448,6 +14978,571 @@ ha_innobase::delete_table( } } + if (err == DB_TABLE_NOT_FOUND) { + /* Test to drop all tables which matches db/tablename + '#'. + Only partitions can have '#' as non-first character in + the table name! + + Temporary table names always start with '#', partitions are + the only 'tables' that can have '#' after the first character + and table name must have length > 0. User tables cannot have + '#' since it would be translated to @0023. Therefor this should + only match partitions. 
*/ + uint len = (uint) strlen(norm_name); + ulint num_partitions; + ut_a(len < FN_REFLEN); + norm_name[len] = '#'; + norm_name[len + 1] = 0; + err = row_drop_database_for_mysql(norm_name, trx, + &num_partitions); + norm_name[len] = 0; + if (num_partitions == 0 + && !row_is_mysql_tmp_table_name(norm_name)) { + table_name_t tbl_name; + tbl_name.m_name = norm_name; + ib::error() << "Table " << tbl_name << + " does not exist in the InnoDB" + " internal data dictionary though MySQL is" + " trying to drop it. Have you copied the .frm" + " file of the table to the MySQL database" + " directory from another database? " + << TROUBLESHOOTING_MSG; + } + if (num_partitions == 0) { + err = DB_TABLE_NOT_FOUND; + } + } + + /* TODO: remove this when the conversion tool from ha_partition to + native innodb partitioning is completed */ + if (err == DB_TABLE_NOT_FOUND + && innobase_get_lower_case_table_names() == 1) { +#ifdef _WIN32 + char* is_part = strstr(norm_name, "#p#"); +#else + char* is_part = strstr(norm_name, "#P#"); +#endif /* _WIN32 */ + + if (is_part != NULL) { + char par_case_name[FN_REFLEN]; + +#ifndef _WIN32 + /* Check for the table using lower + case name, including the partition + separator "P" */ + strcpy(par_case_name, norm_name); + innobase_casedn_str(par_case_name); +#else + /* On Windows platfrom, check + whether there exists table name in + system table whose name is + not being normalized to lower case */ + create_table_info_t::normalize_table_name_low( + par_case_name, name, FALSE); +#endif /* _WIN32 */ + err = row_drop_table_for_mysql( + par_case_name, trx, + thd_sql_command(thd) == SQLCOM_DROP_DB, + true, handler); + } + } + + if (handler == NULL) { + ut_ad(!srv_read_only_mode); + /* Flush the log to reduce probability that the .frm files and + the InnoDB data dictionary get out-of-sync if the user runs + with innodb_flush_log_at_trx_commit = 0 */ + + log_buffer_flush_to_disk(); + } else if (err == DB_SUCCESS) { + priv->unregister_table_handler(norm_name); 
+ } + + innobase_commit_low(trx); + + trx_free_for_mysql(trx); + + DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); +} + +#ifdef MYSQL_TABLESPACES +/** Validate the parameters in st_alter_tablespace +before using them in InnoDB tablespace functions. +@param[in] thd Connection +@param[in] alter_info How to do the command. +@return MySQL handler error code like HA_... */ +static +int +validate_create_tablespace_info( + THD* thd, + st_alter_tablespace* alter_info) +{ + + int error = 0; + + /* The parser ensures that these fields are provided. */ + ut_a(alter_info->tablespace_name); + ut_a(alter_info->data_file_name); + + if (high_level_read_only) { + return(HA_ERR_INNODB_READ_ONLY); + } + + /* From this point forward, push a warning for each problem found + instead of returning immediately*/ + int error = validate_tablespace_name( + alter_info->tablespace_name, false); + + /* Make sure the tablespace is not already open. */ + space_id = fil_space_get_id_by_name(alter_info->tablespace_name); + if (space_id != ULINT_UNDEFINED) { + my_printf_error(ER_TABLESPACE_EXISTS, + "InnoDB: A tablespace named `%s`" + " already exists.", MYF(0), + alter_info->tablespace_name); + error = HA_ERR_TABLESPACE_EXISTS; + } + if (alter_info->file_block_size) { + /* Check for a bad file block size. */ + if (!ut_is_2pow(alter_info->file_block_size) + || alter_info->file_block_size < UNIV_ZIP_SIZE_MIN + || alter_info->file_block_size > UNIV_PAGE_SIZE_MAX) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB does not support" + " FILE_BLOCK_SIZE=%llu", MYF(0), + alter_info->file_block_size); + error = HA_WRONG_CREATE_OPTION; + + /* Don't allow a file block size larger than UNIV_PAGE_SIZE. 
*/ + } else if (alter_info->file_block_size > UNIV_PAGE_SIZE) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Cannot create a tablespace" + " with FILE_BLOCK_SIZE=%llu because" + " INNODB_PAGE_SIZE=%lu.", MYF(0), + alter_info->file_block_size, + UNIV_PAGE_SIZE); + error = HA_WRONG_CREATE_OPTION; + + /* Don't allow a compressed tablespace when page size > 16k. */ + } else if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF + && alter_info->file_block_size != UNIV_PAGE_SIZE) { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: Cannot create a COMPRESSED" + " tablespace when innodb_page_size >" + " 16k.", MYF(0)); + error = HA_WRONG_CREATE_OPTION; + } + } + + /* Validate the ADD DATAFILE name. */ + char* filepath = mem_strdup(alter_info->data_file_name); + os_normalize_path(filepath); + + /* It must end with '.ibd' and contain a basename of at least + 1 character before the.ibd extension. */ + ulint dirname_len = dirname_length(filepath); + const char* basename = filepath + dirname_len; + ulint basename_len = strlen(basename); + if (basename_len < 5) { + my_error(ER_WRONG_FILE_NAME, MYF(0), + alter_info->data_file_name); + ut_free(filepath); + return(HA_WRONG_CREATE_OPTION); + } + + if (memcmp(&basename[basename_len - 4], DOT_IBD, 5)) { + my_error(ER_WRONG_FILE_NAME, MYF(0), + alter_info->data_file_name); + my_printf_error(ER_WRONG_FILE_NAME, + "An IBD filepath must end with `.ibd`.", + MYF(0)); + ut_free(filepath); + return(HA_WRONG_CREATE_OPTION); + } + + /* Do not allow an invalid colon in the file name. */ + const char* colon = strchr(filepath, ':'); + if (colon != NULL) { +#ifdef _WIN32 + /* Do not allow names like "C:name.ibd" because it + specifies the "C:" drive but allows a relative location. + It should be like "c:\". If a single colon is used it must + be the second byte the the third byte must be a separator. 
*/ + if (colon != &filepath[1] + || (colon[1] != OS_PATH_SEPARATOR) + || NULL != strchr(&colon[1], ':')) { +#endif /* _WIN32 */ + my_error(ER_WRONG_FILE_NAME, MYF(0), + alter_info->data_file_name); + my_printf_error(ER_WRONG_FILE_NAME, + "Invalid use of ':'.", MYF(0)); + ut_free(filepath); + return(HA_WRONG_CREATE_OPTION); +#ifdef _WIN32 + } +#endif /* _WIN32 */ + } + +#ifndef _WIN32 + /* On Non-Windows platforms, '\\' is a valid file name character. + But for InnoDB datafiles, we always assume it is a directory + separator and convert these to '/' */ + if (strchr(alter_info->data_file_name, '\\') != NULL) { + ib::warn() << "Converting backslash to forward slash in" + " ADD DATAFILE " << alter_info->data_file_name; + } +#endif /* _WIN32 */ + + /* The directory path must be pre-existing. */ + Folder folder(filepath, dirname_len); + ut_free(filepath); + if (!folder.exists()) { + my_error(ER_WRONG_FILE_NAME, MYF(0), + alter_info->data_file_name); + my_printf_error(ER_WRONG_FILE_NAME, + "The directory does not exist.", MYF(0)); + return(HA_WRONG_CREATE_OPTION); + } + + /* CREATE TABLESPACE...ADD DATAFILE can be inside but not under + the datadir.*/ + if (folder_mysql_datadir > folder) { + my_error(ER_WRONG_FILE_NAME, MYF(0), + alter_info->data_file_name); + my_printf_error(ER_WRONG_FILE_NAME, + "CREATE TABLESPACE data file" + " cannot be under the datadir.", MYF(0)); + error = HA_WRONG_CREATE_OPTION; + } + + return(error); +} + +/** CREATE a tablespace. +@param[in] hton Handlerton of InnoDB +@param[in] thd Connection +@param[in] alter_info How to do the command +@return MySQL error code*/ +static +int +innobase_create_tablespace( + handlerton* hton, + THD* thd, + st_alter_tablespace* alter_info) +{ + trx_t* trx; + int error=0; + Tablespace tablespace; + + DBUG_ENTER("innobase_create_tablespace"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + /* Be sure the input parameters are valid before continuing. 
*/ + error = validate_create_tablespace_info(thd, alter_info); + if (error) { + DBUG_RETURN(error); + } + + /* Create the tablespace object. */ + tablespace.set_name(alter_info->tablespace_name); + + dberr_t err = tablespace.add_datafile(alter_info->data_file_name); + if (err != DB_SUCCESS) { + DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); + } + + /* Get the transaction associated with the current thd and make + sure it will not block this DDL. */ + trx_t* parent_trx = check_trx_exists(thd); + + /* In case MySQL calls this in the middle of a SELECT + query, release possible adaptive hash latch to avoid + deadlocks of threads */ + trx_search_latch_release_if_reserved(parent_trx); + + /* Allocate a new transaction for this DDL */ + trx = innobase_trx_allocate(thd); + ++trx->will_lock; + + trx_start_if_not_started(trx, true); + row_mysql_lock_data_dictionary(trx); + + /* In FSP_FLAGS, a zip_ssize of zero means that the tablespace + holds non-compresssed tables. A non-zero zip_ssize means that + the general tablespace can ONLY contain compressed tables. */ + ulint zip_size = static_cast(alter_info->file_block_size); + ut_ad(zip_size <= UNIV_PAGE_SIZE_MAX); + if (zip_size == 0) { + zip_size = UNIV_PAGE_SIZE; + } + + bool zipped = (zip_size != UNIV_PAGE_SIZE); + page_size_t page_size(zip_size, UNIV_PAGE_SIZE, zipped); + bool atomic_blobs = page_size.is_compressed(); + + /* Create the filespace flags */ + ulint fsp_flags = fsp_flags_init( + page_size, /* page sizes and a flag if compressed */ + atomic_blobs, /* needed only for compressed tables */ + false, /* This is not a file-per-table tablespace */ + true, /* This is a general shared tablespace */ + false, /* Temporary General Tablespaces not allowed */ + false, /* Page compression is not used. 
*/ + 0, /* Page compression level 0 */ + ATOMIC_WRITES_DEFAULT); /* No atomic writes yet */ + + tablespace.set_flags(fsp_flags); + + err = dict_build_tablespace(&tablespace); + if (err != DB_SUCCESS) { + error = convert_error_code_to_mysql(err, 0, NULL); + trx_rollback_for_mysql(trx); + goto cleanup; + } + + innobase_commit_low(trx); + +cleanup: + row_mysql_unlock_data_dictionary(trx); + trx_free_for_mysql(trx); + + DBUG_RETURN(error); +} + +/** DROP a tablespace. +@param[in] hton Handlerton of InnoDB +@param[in] thd Connection +@param[in] alter_info How to do the command +@return MySQL error code*/ +static +int +innobase_drop_tablespace( + handlerton* hton, + THD* thd, + st_alter_tablespace* alter_info) +{ + trx_t* trx; + dberr_t err; + int error = 0; + ulint space_id; + + DBUG_ENTER("innobase_drop_tablespace"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (srv_read_only_mode) { + DBUG_RETURN(HA_ERR_INNODB_READ_ONLY); + } + + error = validate_tablespace_name(alter_info->tablespace_name, false); + if (error != 0) { + DBUG_RETURN(error); + } + + /* Be sure that this tablespace is known and valid. */ + space_id = fil_space_get_id_by_name(alter_info->tablespace_name); + if (space_id == ULINT_UNDEFINED) { + + space_id = dict_space_get_id(alter_info->tablespace_name); + if (space_id == ULINT_UNDEFINED) { + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + // DBUG_RETURN(HA_ERR_TABLESPACE_MISSING); + } + + /* The datafile is not open but the tablespace is in + sys_tablespaces, so we can try to drop the metadata. */ + } + + /* The tablespace can only be dropped if it is empty. */ + if (!dict_space_is_empty(space_id)) { + DBUG_RETURN(HA_ERR_TABLE_READONLY); + //DBUG_RETURN(HA_ERR_TABLESPACE_IS_NOT_EMPTY); + } + + /* Get the transaction associated with the current thd and make sure + it will not block this DDL. 
*/ + trx_t* parent_trx = check_trx_exists(thd); + + /* In case MySQL calls this in the middle of a SELECT + query, release possible adaptive hash latch to avoid + deadlocks of threads */ + trx_search_latch_release_if_reserved(parent_trx); + + /* Allocate a new transaction for this DDL */ + trx = innobase_trx_allocate(thd); + ++trx->will_lock; + + trx_start_if_not_started(trx, true); + row_mysql_lock_data_dictionary(trx); + + /* Update SYS_TABLESPACES and SYS_DATAFILES */ + err = dict_delete_tablespace_and_datafiles(space_id, trx); + if (err != DB_SUCCESS) { + ib::error() << "Unable to delete the dictionary entries" + " for tablespace `" << alter_info->tablespace_name + << "`, Space ID " << space_id; + goto have_error; + } + + /* Delete the physical files, fil_space_t & fil_node_t entries. */ + err = fil_delete_tablespace(space_id, BUF_REMOVE_FLUSH_NO_WRITE); + switch (err) { + case DB_TABLESPACE_NOT_FOUND: + /* OK if the physical file is mising. + We deleted the metadata. */ + case DB_SUCCESS: + innobase_commit_low(trx); + break; + default: + ib::error() << "Unable to delete the tablespace `" + << alter_info->tablespace_name + << "`, Space ID " << space_id; +have_error: + error = convert_error_code_to_mysql(err, 0, NULL); + trx_rollback_for_mysql(trx); + } + + row_mysql_unlock_data_dictionary(trx); + trx_free_for_mysql(trx); + + DBUG_RETURN(error); +} + +/** This API handles CREATE, ALTER & DROP commands for InnoDB tablespaces. 
+@param[in] hton Handlerton of InnoDB +@param[in] thd Connection +@param[in] alter_info How to do the command +@return MySQL error code*/ +static +int +innobase_alter_tablespace( + handlerton* hton, + THD* thd, + st_alter_tablespace* alter_info) +{ + int error; /* return zero for success */ + DBUG_ENTER("innobase_alter_tablespace"); + + switch (alter_info->ts_cmd_type) { + case CREATE_TABLESPACE: + error = innobase_create_tablespace(hton, thd, alter_info); + break; + + case DROP_TABLESPACE: + error = innobase_drop_tablespace(hton, thd, alter_info); + break; + + default: + error = HA_ADMIN_NOT_IMPLEMENTED; + } + + if (error) { + /* These are the most common message params */ + const char* object_type = "TABLESPACE"; + const char* object = alter_info->tablespace_name; + + /* Modify those params as needed. */ + switch (alter_info->ts_cmd_type) { + case DROP_TABLESPACE: + ib_errf(thd, IB_LOG_LEVEL_ERROR, + ER_DROP_FILEGROUP_FAILED, + "%s %s", object_type, object); + break; + case CREATE_TABLESPACE: + ib_errf(thd, IB_LOG_LEVEL_ERROR, + ER_CREATE_FILEGROUP_FAILED, + "%s %s", object_type, object); + break; + case CREATE_LOGFILE_GROUP: + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + innobase_hton_name, "LOGFILE GROUP"); + break; + case ALTER_TABLESPACE: + case ALTER_ACCESS_MODE_TABLESPACE: + case DROP_LOGFILE_GROUP: + case ALTER_LOGFILE_GROUP: + case CHANGE_FILE_TABLESPACE: + case TS_CMD_NOT_DEFINED: + break; + } + } + + DBUG_RETURN(error); +} +#endif /* MYSQL_TABLESPACES */ + +/** Remove all tables in the named database inside InnoDB. +@param[in] hton handlerton from InnoDB +@param[in] path Database path; Inside InnoDB the name of the last +directory in the path is used as the database name. +For example, in 'mysql/data/test' the database name is 'test'. 
*/ + +static +void +innobase_drop_database( + handlerton* hton, + char* path) +{ + char* namebuf; + + /* Get the transaction associated with the current thd, or create one + if not yet created */ + + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (srv_read_only_mode) { + return; + } + + THD* thd = current_thd; + + /* In the Windows plugin, thd = current_thd is always NULL */ + if (thd != NULL) { + trx_t* parent_trx = check_trx_exists(thd); + + /* In case MySQL calls this in the middle of a SELECT + query, release possible adaptive hash latch to avoid + deadlocks of threads */ + + trx_search_latch_release_if_reserved(parent_trx); + } + + ulint len = 0; + char* ptr = strend(path) - 2; + + while (ptr >= path && *ptr != '\\' && *ptr != '/') { + ptr--; + len++; + } + + ptr++; + namebuf = (char*) my_malloc((uint) len + 2, MYF(0)); + // JAN: TODO: MySQL 5.7 + //namebuf = (char*) my_malloc(PSI_INSTRUMENT_ME, (uint) len + 2, MYF(0)); + + memcpy(namebuf, ptr, len); + namebuf[len] = '/'; + namebuf[len + 1] = '\0'; + +#ifdef _WIN32 + innobase_casedn_str(namebuf); +#endif /* _WIN32 */ + + trx_t* trx = innobase_trx_allocate(thd); + + /* Either the transaction is already flagged as a locking transaction + or it hasn't been started yet. */ + + ut_a(!trx_is_started(trx) || trx->will_lock > 0); + + /* We are doing a DDL operation. */ + ++trx->will_lock; + + ulint dummy; + + row_drop_database_for_mysql(namebuf, trx, &dummy); + + my_free(namebuf); + /* Flush the log to reduce probability that the .frm files and the InnoDB data dictionary get out-of-sync if the user runs with innodb_flush_log_at_trx_commit = 0 */ @@ -12457,9 +15552,1284 @@ ha_innobase::delete_table( innobase_commit_low(trx); trx_free_for_mysql(trx); - - DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); } + +/*********************************************************************//** +Renames an InnoDB table. 
+@return DB_SUCCESS or error code */ +inline MY_ATTRIBUTE((warn_unused_result)) +dberr_t +innobase_rename_table( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + const char* from, /*!< in: old name of the table */ + const char* to) /*!< in: new name of the table */ +{ + dberr_t error; + char norm_to[FN_REFLEN]; + char norm_from[FN_REFLEN]; + + DBUG_ENTER("innobase_rename_table"); + DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); + + ut_ad(!srv_read_only_mode); + + normalize_table_name(norm_to, to); + normalize_table_name(norm_from, from); + + DEBUG_SYNC_C("innodb_rename_table_ready"); + + TrxInInnoDB trx_in_innodb(trx); + + trx_start_if_not_started(trx, true); + + /* Serialize data dictionary operations with dictionary mutex: + no deadlocks can occur then in these operations. */ + + row_mysql_lock_data_dictionary(trx); + + /* Transaction must be flagged as a locking transaction or it hasn't + been started yet. */ + + ut_a(trx->will_lock > 0); + + error = row_rename_table_for_mysql(norm_from, norm_to, trx, TRUE); + + if (error == DB_TABLE_NOT_FOUND) { + /* May be partitioned table, which consists of partitions + named table_name#P#partition_name[#SP#subpartition_name]. + + We are doing a DDL operation. */ + ++trx->will_lock; + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + trx_start_if_not_started(trx, true); + error = row_rename_partitions_for_mysql(norm_from, norm_to, + trx); + if (error == DB_TABLE_NOT_FOUND) { + ib::error() << "Table " << ut_get_name(trx, norm_from) + << " does not exist in the InnoDB internal" + " data dictionary though MySQL is trying to" + " rename the table. Have you copied the .frm" + " file of the table to the MySQL database" + " directory from another database? 
" + << TROUBLESHOOTING_MSG; + } + } + if (error != DB_SUCCESS) { + if (error == DB_TABLE_NOT_FOUND + && innobase_get_lower_case_table_names() == 1) { + char* is_part = NULL; +#ifdef _WIN32 + is_part = strstr(norm_from, "#p#"); +#else + is_part = strstr(norm_from, "#P#"); +#endif /* _WIN32 */ + + if (is_part) { + char par_case_name[FN_REFLEN]; +#ifndef _WIN32 + /* Check for the table using lower + case name, including the partition + separator "P" */ + strcpy(par_case_name, norm_from); + innobase_casedn_str(par_case_name); +#else + /* On Windows platfrom, check + whether there exists table name in + system table whose name is + not being normalized to lower case */ + create_table_info_t::normalize_table_name_low( + par_case_name, from, FALSE); +#endif /* _WIN32 */ + trx_start_if_not_started(trx, true); + error = row_rename_table_for_mysql( + par_case_name, norm_to, trx, TRUE); + } + } + + if (error == DB_SUCCESS) { +#ifndef _WIN32 + sql_print_warning("Rename partition table %s" + " succeeds after converting to lower" + " case. The table may have" + " been moved from a case" + " in-sensitive file system.\n", + norm_from); +#else + sql_print_warning("Rename partition table %s" + " succeeds after skipping the step to" + " lower case the table name." + " The table may have been" + " moved from a case sensitive" + " file system.\n", + norm_from); +#endif /* _WIN32 */ + } + } + + row_mysql_unlock_data_dictionary(trx); + + /* Flush the log to reduce probability that the .frm + files and the InnoDB data dictionary get out-of-sync + if the user runs with innodb_flush_log_at_trx_commit = 0 */ + + log_buffer_flush_to_disk(); + + DBUG_RETURN(error); +} + +/*********************************************************************//** +Renames an InnoDB table. 
+@return 0 or error code */ + +int +ha_innobase::rename_table( +/*======================*/ + const char* from, /*!< in: old name of the table */ + const char* to) /*!< in: new name of the table */ +{ + THD* thd = ha_thd(); + + DBUG_ENTER("ha_innobase::rename_table"); + + if (high_level_read_only) { + ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); + DBUG_RETURN(HA_ERR_TABLE_READONLY); + } + + /* Get the transaction associated with the current thd, or create one + if not yet created */ + + trx_t* parent_trx = check_trx_exists(thd); + + TrxInInnoDB trx_in_innodb(parent_trx); + + trx_t* trx = innobase_trx_allocate(thd); + + /* We are doing a DDL operation. */ + ++trx->will_lock; + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + + dberr_t error = innobase_rename_table(trx, from, to); + + DEBUG_SYNC(thd, "after_innobase_rename_table"); + + innobase_commit_low(trx); + + trx_free_for_mysql(trx); + + if (error == DB_SUCCESS) { + char norm_from[MAX_FULL_NAME_LEN]; + char norm_to[MAX_FULL_NAME_LEN]; + char errstr[512]; + dberr_t ret; + + normalize_table_name(norm_from, from); + normalize_table_name(norm_to, to); + + ret = dict_stats_rename_table(norm_from, norm_to, + errstr, sizeof(errstr)); + + if (ret != DB_SUCCESS) { + ib::error() << errstr; + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + ER_LOCK_WAIT_TIMEOUT, errstr); + } + } + + /* Add a special case to handle the Duplicated Key error + and return DB_ERROR instead. + This is to avoid a possible SIGSEGV error from mysql error + handling code. Currently, mysql handles the Duplicated Key + error by re-entering the storage layer and getting dup key + info by calling get_dup_key(). This operation requires a valid + table handle ('row_prebuilt_t' structure) which could no + longer be available in the error handling stage. 
The suggested + solution is to report a 'table exists' error message (since + the dup key error here is due to an existing table whose name + is the one we are trying to rename to) and return the generic + error code. */ + if (error == DB_DUPLICATE_KEY) { + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to); + + error = DB_ERROR; + } + + DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); +} + +/*********************************************************************//** +Returns the exact number of records that this client can see using this +handler object. +@return Error code in case something goes wrong. +These errors will abort the current query: + case HA_ERR_LOCK_DEADLOCK: + case HA_ERR_LOCK_TABLE_FULL: + case HA_ERR_LOCK_WAIT_TIMEOUT: + case HA_ERR_QUERY_INTERRUPTED: +For other error codes, the server will fall back to counting records. */ + +#ifdef MYSQL_57_SELECT_COUNT_OPTIMIZATION +int +ha_innobase::records(ha_rows* num_rows) +/*===================================*/ +{ + DBUG_ENTER("ha_innobase::records()"); + + dberr_t ret; + ulint n_rows = 0; /* Record count in this view */ + + update_thd(); + + if (dict_table_is_discarded(m_prebuilt->table)) { + ib_senderrf( + m_user_thd, + IB_LOG_LEVEL_ERROR, + ER_TABLESPACE_DISCARDED, + table->s->table_name.str); + + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + + } else if (m_prebuilt->table->ibd_file_missing) { + ib_senderrf( + m_user_thd, IB_LOG_LEVEL_ERROR, + ER_TABLESPACE_MISSING, + table->s->table_name.str); + + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_TABLESPACE_MISSING); + + } else if (m_prebuilt->table->corrupted) { + ib_errf(m_user_thd, IB_LOG_LEVEL_WARN, + ER_INNODB_INDEX_CORRUPT, + "Table '%s' is corrupt.", + table->s->table_name.str); + + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_INDEX_CORRUPT); + } + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + m_prebuilt->trx->op_info = "counting records"; + + dict_index_t* index = dict_table_get_first_index(m_prebuilt->table); + + 
ut_ad(dict_index_is_clust(index)); + + m_prebuilt->index_usable = row_merge_is_index_usable( + m_prebuilt->trx, index); + + if (!m_prebuilt->index_usable) { + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } + + /* (Re)Build the m_prebuilt->mysql_template if it is null to use + the clustered index and just the key, no off-record data. */ + m_prebuilt->index = index; + dtuple_set_n_fields(m_prebuilt->search_tuple, 0); + m_prebuilt->read_just_key = 1; + build_template(false); + + /* Count the records in the clustered index */ + ret = row_scan_index_for_mysql(m_prebuilt, index, false, &n_rows); + reset_template(); + switch (ret) { + case DB_SUCCESS: + break; + case DB_DEADLOCK: + case DB_LOCK_TABLE_FULL: + case DB_LOCK_WAIT_TIMEOUT: + *num_rows = HA_POS_ERROR; + DBUG_RETURN(convert_error_code_to_mysql(ret, 0, m_user_thd)); + break; + case DB_INTERRUPTED: + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + break; + default: + /* No other error besides the three below is returned from + row_scan_index_for_mysql(). Make a debug catch. */ + *num_rows = HA_POS_ERROR; + ut_ad(0); + } + + m_prebuilt->trx->op_info = ""; + + if (thd_killed(m_user_thd)) { + *num_rows = HA_POS_ERROR; + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + + DBUG_RETURN(0); +} +#endif /* MYSQL_57_SELECT_COUNT_OPTIMIZATION */ + +/*********************************************************************//** +Estimates the number of index records in a range. 
+@return estimated number of rows */ + +ha_rows +ha_innobase::records_in_range( +/*==========================*/ + uint keynr, /*!< in: index number */ + key_range *min_key, /*!< in: start key value of the + range, may also be 0 */ + key_range *max_key) /*!< in: range end key val, may + also be 0 */ +{ + KEY* key; + dict_index_t* index; + dtuple_t* range_start; + dtuple_t* range_end; + int64_t n_rows; + page_cur_mode_t mode1; + page_cur_mode_t mode2; + mem_heap_t* heap; + + DBUG_ENTER("records_in_range"); + + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); + + m_prebuilt->trx->op_info = "estimating records in index range"; + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + active_index = keynr; + + key = table->key_info + active_index; + + index = innobase_get_index(keynr); + + /* There exists possibility of not being able to find requested + index due to inconsistency between MySQL and InoDB dictionary info. + Necessary message should have been printed in innobase_get_index() */ + if (dict_table_is_discarded(m_prebuilt->table)) { + n_rows = HA_POS_ERROR; + goto func_exit; + } + if (!index) { + n_rows = HA_POS_ERROR; + goto func_exit; + } + if (dict_index_is_corrupted(index)) { + n_rows = HA_ERR_INDEX_CORRUPT; + goto func_exit; + } + if (!row_merge_is_index_usable(m_prebuilt->trx, index)) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } + + heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t) + + sizeof(dtuple_t))); + + range_start = dtuple_create(heap, key->ext_key_parts); + dict_index_copy_types(range_start, index, key->ext_key_parts); + + range_end = dtuple_create(heap, key->ext_key_parts); + dict_index_copy_types(range_end, index, key->ext_key_parts); + + row_sel_convert_mysql_key_to_innobase( + range_start, + m_prebuilt->srch_key_val1, + m_prebuilt->srch_key_val_len, + index, + (byte*) (min_key ? min_key->key : (const uchar*) 0), + (ulint) (min_key ? min_key->length : 0), + m_prebuilt->trx); + + DBUG_ASSERT(min_key + ? 
range_start->n_fields > 0 + : range_start->n_fields == 0); + + row_sel_convert_mysql_key_to_innobase( + range_end, + m_prebuilt->srch_key_val2, + m_prebuilt->srch_key_val_len, + index, + (byte*) (max_key ? max_key->key : (const uchar*) 0), + (ulint) (max_key ? max_key->length : 0), + m_prebuilt->trx); + + DBUG_ASSERT(max_key + ? range_end->n_fields > 0 + : range_end->n_fields == 0); + + mode1 = convert_search_mode_to_innobase( + min_key ? min_key->flag : HA_READ_KEY_EXACT); + + mode2 = convert_search_mode_to_innobase( + max_key ? max_key->flag : HA_READ_KEY_EXACT); + + if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { + + if (dict_index_is_spatial(index)) { + /*Only min_key used in spatial index. */ + n_rows = rtr_estimate_n_rows_in_range( + index, range_start, mode1); + } else { + n_rows = btr_estimate_n_rows_in_range( + index, range_start, mode1, range_end, mode2); + } + } else { + + n_rows = HA_POS_ERROR; + } + + mem_heap_free(heap); + + DBUG_EXECUTE_IF( + "print_btr_estimate_n_rows_in_range_return_value", + push_warning_printf( + ha_thd(), Sql_condition::WARN_LEVEL_WARN, + ER_NO_DEFAULT, + "btr_estimate_n_rows_in_range(): %f", n_rows); + ); + +func_exit: + + m_prebuilt->trx->op_info = (char*)""; + + /* The MySQL optimizer seems to believe an estimate of 0 rows is + always accurate and may return the result 'Empty set' based on that. + The accuracy is not guaranteed, and even if it were, for a locking + read we should anyway perform the search to set the next-key lock. + Add 1 to the value to make sure MySQL does not make the assumption! */ + + if (n_rows == 0) { + n_rows = 1; + } + + DBUG_RETURN((ha_rows) n_rows); +} + +/*********************************************************************//** +Gives an UPPER BOUND to the number of rows in a table. This is used in +filesort.cc. 
+@return upper bound of rows */ + +ha_rows +ha_innobase::estimate_rows_upper_bound() +/*====================================*/ +{ + const dict_index_t* index; + ulonglong estimate; + ulonglong local_data_file_length; + + DBUG_ENTER("estimate_rows_upper_bound"); + + /* We do not know if MySQL can call this function before calling + external_lock(). To be safe, update the thd of the current table + handle. */ + + update_thd(ha_thd()); + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + m_prebuilt->trx->op_info = "calculating upper bound for table rows"; + + index = dict_table_get_first_index(m_prebuilt->table); + + ulint stat_n_leaf_pages = index->stat_n_leaf_pages; + + ut_a(stat_n_leaf_pages > 0); + + local_data_file_length = + ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; + + /* Calculate a minimum length for a clustered index record and from + that an upper bound for the number of rows. Since we only calculate + new statistics in row0mysql.cc when a table has grown by a threshold + factor, we must add a safety factor 2 in front of the formula below. */ + + estimate = 2 * local_data_file_length + / dict_index_calc_min_rec_len(index); + + m_prebuilt->trx->op_info = ""; + + /* Set num_rows less than MERGEBUFF to simulate the case where we do + not have enough space to merge the externally sorted file blocks. */ + DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF", + estimate = 2; + DBUG_SET("-d,set_num_rows_lt_MERGEBUFF"); + ); + + DBUG_RETURN((ha_rows) estimate); +} + +/*********************************************************************//** +How many seeks it will take to read through the table. This is to be +comparable to the number returned by records_in_range so that we can +decide if we should scan the table or use keys. 
+@return estimated time measured in disk seeks */ + +double +ha_innobase::scan_time() +/*====================*/ +{ + /* Since MySQL seems to favor table scans too much over index + searches, we pretend that a sequential read takes the same time + as a random disk read, that is, we do not divide the following + by 10, which would be physically realistic. */ + + /* The locking below is disabled for performance reasons. Without + it we could end up returning uninitialized value to the caller, + which in the worst case could make some query plan go bogus or + issue a Valgrind warning. */ + if (m_prebuilt == NULL) { + /* In case of derived table, Optimizer will try to fetch stat + for table even before table is create or open. In such + cases return default value of 1. + TODO: This will be further improved to return some approximate + estimate but that would also needs pre-population of stats + structure. As of now approach is in sync with MyISAM. */ + return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2); + } + + ulint stat_clustered_index_size; + + ut_a(m_prebuilt->table->stat_initialized); + + stat_clustered_index_size = + m_prebuilt->table->stat_clustered_index_size; + + return((double) stat_clustered_index_size); +} + +/******************************************************************//** +Calculate the time it takes to read a set of ranges through an index +This enables us to optimise reads for clustered indexes. +@return estimated time measured in disk seeks */ + +double +ha_innobase::read_time( +/*===================*/ + uint index, /*!< in: key number */ + uint ranges, /*!< in: how many ranges */ + ha_rows rows) /*!< in: estimated number of rows in the ranges */ +{ + ha_rows total_rows; + + if (index != table->s->primary_key) { + /* Not clustered */ + return(handler::read_time(index, ranges, rows)); + } + + /* Assume that the read time is proportional to the scan time for all + rows + at most one seek per range. 
*/ + + double time_for_scan = scan_time(); + + if ((total_rows = estimate_rows_upper_bound()) < rows) { + + return(time_for_scan); + } + + return(ranges + (double) rows / (double) total_rows * time_for_scan); +} + +/******************************************************************//** +Return the size of the InnoDB memory buffer. */ + +longlong +ha_innobase::get_memory_buffer_size() const +/*=======================================*/ +{ + return(innobase_buffer_pool_size); +} + +/** Update the system variable with the given value of the InnoDB +buffer pool size. +@param[in] buf_pool_size given value of buffer pool size.*/ +void +innodb_set_buf_pool_size(ulonglong buf_pool_size) +{ + innobase_buffer_pool_size = buf_pool_size; +} + +/*********************************************************************//** +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@return the key number used inside MySQL */ +static +int +innobase_get_mysql_key_number_for_index( +/*====================================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. 
*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in InnoDB data + dictionary */ + const dict_index_t* index) /*!< in: index */ +{ + const dict_index_t* ind; + unsigned int i; + + /* If index does not belong to the table object of share structure + (ib_table comes from the share structure) search the index->table + object instead */ + if (index->table != ib_table) { + i = 0; + ind = dict_table_get_first_index(index->table); + + while (index != ind) { + ind = dict_table_get_next_index(ind); + i++; + } + + if (row_table_got_default_clust_index(index->table)) { + ut_a(i > 0); + i--; + } + + return(i); + } + + /* If index translation table exists, we will first check + the index through index translation table for a match. */ + if (share->idx_trans_tbl.index_mapping != NULL) { + for (i = 0; i < share->idx_trans_tbl.index_count; i++) { + if (share->idx_trans_tbl.index_mapping[i] == index) { + return(i); + } + } + + /* Print an error message if we cannot find the index + in the "index translation table". */ + if (index->is_committed()) { + sql_print_error("Cannot find index %s in InnoDB index" + " translation table.", index->name()); + } + } + + /* If we do not have an "index translation table", or not able + to find the index in the translation table, we'll directly find + matching index with information from mysql TABLE structure and + InnoDB dict_index_t list */ + for (i = 0; i < table->s->keys; i++) { + ind = dict_table_get_index_on_name( + ib_table, table->key_info[i].name); + + if (index == ind) { + return(i); + } + } + + /* Loop through each index of the table and lock them */ + for (ind = dict_table_get_first_index(ib_table); + ind != NULL; + ind = dict_table_get_next_index(ind)) { + if (index == ind) { + /* Temp index is internal to InnoDB, that is + not present in the MySQL index list, so no + need to print such mismatch warning. 
*/ + if (index->is_committed()) { + sql_print_warning( + "Found index %s in InnoDB index list" + " but not its MySQL index number." + " It could be an InnoDB internal" + " index.", + index->name()); + } + return(-1); + } + } + + ut_error; + + return(-1); +} + +/*********************************************************************//** +Calculate Record Per Key value. Need to exclude the NULL value if +innodb_stats_method is set to "nulls_ignored" +@return estimated record per key value */ +rec_per_key_t +innodb_rec_per_key( +/*===============*/ + dict_index_t* index, /*!< in: dict_index_t structure */ + ulint i, /*!< in: the column we are + calculating rec per key */ + ha_rows records) /*!< in: estimated total records */ +{ + rec_per_key_t rec_per_key; + ib_uint64_t n_diff; + + ut_a(index->table->stat_initialized); + + ut_ad(i < dict_index_get_n_unique(index)); + ut_ad(!dict_index_is_spatial(index)); + + if (records == 0) { + /* "Records per key" is meaningless for empty tables. + Return 1.0 because that is most convenient to the Optimizer. */ + return(1.0); + } + + n_diff = index->stat_n_diff_key_vals[i]; + + if (n_diff == 0) { + + rec_per_key = static_cast(records); + } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { + ib_uint64_t n_null; + ib_uint64_t n_non_null; + + n_non_null = index->stat_n_non_null_key_vals[i]; + + /* In theory, index->stat_n_non_null_key_vals[i] + should always be less than the number of records. + Since this is statistics value, the value could + have slight discrepancy. But we will make sure + the number of null values is not a negative number. */ + if (records < n_non_null) { + n_null = 0; + } else { + n_null = records - n_non_null; + } + + /* If the number of NULL values is the same as or + large than that of the distinct values, we could + consider that the table consists mostly of NULL value. + Set rec_per_key to 1. 
*/ + if (n_diff <= n_null) { + rec_per_key = 1.0; + } else { + /* Need to exclude rows with NULL values from + rec_per_key calculation */ + rec_per_key + = static_cast(records - n_null) + / (n_diff - n_null); + } + } else { + DEBUG_SYNC_C("after_checking_for_0"); + rec_per_key = static_cast(records) / n_diff; + } + + if (rec_per_key < 1.0) { + /* Values below 1.0 are meaningless and must be due to the + stats being imprecise. */ + rec_per_key = 1.0; + } + + return(rec_per_key); +} + +/*********************************************************************//** +Returns statistics information of the table to the MySQL interpreter, +in various fields of the handle object. +@return HA_ERR_* error code or 0 */ + +int +ha_innobase::info_low( +/*==================*/ + uint flag, /*!< in: what information is requested */ + bool is_analyze) +{ + dict_table_t* ib_table; + ib_uint64_t n_rows; + char path[FN_REFLEN]; + os_file_stat_t stat_info; + + DBUG_ENTER("info"); + + DEBUG_SYNC_C("ha_innobase_info_low"); + + /* If we are forcing recovery at a high level, we will suppress + statistics calculation on tables, because that may crash the + server if an index is badly corrupted. */ + + /* We do not know if MySQL can call this function before calling + external_lock(). To be safe, update the thd of the current table + handle. 
*/ + + update_thd(ha_thd()); + + /* In case MySQL calls this in the middle of a SELECT query, release + possible adaptive hash latch to avoid deadlocks of threads */ + + m_prebuilt->trx->op_info = (char*)"returning various info to MySQL"; + + trx_search_latch_release_if_reserved(m_prebuilt->trx); + + ib_table = m_prebuilt->table; + DBUG_ASSERT(ib_table->n_ref_count > 0); + + if (flag & HA_STATUS_TIME) { + if (is_analyze || innobase_stats_on_metadata) { + + dict_stats_upd_option_t opt; + dberr_t ret; + + m_prebuilt->trx->op_info = "updating table statistics"; + + if (dict_stats_is_persistent_enabled(ib_table)) { + + if (is_analyze) { + opt = DICT_STATS_RECALC_PERSISTENT; + } else { + /* This is e.g. 'SHOW INDEXES', fetch + the persistent stats from disk. */ + opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; + } + } else { + opt = DICT_STATS_RECALC_TRANSIENT; + } + + ut_ad(!mutex_own(&dict_sys->mutex)); + ret = dict_stats_update(ib_table, opt); + + if (ret != DB_SUCCESS) { + m_prebuilt->trx->op_info = ""; + DBUG_RETURN(HA_ERR_GENERIC); + } + + m_prebuilt->trx->op_info = + "returning various info to MySQL"; + } + + + stats.update_time = (ulong) ib_table->update_time; + } + + if (flag & HA_STATUS_VARIABLE) { + + ulint stat_clustered_index_size; + ulint stat_sum_of_other_index_sizes; + + if (!(flag & HA_STATUS_NO_LOCK)) { + dict_table_stats_lock(ib_table, RW_S_LATCH); + } + + ut_a(ib_table->stat_initialized); + + n_rows = ib_table->stat_n_rows; + + stat_clustered_index_size + = ib_table->stat_clustered_index_size; + + stat_sum_of_other_index_sizes + = ib_table->stat_sum_of_other_index_sizes; + + if (!(flag & HA_STATUS_NO_LOCK)) { + dict_table_stats_unlock(ib_table, RW_S_LATCH); + } + + /* + The MySQL optimizer seems to assume in a left join that n_rows + is an accurate estimate if it is zero. Of course, it is not, + since we do not have any locks on the rows yet at this phase. 
+ Since SHOW TABLE STATUS seems to call this function with the + HA_STATUS_TIME flag set, while the left join optimizer does not + set that flag, we add one to a zero value if the flag is not + set. That way SHOW TABLE STATUS will show the best estimate, + while the optimizer never sees the table empty. */ + + if (n_rows == 0 && !(flag & HA_STATUS_TIME)) { + n_rows++; + } + + /* Fix bug#40386: Not flushing query cache after truncate. + n_rows can not be 0 unless the table is empty, set to 1 + instead. The original problem of bug#29507 is actually + fixed in the server code. */ + if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) { + + n_rows = 1; + + /* We need to reset the m_prebuilt value too, otherwise + checks for values greater than the last value written + to the table will fail and the autoinc counter will + not be updated. This will force write_row() into + attempting an update of the table's AUTOINC counter. */ + + m_prebuilt->autoinc_last_value = 0; + } + + const page_size_t& page_size + = dict_table_page_size(ib_table); + + stats.records = (ha_rows) n_rows; + stats.deleted = 0; + stats.data_file_length + = ((ulonglong) stat_clustered_index_size) + * page_size.physical(); + stats.index_file_length + = ((ulonglong) stat_sum_of_other_index_sizes) + * page_size.physical(); + + /* Since fsp_get_available_space_in_free_extents() is + acquiring latches inside InnoDB, we do not call it if we + are asked by MySQL to avoid locking. Another reason to + avoid the call is that it uses quite a lot of CPU. + See Bug#38185. */ + if (flag & HA_STATUS_NO_LOCK + || !(flag & HA_STATUS_VARIABLE_EXTRA)) { + /* We do not update delete_length if no + locking is requested so the "old" value can + remain. delete_length is initialized to 0 in + the ha_statistics' constructor. 
Also we only + need delete_length to be set when + HA_STATUS_VARIABLE_EXTRA is set */ + } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + /* Avoid accessing the tablespace if + innodb_crash_recovery is set to a high value. */ + stats.delete_length = 0; + } else { + uintmax_t avail_space; + + avail_space = fsp_get_available_space_in_free_extents( + ib_table->space); + + if (avail_space == UINTMAX_MAX) { + THD* thd; + char errbuf[MYSYS_STRERROR_SIZE]; + + thd = ha_thd(); + my_strerror(errbuf, sizeof(errbuf), + errno); + push_warning_printf( + thd, + Sql_condition::WARN_LEVEL_WARN, + ER_CANT_GET_STAT, + "InnoDB: Trying to get the free" + " space for table %s but its" + " tablespace has been discarded or" + " the .ibd file is missing. Setting" + " the free space to zero." + " (errno: %d - %s)", + ib_table->name.m_name, errno, + errbuf); + + + stats.delete_length = 0; + } else { + stats.delete_length = avail_space * 1024; + } + } + + stats.check_time = 0; + stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *)); + + if (stats.records == 0) { + stats.mean_rec_length = 0; + } else { + stats.mean_rec_length = (ulong) + (stats.data_file_length / stats.records); + } + } + + if (flag & HA_STATUS_CONST) { + ulong i; + /* Verify the number of index in InnoDB and MySQL + matches up. If m_prebuilt->clust_index_was_generated + holds, InnoDB defines GEN_CLUST_INDEX internally */ + ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) + - m_prebuilt->clust_index_was_generated; + if (table->s->keys < num_innodb_index) { + /* If there are too many indexes defined + inside InnoDB, ignore those that are being + created, because MySQL will only consider + the fully built indexes here. 
*/ + + for (const dict_index_t* index + = UT_LIST_GET_FIRST(ib_table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + /* First, online index creation is + completed inside InnoDB, and then + MySQL attempts to upgrade the + meta-data lock so that it can rebuild + the .frm file. If we get here in that + time frame, dict_index_is_online_ddl() + would not hold and the index would + still not be included in TABLE_SHARE. */ + if (!index->is_committed()) { + num_innodb_index--; + } + } + + if (table->s->keys < num_innodb_index + && innobase_fts_check_doc_id_index( + ib_table, NULL, NULL) + == FTS_EXIST_DOC_ID_INDEX) { + num_innodb_index--; + } + } + + if (table->s->keys != num_innodb_index) { + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true); + } + + if (!(flag & HA_STATUS_NO_LOCK)) { + dict_table_stats_lock(ib_table, RW_S_LATCH); + } + + ut_a(ib_table->stat_initialized); + + for (i = 0; i < table->s->keys; i++) { + ulong j; + + /* We could get index quickly through internal + index mapping with the index translation table. + The identity of index (match up index name with + that of table->key_info[i]) is already verified in + innobase_get_index(). */ + dict_index_t* index = innobase_get_index(i); + + if (index == NULL) { + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true); + break; + } + + KEY* key = &table->key_info[i]; + + for (j = 0; j < key->ext_key_parts; j++) { + + if ((key->flags & HA_FULLTEXT) + || (key->flags & HA_SPATIAL)) { + + /* The record per key does not apply to + FTS or Spatial indexes. */ + /* + key->rec_per_key[j] = 1; + key->set_records_per_key(j, 1.0); + */ + continue; + } + + if (j + 1 > index->n_uniq) { + sql_print_error( + "Index %s of %s has %lu columns" + " unique inside InnoDB, but" + " MySQL is asking statistics for" + " %lu columns. 
Have you mixed" + " up .frm files from different" + " installations? %s", + index->name(), + ib_table->name.m_name, + (unsigned long) + index->n_uniq, j + 1, + TROUBLESHOOTING_MSG); + break; + } + + /* innodb_rec_per_key() will use + index->stat_n_diff_key_vals[] and the value we + pass index->table->stat_n_rows. Both are + calculated by ANALYZE and by the background + stats gathering thread (which kicks in when too + much of the table has been changed). In + addition table->stat_n_rows is adjusted with + each DML (e.g. ++ on row insert). Those + adjustments are not MVCC'ed and not even + reversed on rollback. So, + index->stat_n_diff_key_vals[] and + index->table->stat_n_rows could have been + calculated at different time. This is + acceptable. */ + + ulong rec_per_key_int = static_cast( + innodb_rec_per_key(index, j, + stats.records)); + + /* Since MySQL seems to favor table scans + too much over index searches, we pretend + index selectivity is 2 times better than + our estimate: */ + + rec_per_key_int = rec_per_key_int / 2; + + if (rec_per_key_int == 0) { + rec_per_key_int = 1; + } + + key->rec_per_key[j] = rec_per_key_int; + } + } + + if (!(flag & HA_STATUS_NO_LOCK)) { + dict_table_stats_unlock(ib_table, RW_S_LATCH); + } + + my_snprintf(path, sizeof(path), "%s/%s%s", + mysql_data_home, table->s->normalized_path.str, + reg_ext); + + unpack_filename(path,path); + + /* Note that we do not know the access time of the table, + nor the CHECK TABLE time, nor the UPDATE or INSERT time. */ + + if (os_file_get_status( + path, &stat_info, false, + (dict_table_is_intrinsic(ib_table) + ? 
false : srv_read_only_mode)) == DB_SUCCESS) { + stats.create_time = (ulong) stat_info.ctime; + } + } + + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + + goto func_exit; + + } else if (flag & HA_STATUS_ERRKEY) { + const dict_index_t* err_index; + + ut_a(m_prebuilt->trx); + ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); + + err_index = trx_get_error_info(m_prebuilt->trx); + + if (err_index) { + errkey = innobase_get_mysql_key_number_for_index( + m_share, table, ib_table, err_index); + } else { + errkey = (unsigned int) ( + (m_prebuilt->trx->error_key_num + == ULINT_UNDEFINED) + ? ~0 + : m_prebuilt->trx->error_key_num); + } + } + + if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) { + + ulonglong auto_inc_val = innobase_peek_autoinc(); + /* Initialize autoinc value if not set. */ + if (auto_inc_val == 0) { + + dict_table_autoinc_lock(m_prebuilt->table); + innobase_initialize_autoinc(); + dict_table_autoinc_unlock(m_prebuilt->table); + + auto_inc_val = innobase_peek_autoinc(); + } + stats.auto_increment_value = auto_inc_val; + } + +func_exit: + m_prebuilt->trx->op_info = (char*)""; + + DBUG_RETURN(0); +} + +/*********************************************************************//** +Returns statistics information of the table to the MySQL interpreter, +in various fields of the handle object. +@return HA_ERR_* error code or 0 */ + +int +ha_innobase::info( +/*==============*/ + uint flag) /*!< in: what information is requested */ +{ + return(info_low(flag, false /* not ANALYZE */)); +} + +/** Enable indexes. +@param[in] mode enable index mode. +@return HA_ERR_* error code or 0 */ +int +ha_innobase::enable_indexes( + uint mode) +{ + int error = HA_ERR_WRONG_COMMAND; + + /* Enable index only for intrinsic table. Behavior for all other + table continue to remain same. 
*/ + + if (dict_table_is_intrinsic(m_prebuilt->table)) { + ut_ad(mode == HA_KEY_SWITCH_ALL); + for (dict_index_t* index + = UT_LIST_GET_FIRST(m_prebuilt->table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + /* InnoDB being clustered index we can't disable/enable + clustered index itself. */ + if (dict_index_is_clust(index)) { + continue; + } + + index->allow_duplicates = false; + } + error = 0; + } + + return(error); +} + +/** Disable indexes. +@param[in] mode disable index mode. +@return HA_ERR_* error code or 0 */ +int +ha_innobase::disable_indexes( + uint mode) +{ + int error = HA_ERR_WRONG_COMMAND; + + /* Disable index only for intrinsic table. Behavior for all other + table continue to remain same. */ + + if (dict_table_is_intrinsic(m_prebuilt->table)) { + ut_ad(mode == HA_KEY_SWITCH_ALL); + for (dict_index_t* index + = UT_LIST_GET_FIRST(m_prebuilt->table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + /* InnoDB being clustered index we can't disable/enable + clustered index itself. */ + if (dict_index_is_clust(index)) { + continue; + } + + index->allow_duplicates = true; + } + error = 0; + } + + return(error); +} + +/* +Updates index cardinalities of the table, based on random dives into +each index tree. This does NOT calculate exact statistics on the table. +@return HA_ADMIN_* error code or HA_ADMIN_OK */ + +int +ha_innobase::analyze( +/*=================*/ + THD* thd, /*!< in: connection thread handle */ + HA_CHECK_OPT* check_opt) /*!< in: currently ignored */ +{ + /* Simply call info_low() with all the flags + and request recalculation of the statistics */ + int ret = info_low( + HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE, + true /* this is ANALYZE */); + + if (ret != 0) { + return(HA_ADMIN_FAILED); + } + + return(HA_ADMIN_OK); +} + /*****************************************************************//** Defragment table. 
@return error number */ @@ -12533,7 +16903,7 @@ ha_innobase::defragment_table( break; } } - os_event_free(event); + os_event_destroy(event); } if (ret) { @@ -12555,1155 +16925,20 @@ ha_innobase::defragment_table( return ret; } -/*****************************************************************//** -Removes all tables in the named database inside InnoDB. */ -static -void -innobase_drop_database( -/*===================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - char* path) /*!< in: database path; inside InnoDB the name - of the last directory in the path is used as - the database name: for example, in - 'mysql/data/test' the database name is 'test' */ -{ - ulint len = 0; - trx_t* trx; - char* ptr; - char* namebuf; - THD* thd = current_thd; - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (srv_read_only_mode) { - return; - } - - /* In the Windows plugin, thd = current_thd is always NULL */ - if (thd) { - trx_t* parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT - query, release possible adaptive hash latch to avoid - deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - } - - ptr = strend(path) - 2; - - while (ptr >= path && *ptr != '\\' && *ptr != '/') { - ptr--; - len++; - } - - ptr++; - namebuf = (char*) my_malloc((uint) len + 2, MYF(0)); - - memcpy(namebuf, ptr, len); - namebuf[len] = '/'; - namebuf[len + 1] = '\0'; -#ifdef __WIN__ - innobase_casedn_str(namebuf); -#endif - trx = innobase_trx_allocate(thd); - - /* Either the transaction is already flagged as a locking transaction - or it hasn't been started yet. */ - - ut_a(!trx_is_started(trx) || trx->will_lock > 0); - - /* We are doing a DDL operation. 
*/ - ++trx->will_lock; - - row_drop_database_for_mysql(namebuf, trx); - - my_free(namebuf); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - innobase_commit_low(trx); - trx_free_for_mysql(trx); -} - -/*********************************************************************//** -Renames an InnoDB table. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -innobase_rename_table( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - const char* from, /*!< in: old name of the table */ - const char* to) /*!< in: new name of the table */ -{ - dberr_t error; - char norm_to[FN_REFLEN]; - char norm_from[FN_REFLEN]; - - DBUG_ENTER("innobase_rename_table"); - DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - - ut_ad(!srv_read_only_mode); - - normalize_table_name(norm_to, to); - normalize_table_name(norm_from, from); - - DEBUG_SYNC_C("innodb_rename_table_ready"); - - trx_start_if_not_started(trx); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations. */ - - row_mysql_lock_data_dictionary(trx); - - /* Transaction must be flagged as a locking transaction or it hasn't - been started yet. 
*/ - - ut_a(trx->will_lock > 0); - - error = row_rename_table_for_mysql( - norm_from, norm_to, trx, TRUE); - - if (error != DB_SUCCESS) { - if (error == DB_TABLE_NOT_FOUND - && innobase_get_lower_case_table_names() == 1) { - char* is_part = NULL; -#ifdef __WIN__ - is_part = strstr(norm_from, "#p#"); -#else - is_part = strstr(norm_from, "#P#"); -#endif /* __WIN__ */ - - if (is_part) { - char par_case_name[FN_REFLEN]; -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - strcpy(par_case_name, norm_from); - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, from, FALSE); -#endif - trx_start_if_not_started(trx); - error = row_rename_table_for_mysql( - par_case_name, norm_to, trx, TRUE); - } - } - - if (error == DB_SUCCESS) { -#ifndef __WIN__ - sql_print_warning("Rename partition table %s " - "succeeds after converting to lower " - "case. The table may have " - "been moved from a case " - "in-sensitive file system.\n", - norm_from); -#else - sql_print_warning("Rename partition table %s " - "succeeds after skipping the step to " - "lower case the table name. " - "The table may have been " - "moved from a case sensitive " - "file system.\n", - norm_from); -#endif /* __WIN__ */ - } - } - - row_mysql_unlock_data_dictionary(trx); - - /* Flush the log to reduce probability that the .frm - files and the InnoDB data dictionary get out-of-sync - if the user runs with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - DBUG_RETURN(error); -} - -/*********************************************************************//** -Renames an InnoDB table. 
-@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::rename_table( -/*======================*/ - const char* from, /*!< in: old name of the table */ - const char* to) /*!< in: new name of the table */ -{ - trx_t* trx; - dberr_t error; - trx_t* parent_trx; - THD* thd = ha_thd(); - - DBUG_ENTER("ha_innobase::rename_table"); - - if (high_level_read_only) { - ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - trx = innobase_trx_allocate(thd); - - /* We are doing a DDL operation. */ - ++trx->will_lock; - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - error = innobase_rename_table(trx, from, to); - - DEBUG_SYNC(thd, "after_innobase_rename_table"); - - innobase_commit_low(trx); - trx_free_for_mysql(trx); - - if (error == DB_SUCCESS) { - char norm_from[MAX_FULL_NAME_LEN]; - char norm_to[MAX_FULL_NAME_LEN]; - char errstr[512]; - dberr_t ret; - - normalize_table_name(norm_from, from); - normalize_table_name(norm_to, to); - - ret = dict_stats_rename_table(norm_from, norm_to, - errstr, sizeof(errstr)); - - if (ret != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", errstr); - - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, - ER_LOCK_WAIT_TIMEOUT, errstr); - } - } - - /* Add a special case to handle the Duplicated Key error - and return DB_ERROR instead. - This is to avoid a possible SIGSEGV error from mysql error - handling code. Currently, mysql handles the Duplicated Key - error by re-entering the storage layer and getting dup key - info by calling get_dup_key(). 
This operation requires a valid - table handle ('row_prebuilt_t' structure) which could no - longer be available in the error handling stage. The suggested - solution is to report a 'table exists' error message (since - the dup key error here is due to an existing table whose name - is the one we are trying to rename to) and return the generic - error code. */ - if (error == DB_DUPLICATE_KEY) { - my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to); - - error = DB_ERROR; - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*********************************************************************//** -Estimates the number of index records in a range. -@return estimated number of rows */ -UNIV_INTERN -ha_rows -ha_innobase::records_in_range( -/*==========================*/ - uint keynr, /*!< in: index number */ - key_range *min_key, /*!< in: start key value of the - range, may also be 0 */ - key_range *max_key) /*!< in: range end key val, may - also be 0 */ -{ - KEY* key; - dict_index_t* index; - dtuple_t* range_start; - dtuple_t* range_end; - ib_int64_t n_rows; - ulint mode1; - ulint mode2; - mem_heap_t* heap; - - DBUG_ENTER("records_in_range"); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - prebuilt->trx->op_info = (char*)"estimating records in index range"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - active_index = keynr; - - key = table->key_info + active_index; - - index = innobase_get_index(keynr); - - /* There exists possibility of not being able to find requested - index due to inconsistency between MySQL and InoDB dictionary info. 
- Necessary message should have been printed in innobase_get_index() */ - if (dict_table_is_discarded(prebuilt->table)) { - n_rows = HA_POS_ERROR; - goto func_exit; - } - if (UNIV_UNLIKELY(!index)) { - n_rows = HA_POS_ERROR; - goto func_exit; - } - if (dict_index_is_corrupted(index)) { - n_rows = HA_ERR_INDEX_CORRUPT; - goto func_exit; - } - if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { - n_rows = HA_ERR_TABLE_DEF_CHANGED; - goto func_exit; - } - - heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t) - + sizeof(dtuple_t))); - - range_start = dtuple_create(heap, key->ext_key_parts); - dict_index_copy_types(range_start, index, key->ext_key_parts); - - range_end = dtuple_create(heap, key->ext_key_parts); - dict_index_copy_types(range_end, index, key->ext_key_parts); - - row_sel_convert_mysql_key_to_innobase( - range_start, - prebuilt->srch_key_val1, - prebuilt->srch_key_val_len, - index, - (byte*) (min_key ? min_key->key : - (const uchar*) 0), - (ulint) (min_key ? min_key->length : 0), - prebuilt->trx); - DBUG_ASSERT(min_key - ? range_start->n_fields > 0 - : range_start->n_fields == 0); - - row_sel_convert_mysql_key_to_innobase( - range_end, - prebuilt->srch_key_val2, - prebuilt->srch_key_val_len, - index, - (byte*) (max_key ? max_key->key : - (const uchar*) 0), - (ulint) (max_key ? max_key->length : 0), - prebuilt->trx); - DBUG_ASSERT(max_key - ? range_end->n_fields > 0 - : range_end->n_fields == 0); - - mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag : - HA_READ_KEY_EXACT); - mode2 = convert_search_mode_to_innobase(max_key ? 
max_key->flag : - HA_READ_KEY_EXACT); - - if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { - - n_rows = btr_estimate_n_rows_in_range(index, range_start, - mode1, range_end, - mode2, prebuilt->trx); - } else { - - n_rows = HA_POS_ERROR; - } - - mem_heap_free(heap); - -func_exit: - - prebuilt->trx->op_info = (char*)""; - - /* The MySQL optimizer seems to believe an estimate of 0 rows is - always accurate and may return the result 'Empty set' based on that. - The accuracy is not guaranteed, and even if it were, for a locking - read we should anyway perform the search to set the next-key lock. - Add 1 to the value to make sure MySQL does not make the assumption! */ - - if (n_rows == 0) { - n_rows = 1; - } - - DBUG_RETURN((ha_rows) n_rows); -} - -/*********************************************************************//** -Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc. -@return upper bound of rows */ -UNIV_INTERN -ha_rows -ha_innobase::estimate_rows_upper_bound() -/*====================================*/ -{ - const dict_index_t* index; - ulonglong estimate; - ulonglong local_data_file_length; - ulint stat_n_leaf_pages; - - DBUG_ENTER("estimate_rows_upper_bound"); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - update_thd(ha_thd()); - - prebuilt->trx->op_info = "calculating upper bound for table rows"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - index = dict_table_get_first_index(prebuilt->table); - - stat_n_leaf_pages = index->stat_n_leaf_pages; - - ut_a(stat_n_leaf_pages > 0); - - local_data_file_length = - ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; - - /* Calculate a minimum length for a clustered index record and from - that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.cc when a table has grown by a threshold - factor, we must add a safety factor 2 in front of the formula below. */ - - estimate = 2 * local_data_file_length - / dict_index_calc_min_rec_len(index); - - prebuilt->trx->op_info = ""; - - /* Set num_rows less than MERGEBUFF to simulate the case where we do - not have enough space to merge the externally sorted file blocks. */ - DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF", - estimate = 2; - DBUG_SET("-d,set_num_rows_lt_MERGEBUFF"); - ); - - DBUG_RETURN((ha_rows) estimate); -} - -/*********************************************************************//** -How many seeks it will take to read through the table. This is to be -comparable to the number returned by records_in_range so that we can -decide if we should scan the table or use keys. -@return estimated time measured in disk seeks */ -UNIV_INTERN -double -ha_innobase::scan_time() -/*====================*/ -{ - /* Since MySQL seems to favor table scans too much over index - searches, we pretend that a sequential read takes the same time - as a random disk read, that is, we do not divide the following - by 10, which would be physically realistic. */ - - /* The locking below is disabled for performance reasons. 
Without - it we could end up returning uninitialized value to the caller, - which in the worst case could make some query plan go bogus or - issue a Valgrind warning. */ -#if 0 - /* avoid potential lock order violation with dict_table_stats_lock() - below */ - update_thd(ha_thd()); - trx_search_latch_release_if_reserved(prebuilt->trx); -#endif - - ulint stat_clustered_index_size; - -#if 0 - dict_table_stats_lock(prebuilt->table, RW_S_LATCH); -#endif - - ut_a(prebuilt->table->stat_initialized); - - stat_clustered_index_size = prebuilt->table->stat_clustered_index_size; - -#if 0 - dict_table_stats_unlock(prebuilt->table, RW_S_LATCH); -#endif - - return((double) stat_clustered_index_size); -} - -/******************************************************************//** -Calculate the time it takes to read a set of ranges through an index -This enables us to optimise reads for clustered indexes. -@return estimated time measured in disk seeks */ -UNIV_INTERN -double -ha_innobase::read_time( -/*===================*/ - uint index, /*!< in: key number */ - uint ranges, /*!< in: how many ranges */ - ha_rows rows) /*!< in: estimated number of rows in the ranges */ -{ - ha_rows total_rows; - double time_for_scan; - - if (index != table->s->primary_key) { - /* Not clustered */ - return(handler::read_time(index, ranges, rows)); - } - - /* Assume that the read time is proportional to the scan time for all - rows + at most one seek per range. */ - - time_for_scan = scan_time(); - - if ((total_rows = estimate_rows_upper_bound()) < rows) { - - return(time_for_scan); - } - - return(ranges + (double) rows / (double) total_rows * time_for_scan); -} - -/******************************************************************//** -Return the size of the InnoDB memory buffer. 
*/ -UNIV_INTERN -longlong -ha_innobase::get_memory_buffer_size() const -/*=======================================*/ -{ - return(innobase_buffer_pool_size); -} - -/*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We will -first check the "index translation table" for a match of the index to get -the index number. If there does not exist an "index translation table", -or not able to find the index in the translation table, then we will fall back -to the traditional way of looping through dict_index_t list to find a -match. In this case, we have to take into account if we generated a -default clustered index for the table -@return the key number used inside MySQL */ -static -int -innobase_get_mysql_key_number_for_index( -/*====================================*/ - INNOBASE_SHARE* share, /*!< in: share structure for index - translation table. */ - const TABLE* table, /*!< in: table in MySQL data - dictionary */ - dict_table_t* ib_table,/*!< in: table in Innodb data - dictionary */ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - unsigned int i; - - ut_a(index); - - /* If index does not belong to the table object of share structure - (ib_table comes from the share structure) search the index->table - object instead */ - if (index->table != ib_table) { - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); - } - - /* If index translation table exists, we will first check - the index through index translation table for a match. 
*/ - if (share->idx_trans_tbl.index_mapping) { - for (i = 0; i < share->idx_trans_tbl.index_count; i++) { - if (share->idx_trans_tbl.index_mapping[i] == index) { - return(i); - } - } - - /* Print an error message if we cannot find the index - in the "index translation table". */ - if (*index->name != TEMP_INDEX_PREFIX) { - sql_print_error("Cannot find index %s in InnoDB index " - "translation table.", index->name); - } - } - - /* If we do not have an "index translation table", or not able - to find the index in the translation table, we'll directly find - matching index with information from mysql TABLE structure and - InnoDB dict_index_t list */ - for (i = 0; i < table->s->keys; i++) { - ind = dict_table_get_index_on_name( - ib_table, table->key_info[i].name); - - if (index == ind) { - return(i); - } - } - - /* Loop through each index of the table and lock them */ - for (ind = dict_table_get_first_index(ib_table); - ind != NULL; - ind = dict_table_get_next_index(ind)) { - if (index == ind) { - /* Temp index is internal to InnoDB, that is - not present in the MySQL index list, so no - need to print such mismatch warning. */ - if (*(index->name) != TEMP_INDEX_PREFIX) { - sql_print_warning( - "Find index %s in InnoDB index list " - "but not its MySQL index number " - "It could be an InnoDB internal index.", - index->name); - } - return(-1); - } - } - - ut_error; - - return(-1); -} - -/*********************************************************************//** -Calculate Record Per Key value. 
Need to exclude the NULL value if -innodb_stats_method is set to "nulls_ignored" -@return estimated record per key value */ -static -ha_rows -innodb_rec_per_key( -/*===============*/ - dict_index_t* index, /*!< in: dict_index_t structure */ - ulint i, /*!< in: the column we are - calculating rec per key */ - ha_rows records) /*!< in: estimated total records */ -{ - ha_rows rec_per_key; - ib_uint64_t n_diff; - - ut_a(index->table->stat_initialized); - - ut_ad(i < dict_index_get_n_unique(index)); - - n_diff = index->stat_n_diff_key_vals[i]; - - if (n_diff == 0) { - - rec_per_key = records; - } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) { - ib_uint64_t n_null; - ib_uint64_t n_non_null; - - n_non_null = index->stat_n_non_null_key_vals[i]; - - /* In theory, index->stat_n_non_null_key_vals[i] - should always be less than the number of records. - Since this is statistics value, the value could - have slight discrepancy. But we will make sure - the number of null values is not a negative number. */ - if (records < n_non_null) { - n_null = 0; - } else { - n_null = records - n_non_null; - } - - /* If the number of NULL values is the same as or - large than that of the distinct values, we could - consider that the table consists mostly of NULL value. - Set rec_per_key to 1. */ - if (n_diff <= n_null) { - rec_per_key = 1; - } else { - /* Need to exclude rows with NULL values from - rec_per_key calculation */ - rec_per_key = (ha_rows) - ((records - n_null) / (n_diff - n_null)); - } - } else { - DEBUG_SYNC_C("after_checking_for_0"); - rec_per_key = (ha_rows) (records / n_diff); - } - - return(rec_per_key); -} - -/*********************************************************************//** -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. 
-@return HA_ERR_* error code or 0 */ -UNIV_INTERN -int -ha_innobase::info_low( -/*==================*/ - uint flag, /*!< in: what information is requested */ - bool is_analyze) -{ - dict_table_t* ib_table; - ha_rows rec_per_key; - ib_uint64_t n_rows; - os_file_stat_t stat_info; - - DBUG_ENTER("info"); - - /* If we are forcing recovery at a high level, we will suppress - statistics calculation on tables, because that may crash the - server if an index is badly corrupted. */ - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(ha_thd()); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - prebuilt->trx->op_info = (char*)"returning various info to MySQL"; - - trx_search_latch_release_if_reserved(prebuilt->trx); - - ib_table = prebuilt->table; - DBUG_ASSERT(ib_table->n_ref_count > 0); - - if (flag & HA_STATUS_TIME) { - if (is_analyze || innobase_stats_on_metadata) { - - dict_stats_upd_option_t opt; - dberr_t ret; - - prebuilt->trx->op_info = "updating table statistics"; - - if (dict_stats_is_persistent_enabled(ib_table)) { - - if (is_analyze) { - opt = DICT_STATS_RECALC_PERSISTENT; - } else { - /* This is e.g. 'SHOW INDEXES', fetch - the persistent stats from disk. 
*/ - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; - } - } else { - opt = DICT_STATS_RECALC_TRANSIENT; - } - - ut_ad(!mutex_own(&dict_sys->mutex)); - ret = dict_stats_update(ib_table, opt); - - if (ret != DB_SUCCESS) { - prebuilt->trx->op_info = ""; - DBUG_RETURN(HA_ERR_GENERIC); - } - - prebuilt->trx->op_info = - "returning various info to MySQL"; - } - - } - - if (flag & HA_STATUS_VARIABLE) { - - ulint page_size; - ulint stat_clustered_index_size; - ulint stat_sum_of_other_index_sizes; - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - n_rows = ib_table->stat_n_rows; - - stat_clustered_index_size - = ib_table->stat_clustered_index_size; - - stat_sum_of_other_index_sizes - = ib_table->stat_sum_of_other_index_sizes; - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - /* - The MySQL optimizer seems to assume in a left join that n_rows - is an accurate estimate if it is zero. Of course, it is not, - since we do not have any locks on the rows yet at this phase. - Since SHOW TABLE STATUS seems to call this function with the - HA_STATUS_TIME flag set, while the left join optimizer does not - set that flag, we add one to a zero value if the flag is not - set. That way SHOW TABLE STATUS will show the best estimate, - while the optimizer never sees the table empty. */ - - if (n_rows == 0 && !(flag & HA_STATUS_TIME)) { - n_rows++; - } - - /* Fix bug#40386: Not flushing query cache after truncate. - n_rows can not be 0 unless the table is empty, set to 1 - instead. The original problem of bug#29507 is actually - fixed in the server code. */ - if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) { - - n_rows = 1; - - /* We need to reset the prebuilt value too, otherwise - checks for values greater than the last value written - to the table will fail and the autoinc counter will - not be updated. 
This will force write_row() into - attempting an update of the table's AUTOINC counter. */ - - prebuilt->autoinc_last_value = 0; - } - - page_size = dict_table_zip_size(ib_table); - if (page_size == 0) { - page_size = UNIV_PAGE_SIZE; - } - - stats.records = (ha_rows) n_rows; - stats.deleted = 0; - stats.data_file_length - = ((ulonglong) stat_clustered_index_size) - * page_size; - stats.index_file_length - = ((ulonglong) stat_sum_of_other_index_sizes) - * page_size; - - /* Since fsp_get_available_space_in_free_extents() is - acquiring latches inside InnoDB, we do not call it if we - are asked by MySQL to avoid locking. Another reason to - avoid the call is that it uses quite a lot of CPU. - See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK - || !(flag & HA_STATUS_VARIABLE_EXTRA)) { - /* We do not update delete_length if no - locking is requested so the "old" value can - remain. delete_length is initialized to 0 in - the ha_statistics' constructor. Also we only - need delete_length to be set when - HA_STATUS_VARIABLE_EXTRA is set */ - } else if (UNIV_UNLIKELY - (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) { - /* Avoid accessing the tablespace if - innodb_crash_recovery is set to a high value. */ - stats.delete_length = 0; - } else { - ullint avail_space; - - avail_space = fsp_get_available_space_in_free_extents( - ib_table->space); - - if (avail_space == ULLINT_UNDEFINED) { - THD* thd; - - thd = ha_thd(); - - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_CANT_GET_STAT, - "InnoDB: Trying to get the free " - "space for table %s but its " - "tablespace has been discarded or " - "the .ibd file is missing. Setting " - "the free space to zero. 
" - "(errno: %M)", - ib_table->name, errno); - - stats.delete_length = 0; - } else { - stats.delete_length = avail_space * 1024; - } - } - - stats.check_time = 0; - stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *)); - - if (stats.records == 0) { - stats.mean_rec_length = 0; - } else { - stats.mean_rec_length = (ulong) - (stats.data_file_length / stats.records); - } - } - - if (flag & HA_STATUS_CONST) { - ulong i; - char path[FN_REFLEN]; - /* Verify the number of index in InnoDB and MySQL - matches up. If prebuilt->clust_index_was_generated - holds, InnoDB defines GEN_CLUST_INDEX internally */ - ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) - - prebuilt->clust_index_was_generated; - if (table->s->keys < num_innodb_index) { - /* If there are too many indexes defined - inside InnoDB, ignore those that are being - created, because MySQL will only consider - the fully built indexes here. */ - - for (const dict_index_t* index - = UT_LIST_GET_FIRST(ib_table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - - /* First, online index creation is - completed inside InnoDB, and then - MySQL attempts to upgrade the - meta-data lock so that it can rebuild - the .frm file. If we get here in that - time frame, dict_index_is_online_ddl() - would not hold and the index would - still not be included in TABLE_SHARE. 
*/ - if (*index->name == TEMP_INDEX_PREFIX) { - num_innodb_index--; - } - } - - if (table->s->keys < num_innodb_index - && innobase_fts_check_doc_id_index( - ib_table, NULL, NULL) - == FTS_EXIST_DOC_ID_INDEX) { - num_innodb_index--; - } - } - - if (table->s->keys != num_innodb_index) { - ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; - ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); - } - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - for (i = 0; i < table->s->keys; i++) { - ulong j; - rec_per_key = 1; - /* We could get index quickly through internal - index mapping with the index translation table. - The identity of index (match up index name with - that of table->key_info[i]) is already verified in - innobase_get_index(). */ - dict_index_t* index = innobase_get_index(i); - - if (index == NULL) { - ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; - ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); - break; - } - - for (j = 0; j < table->key_info[i].ext_key_parts; j++) { - - if (table->key_info[i].flags & HA_FULLTEXT) { - /* The whole concept has no validity - for FTS indexes. */ - table->key_info[i].rec_per_key[j] = 1; - continue; - } - - if (j + 1 > index->n_uniq) { - sql_print_error( - "Index %s of %s has %lu columns" - " unique inside InnoDB, but " - "MySQL is asking statistics for" - " %lu columns. Have you mixed " - "up .frm files from different " - "installations? 
" - "See " REFMAN - "innodb-troubleshooting.html\n", - index->name, - ib_table->name, - (unsigned long) - index->n_uniq, j + 1); - break; - } - - DBUG_EXECUTE_IF("ib_ha_innodb_stat_not_initialized", - index->table->stat_initialized = FALSE;); - - if (!ib_table->stat_initialized || - (index->table != ib_table || - !index->table->stat_initialized)) { - fprintf(stderr, - "InnoDB: Warning: Index %s points to table %s" - " and ib_table %s statistics is initialized %d " - " but index table %s initialized %d " - " mysql table is %s. Have you mixed " - "up .frm files from different " - "installations? " - "See " REFMAN - "innodb-troubleshooting.html\n", - index->name, - index->table->name, - ib_table->name, - ib_table->stat_initialized, - index->table->name, - index->table->stat_initialized, - table->s->table_name.str - ); - - /* This is better than - assert on below function */ - dict_stats_init(index->table); - } - - rec_per_key = innodb_rec_per_key( - index, j, stats.records); - - /* Since MySQL seems to favor table scans - too much over index searches, we pretend - index selectivity is 2 times better than - our estimate: */ - - rec_per_key = rec_per_key / 2; - - if (rec_per_key == 0) { - rec_per_key = 1; - } - - table->key_info[i].rec_per_key[j] = - rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 : - (ulong) rec_per_key; - } - - } - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - my_snprintf(path, sizeof(path), "%s/%s%s", - mysql_data_home, - table->s->normalized_path.str, - reg_ext); - - unpack_filename(path,path); - - /* Note that we do not know the access time of the table, - nor the CHECK TABLE time, nor the UPDATE or INSERT time. 
*/ - - if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) { - stats.create_time = (ulong) stat_info.ctime; - } - } - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - - goto func_exit; - } - - if (flag & HA_STATUS_ERRKEY) { - const dict_index_t* err_index; - - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - - err_index = trx_get_error_info(prebuilt->trx); - - if (err_index) { - errkey = innobase_get_mysql_key_number_for_index( - share, table, ib_table, err_index); - } else { - errkey = (unsigned int) ( - (prebuilt->trx->error_key_num - == ULINT_UNDEFINED) - ? ~0 - : prebuilt->trx->error_key_num); - } - } - - if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) { - stats.auto_increment_value = innobase_peek_autoinc(); - } - -func_exit: - prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN(0); -} - -/*********************************************************************//** -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. -@return HA_ERR_* error code or 0 */ -UNIV_INTERN -int -ha_innobase::info( -/*==============*/ - uint flag) /*!< in: what information is requested */ -{ - return(this->info_low(flag, false /* not ANALYZE */)); -} - -/**********************************************************************//** -Updates index cardinalities of the table, based on random dives into -each index tree. This does NOT calculate exact statistics on the table. 
-@return HA_ADMIN_* error code or HA_ADMIN_OK */ -UNIV_INTERN -int -ha_innobase::analyze( -/*=================*/ - THD* thd, /*!< in: connection thread handle */ - HA_CHECK_OPT* check_opt) /*!< in: currently ignored */ -{ - int ret; - - /* Simply call this->info_low() with all the flags - and request recalculation of the statistics */ - ret = this->info_low( - HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE, - true /* this is ANALYZE */); - - if (ret != 0) { - return(HA_ADMIN_FAILED); - } - - return(HA_ADMIN_OK); -} - /**********************************************************************//** This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds the table in MySQL. */ -UNIV_INTERN + int ha_innobase::optimize( /*==================*/ THD* thd, /*!< in: connection thread handle */ HA_CHECK_OPT* check_opt) /*!< in: currently ignored */ { - /*FTS-FIXME: Since MySQL doesn't support engine-specific commands, + + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + /* FTS-FIXME: Since MySQL doesn't support engine-specific commands, we have to hijack some existing command in order to be able to test the new admin commands added in InnoDB's FTS support. 
For now, we use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in @@ -13715,7 +16950,7 @@ ha_innobase::optimize( if (srv_defragment) { int err; - err = defragment_table(prebuilt->table->name, NULL, false); + err = defragment_table(m_prebuilt->table->name.m_name, NULL, false); if (err == 0) { return (HA_ADMIN_OK); @@ -13723,9 +16958,9 @@ ha_innobase::optimize( push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, err, "InnoDB: Cannot defragment table %s: returned error code %d\n", - prebuilt->table->name, err); + m_prebuilt->table->name, err); - if(err == ER_SP_ALREADY_EXISTS) { + if (err == ER_SP_ALREADY_EXISTS) { return (HA_ADMIN_OK); } else { return (HA_ADMIN_TRY_ALTER); @@ -13734,10 +16969,10 @@ ha_innobase::optimize( } if (innodb_optimize_fulltext_only) { - if (prebuilt->table->fts && prebuilt->table->fts->cache - && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table, false, true, false); - fts_optimize_table(prebuilt->table); + if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache + && !dict_table_is_discarded(m_prebuilt->table)) { + fts_sync_table(m_prebuilt->table, false, true, false); + fts_optimize_table(m_prebuilt->table); } return(HA_ADMIN_OK); } else { @@ -13750,8 +16985,8 @@ ha_innobase::optimize( Tries to check that an InnoDB table is not corrupted. If corruption is noticed, prints to stderr information about it. In case of corruption may also assert a failure and crash the server. 
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */ -UNIV_INTERN +@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */ + int ha_innobase::check( /*===============*/ @@ -13763,22 +16998,23 @@ ha_innobase::check( ulint n_rows_in_table = ULINT_UNDEFINED; bool is_ok = true; ulint old_isolation_level; - ibool table_corrupted; + dberr_t ret; DBUG_ENTER("ha_innobase::check"); DBUG_ASSERT(thd == ha_thd()); - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(thd)); + ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); + ut_a(m_prebuilt->trx == thd_to_trx(thd)); - if (prebuilt->mysql_template == NULL) { + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + if (m_prebuilt->mysql_template == NULL) { /* Build the template; we will use a dummy template in index scans done in checking */ build_template(true); } - if (dict_table_is_discarded(prebuilt->table)) { + if (dict_table_is_discarded(m_prebuilt->table)) { ib_senderrf( thd, @@ -13788,7 +17024,7 @@ ha_innobase::check( DBUG_RETURN(HA_ADMIN_CORRUPT); - } else if (prebuilt->table->ibd_file_missing) { + } else if (m_prebuilt->table->ibd_file_missing) { ib_senderrf( thd, IB_LOG_LEVEL_ERROR, @@ -13798,87 +17034,71 @@ ha_innobase::check( DBUG_RETURN(HA_ADMIN_CORRUPT); } - if (prebuilt->table->corrupted) { - char index_name[MAX_FULL_NAME_LEN + 1]; + m_prebuilt->trx->op_info = "checking table"; + + if (m_prebuilt->table->corrupted) { /* If some previous operation has marked the table as corrupted in memory, and has not propagated such to clustered index, we will do so here */ - index = dict_table_get_first_index(prebuilt->table); + index = dict_table_get_first_index(m_prebuilt->table); if (!dict_index_is_corrupted(index)) { - row_mysql_lock_data_dictionary(prebuilt->trx); - dict_set_corrupted(index, prebuilt->trx, "CHECK TABLE"); - row_mysql_unlock_data_dictionary(prebuilt->trx); + dict_set_corrupted( + index, m_prebuilt->trx, "CHECK TABLE"); } - innobase_format_name(index_name, sizeof index_name, - 
index->name, TRUE); - - push_warning_printf(thd, + push_warning_printf(m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_INDEX_CORRUPT, "InnoDB: Index %s is marked as" - " corrupted", index_name); + " corrupted", + index->name()); /* Now that the table is already marked as corrupted, there is no need to check any index of this table */ - prebuilt->trx->op_info = ""; + m_prebuilt->trx->op_info = ""; DBUG_RETURN(HA_ADMIN_CORRUPT); } - prebuilt->trx->op_info = "checking table"; - - old_isolation_level = prebuilt->trx->isolation_level; + old_isolation_level = m_prebuilt->trx->isolation_level; /* We must run the index record counts at an isolation level >= READ COMMITTED, because a dirty read can see a wrong number of records in some index; to play safe, we use always REPEATABLE READ here */ + m_prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; - prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; + ut_ad(!m_prebuilt->table->corrupted); - /* Check whether the table is already marked as corrupted - before running the check table */ - table_corrupted = prebuilt->table->corrupted; - - /* Reset table->corrupted bit so that check table can proceed to - do additional check */ - prebuilt->table->corrupted = FALSE; - - for (index = dict_table_get_first_index(prebuilt->table); + for (index = dict_table_get_first_index(m_prebuilt->table); index != NULL; index = dict_table_get_next_index(index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - /* If this is an index being created or dropped, skip */ - if (*index->name == TEMP_INDEX_PREFIX) { + if (!index->is_committed()) { continue; } - if (!(check_opt->flags & T_QUICK)) { + if (!(check_opt->flags & T_QUICK) + && !dict_index_is_corrupted(index)) { /* Enlarge the fatal lock wait timeout during CHECK TABLE. 
*/ - os_increment_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, + os_atomic_increment_ulint( + &srv_fatal_semaphore_wait_threshold, SRV_SEMAPHORE_WAIT_EXTENSION); - dberr_t err = btr_validate_index(index, prebuilt->trx); + + dberr_t err = btr_validate_index( + index, m_prebuilt->trx, false); /* Restore the fatal lock wait timeout after CHECK TABLE. */ - os_decrement_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, + os_atomic_decrement_ulint( + &srv_fatal_semaphore_wait_threshold, SRV_SEMAPHORE_WAIT_EXTENSION); if (err != DB_SUCCESS) { is_ok = false; - innobase_format_name( - index_name, sizeof index_name, - index->name, TRUE); - if (err == DB_DECRYPTION_FAILED) { push_warning_printf( thd, @@ -13895,7 +17115,7 @@ ha_innobase::check( ER_NOT_KEYFILE, "InnoDB: The B-tree of" " index %s is corrupted.", - index_name); + index->name()); } continue; @@ -13905,81 +17125,85 @@ ha_innobase::check( /* Instead of invoking change_active_index(), set up a dummy template for non-locking reads, disabling access to the clustered index. 
*/ - prebuilt->index = index; + m_prebuilt->index = index; - prebuilt->index_usable = row_merge_is_index_usable( - prebuilt->trx, prebuilt->index); + m_prebuilt->index_usable = row_merge_is_index_usable( + m_prebuilt->trx, m_prebuilt->index); DBUG_EXECUTE_IF( "dict_set_index_corrupted", if (!dict_index_is_clust(index)) { - prebuilt->index_usable = FALSE; - row_mysql_lock_data_dictionary(prebuilt->trx); - dict_set_corrupted(index, prebuilt->trx, "dict_set_index_corrupted");; - row_mysql_unlock_data_dictionary(prebuilt->trx); + m_prebuilt->index_usable = FALSE; + // row_mysql_lock_data_dictionary(m_prebuilt->trx); + dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");; + // row_mysql_unlock_data_dictionary(m_prebuilt->trx); }); - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - innobase_format_name( - index_name, sizeof index_name, - prebuilt->index->name, TRUE); - - if (dict_index_is_corrupted(prebuilt->index)) { + if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) { + if (dict_index_is_corrupted(m_prebuilt->index)) { push_warning_printf( - user_thd, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_INDEX_CORRUPT, "InnoDB: Index %s is marked as" " corrupted", - index_name); + index->name()); is_ok = false; } else { push_warning_printf( - thd, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TABLE_DEF_CHANGED, "InnoDB: Insufficient history for" " index %s", - index_name); + index->name()); } continue; } - prebuilt->sql_stat_start = TRUE; - prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; - prebuilt->n_template = 0; - prebuilt->need_to_access_clustered = FALSE; + m_prebuilt->sql_stat_start = TRUE; + m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; + m_prebuilt->n_template = 0; + m_prebuilt->need_to_access_clustered = FALSE; - dtuple_set_n_fields(prebuilt->search_tuple, 0); + dtuple_set_n_fields(m_prebuilt->search_tuple, 0); - prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; - if 
(!row_check_index_for_mysql(prebuilt, index, &n_rows)) { - innobase_format_name( - index_name, sizeof index_name, - index->name, TRUE); + /* Scan this index. */ + if (dict_index_is_spatial(index)) { + ret = row_count_rtree_recs(m_prebuilt, &n_rows); + } else { + ret = row_scan_index_for_mysql( + m_prebuilt, index, true, &n_rows); + } + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!dict_index_is_clust(index)) { + ret = DB_CORRUPTION; + }); + + if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) { + /* Do not report error since this could happen + during shutdown */ + break; + } + if (ret != DB_SUCCESS) { + /* Assume some kind of corruption. */ push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE, "InnoDB: The B-tree of" " index %s is corrupted.", - index_name); + index->name()); is_ok = false; dict_set_corrupted( - index, prebuilt->trx, "CHECK TABLE-check index"); + index, m_prebuilt->trx, "CHECK TABLE-check index"); } - if (thd_kill_level(user_thd)) { - break; - } -#if 0 - fprintf(stderr, "%lu entries in index %s\n", n_rows, - index->name); -#endif - - if (index == dict_table_get_first_index(prebuilt->table)) { + if (index == dict_table_get_first_index(m_prebuilt->table)) { n_rows_in_table = n_rows; } else if (!(index->type & DICT_FTS) && (n_rows != n_rows_in_table)) { @@ -13988,36 +17212,22 @@ ha_innobase::check( ER_NOT_KEYFILE, "InnoDB: Index '%-.200s' contains %lu" " entries, should be %lu.", - index->name, + index->name(), (ulong) n_rows, (ulong) n_rows_in_table); is_ok = false; dict_set_corrupted( - index, prebuilt->trx, + index, m_prebuilt->trx, "CHECK TABLE; Wrong count"); } } - if (table_corrupted) { - /* If some previous operation has marked the table as - corrupted in memory, and has not propagated such to - clustered index, we will do so here */ - index = dict_table_get_first_index(prebuilt->table); - - if (!dict_index_is_corrupted(index)) { - dict_set_corrupted( - index, prebuilt->trx, "CHECK TABLE"); - } - 
prebuilt->table->corrupted = TRUE; - } - /* Restore the original isolation level */ - prebuilt->trx->isolation_level = old_isolation_level; - + m_prebuilt->trx->isolation_level = old_isolation_level; +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /* We validate the whole adaptive hash index for all tables at every CHECK TABLE only when QUICK flag is not present. */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) { push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE, @@ -14025,11 +17235,7 @@ ha_innobase::check( is_ok = false; } #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ - - prebuilt->trx->op_info = ""; - if (thd_kill_level(user_thd)) { - my_error(ER_QUERY_INTERRUPTED, MYF(0)); - } + m_prebuilt->trx->op_info = ""; DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT); } @@ -14060,23 +17266,23 @@ ha_innobase::update_table_comment( update_thd(ha_thd()); - prebuilt->trx->op_info = (char*)"returning table comment"; + m_prebuilt->trx->op_info = (char*)"returning table comment"; /* In case MySQL calls this in the middle of a SELECT query, release possible adaptive hash latch to avoid deadlocks of threads */ - trx_search_latch_release_if_reserved(prebuilt->trx); + trx_search_latch_release_if_reserved(m_prebuilt->trx); #define SSTR( x ) reinterpret_cast< std::ostringstream & >( \ ( std::ostringstream() << std::dec << x ) ).str() fk_str.append("InnoDB free: "); fk_str.append(SSTR(fsp_get_available_space_in_free_extents( - prebuilt->table->space))); + m_prebuilt->table->space))); fk_str.append(dict_print_info_on_foreign_keys( - FALSE, prebuilt->trx, - prebuilt->table)); + FALSE, m_prebuilt->trx, + m_prebuilt->table)); flen = fk_str.length(); @@ -14085,11 +17291,8 @@ ha_innobase::update_table_comment( } else if (length + flen + 3 > 64000) { flen = 64000 - 3 - length; } - /* allocate buffer for the full string */ - str = (char*) my_malloc(length + flen + 3, MYF(0)); - if (str) { char* pos 
= str + length; if (length) { @@ -14097,12 +17300,11 @@ ha_innobase::update_table_comment( *pos++ = ';'; *pos++ = ' '; } - memcpy(pos, fk_str.c_str(), flen); pos[flen] = 0; } - prebuilt->trx->op_info = (char*)""; + m_prebuilt->trx->op_info = (char*)""; return(str ? str : (char*) comment); } @@ -14112,14 +17314,12 @@ Gets the foreign key create info for a table stored in InnoDB. @return own: character string in the form which can be inserted to the CREATE TABLE statement, MUST be freed with ha_innobase::free_foreign_key_create_info */ -UNIV_INTERN + char* ha_innobase::get_foreign_key_create_info(void) /*==========================================*/ { - char* fk_str = 0; - - ut_a(prebuilt != NULL); + ut_a(m_prebuilt != NULL); /* We do not know if MySQL can call this function before calling external_lock(). To be safe, update the thd of the current table @@ -14127,23 +17327,32 @@ ha_innobase::get_foreign_key_create_info(void) update_thd(ha_thd()); - prebuilt->trx->op_info = (char*)"getting info on foreign keys"; + m_prebuilt->trx->op_info = (char*)"getting info on foreign keys"; /* In case MySQL calls this in the middle of a SELECT query, release possible adaptive hash latch to avoid deadlocks of threads */ - trx_search_latch_release_if_reserved(prebuilt->trx); + trx_search_latch_release_if_reserved(m_prebuilt->trx); - /* Output the data to a temporary file */ + + + /* Output the data to a temporary string */ std::string str = dict_print_info_on_foreign_keys( - TRUE, prebuilt->trx, - prebuilt->table); + TRUE, m_prebuilt->trx, + m_prebuilt->table); - prebuilt->trx->op_info = (char*)""; + m_prebuilt->trx->op_info = (char*)""; /* Allocate buffer for the string */ - fk_str = (char*) my_malloc(str.length() + 1, MYF(0)); + char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0)); + + /* JAN: TODO: MySQL 5.7 + fk_str = reinterpret_cast( + my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0))); + */ + + if (fk_str) { memcpy(fk_str, str.c_str(), str.length()); @@ -14161,13 
+17370,13 @@ static FOREIGN_KEY_INFO* get_foreign_key_info( /*=================*/ - THD* thd, /*!< in: user thread handle */ - dict_foreign_t* foreign) /*!< in: foreign key constraint */ + THD* thd, /*!< in: user thread handle */ + dict_foreign_t* foreign)/*!< in: foreign key constraint */ { FOREIGN_KEY_INFO f_key_info; FOREIGN_KEY_INFO* pf_key_info; uint i = 0; - ulint len; + size_t len; char tmp_buff[NAME_LEN+1]; char name_buff[NAME_LEN+1]; const char* ptr; @@ -14175,8 +17384,8 @@ get_foreign_key_info( LEX_STRING* name = NULL; ptr = dict_remove_db_name(foreign->id); - f_key_info.foreign_id = thd_make_lex_string(thd, 0, ptr, - (uint) strlen(ptr), 1); + f_key_info.foreign_id = thd_make_lex_string( + thd, 0, ptr, (uint) strlen(ptr), 1); /* Name format: database name, '/', table name, '\0' */ @@ -14259,12 +17468,14 @@ get_foreign_key_info( thd, f_key_info.update_method, ptr, static_cast(len), 1); - if (foreign->referenced_index && foreign->referenced_index->name) { - referenced_key_name = thd_make_lex_string(thd, - f_key_info.referenced_key_name, - foreign->referenced_index->name, - (uint) strlen(foreign->referenced_index->name), - 1); + if (foreign->referenced_index + && foreign->referenced_index->name != NULL) { + referenced_key_name = thd_make_lex_string( + thd, + f_key_info.referenced_key_name, + foreign->referenced_index->name, + (uint) strlen(foreign->referenced_index->name), + 1); } else { referenced_key_name = NULL; } @@ -14280,41 +17491,39 @@ get_foreign_key_info( /*******************************************************************//** Gets the list of foreign keys in this table. 
@return always 0, that is, always succeeds */ -UNIV_INTERN + int ha_innobase::get_foreign_key_list( /*==============================*/ THD* thd, /*!< in: user thread handle */ List* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - - ut_a(prebuilt != NULL); update_thd(ha_thd()); - prebuilt->trx->op_info = "getting list of foreign keys"; + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); - trx_search_latch_release_if_reserved(prebuilt->trx); + m_prebuilt->trx->op_info = "getting list of foreign keys"; - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); for (dict_foreign_set::iterator it - = prebuilt->table->foreign_set.begin(); - it != prebuilt->table->foreign_set.end(); + = m_prebuilt->table->foreign_set.begin(); + it != m_prebuilt->table->foreign_set.end(); ++it) { - foreign = *it; + FOREIGN_KEY_INFO* pf_key_info; + dict_foreign_t* foreign = *it; pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { + + if (pf_key_info != NULL) { f_key_list->push_back(pf_key_info); } } - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); - prebuilt->trx->op_info = ""; + m_prebuilt->trx->op_info = ""; return(0); } @@ -14322,41 +17531,206 @@ ha_innobase::get_foreign_key_list( /*******************************************************************//** Gets the set of foreign keys where this table is the referenced table. 
@return always 0, that is, always succeeds */ -UNIV_INTERN + int ha_innobase::get_parent_foreign_key_list( /*=====================================*/ THD* thd, /*!< in: user thread handle */ List* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - - ut_a(prebuilt != NULL); update_thd(ha_thd()); - prebuilt->trx->op_info = "getting list of referencing foreign keys"; + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); - trx_search_latch_release_if_reserved(prebuilt->trx); + m_prebuilt->trx->op_info = "getting list of referencing foreign keys"; - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); for (dict_foreign_set::iterator it - = prebuilt->table->referenced_set.begin(); - it != prebuilt->table->referenced_set.end(); + = m_prebuilt->table->referenced_set.begin(); + it != m_prebuilt->table->referenced_set.end(); ++it) { - foreign = *it; + FOREIGN_KEY_INFO* pf_key_info; + dict_foreign_t* foreign = *it; pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { + + if (pf_key_info != NULL) { f_key_list->push_back(pf_key_info); } } - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); - prebuilt->trx->op_info = ""; + m_prebuilt->trx->op_info = ""; + + return(0); +} + +/** Table list item structure is used to store only the table +and name. It is used by get_cascade_foreign_key_table_list to store +the intermediate result for fetching the table set. */ +struct table_list_item { + /** InnoDB table object */ + const dict_table_t* table; + /** Table name */ + const char* name; +}; + +/** Structure to compare two st_tablename objects using their +db and tablename. It is used in the ordering of cascade_fk_set. +It returns true if the first argument precedes the second argument +and false otherwise. 
*/ +struct tablename_compare { + + bool operator()(const st_handler_tablename lhs, + const st_handler_tablename rhs) const + { + int cmp = strcmp(lhs.db, rhs.db); + if (cmp == 0) { + cmp = strcmp(lhs.tablename, rhs.tablename); + } + + return(cmp < 0); + } +}; + +/** Get the table name and database name for the given table. +@param[in,out] thd user thread handle +@param[out] f_key_info pointer to table_name_info object +@param[in] foreign foreign key constraint. */ +static +void +get_table_name_info( + THD* thd, + st_handler_tablename* f_key_info, + const dict_foreign_t* foreign) +{ +#define FILENAME_CHARSET_MBMAXLEN 5 + char tmp_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1]; + char name_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1]; + const char* ptr; + + size_t len = dict_get_db_name_len( + foreign->referenced_table_name_lookup); + ut_memcpy(tmp_buff, foreign->referenced_table_name_lookup, len); + tmp_buff[len] = 0; + + ut_ad(len < sizeof(tmp_buff)); + + len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff)); + f_key_info->db = thd_strmake(thd, name_buff, len); + + ptr = dict_remove_db_name(foreign->referenced_table_name_lookup); + len = filename_to_tablename(ptr, name_buff, sizeof(name_buff)); + f_key_info->tablename = thd_strmake(thd, name_buff, len); +} + +/** Get the list of tables ordered by the dependency on the other tables using +the 'CASCADE' foreign key constraint. +@param[in,out] thd user thread handle +@param[out] fk_table_list set of tables name info for the + dependent table +@retval 0 for success. */ +int +ha_innobase::get_cascade_foreign_key_table_list( + THD* thd, + List* fk_table_list) +{ + TrxInInnoDB trx_in_innodb(m_prebuilt->trx); + + m_prebuilt->trx->op_info = "getting cascading foreign keys"; + + std::list > table_list; + + typedef std::set > cascade_fk_set; + + cascade_fk_set fk_set; + + mutex_enter(&dict_sys->mutex); + + /* Initialize the table_list with prebuilt->table name. 
*/ + struct table_list_item item = {m_prebuilt->table, + m_prebuilt->table->name.m_name}; + + table_list.push_back(item); + + /* Get the parent table, grand parent table info from the + table list by depth-first traversal. */ + do { + const dict_table_t* parent_table; + dict_table_t* parent = NULL; + std::pair ret; + + item = table_list.back(); + table_list.pop_back(); + parent_table = item.table; + + if (parent_table == NULL) { + + ut_ad(item.name != NULL); + + parent_table = parent = dict_table_open_on_name( + item.name, TRUE, FALSE, + DICT_ERR_IGNORE_NONE); + + if (parent_table == NULL) { + /* foreign_key_checks is or was probably + disabled; ignore the constraint */ + continue; + } + } + + for (dict_foreign_set::const_iterator it = + parent_table->foreign_set.begin(); + it != parent_table->foreign_set.end(); ++it) { + + const dict_foreign_t* foreign = *it; + st_handler_tablename f1; + + /* Skip the table if there is no + cascading operation. */ + if (0 == (foreign->type + & ~(DICT_FOREIGN_ON_DELETE_NO_ACTION + | DICT_FOREIGN_ON_UPDATE_NO_ACTION))) { + continue; + } + + if (foreign->referenced_table_name_lookup != NULL) { + get_table_name_info(thd, &f1, foreign); + ret = fk_set.insert(f1); + + /* Ignore the table if it is already + in the set. */ + if (!ret.second) { + continue; + } + + struct table_list_item item1 = { + foreign->referenced_table, + foreign->referenced_table_name_lookup}; + + table_list.push_back(item1); + + st_handler_tablename* fk_table = + (st_handler_tablename*) thd_memdup( + thd, &f1, sizeof(*fk_table)); + + fk_table_list->push_back(fk_table); + } + } + + if (parent != NULL) { + dict_table_close(parent, true, false); + } + + } while(!table_list.empty()); + + mutex_exit(&dict_sys->mutex); + + m_prebuilt->trx->op_info = ""; return(0); } @@ -14365,26 +17739,26 @@ ha_innobase::get_parent_foreign_key_list( Checks if ALTER TABLE may change the storage engine of the table. 
Changing storage engines is not allowed for tables for which there are foreign key constraints (parent or child tables). -@return TRUE if can switch engines */ -UNIV_INTERN +@return TRUE if can switch engines */ + bool ha_innobase::can_switch_engines(void) /*=================================*/ { - bool can_switch; - DBUG_ENTER("ha_innobase::can_switch_engines"); + update_thd(); - prebuilt->trx->op_info = + m_prebuilt->trx->op_info = "determining if there are foreign key constraints"; - row_mysql_freeze_data_dictionary(prebuilt->trx); - can_switch = prebuilt->table->referenced_set.empty() - && prebuilt->table->foreign_set.empty(); + row_mysql_freeze_data_dictionary(m_prebuilt->trx); - row_mysql_unfreeze_data_dictionary(prebuilt->trx); - prebuilt->trx->op_info = ""; + bool can_switch = m_prebuilt->table->referenced_set.empty() + && m_prebuilt->table->foreign_set.empty(); + + row_mysql_unfreeze_data_dictionary(m_prebuilt->trx); + m_prebuilt->trx->op_info = ""; DBUG_RETURN(can_switch); } @@ -14394,13 +17768,13 @@ Checks if a table is referenced by a foreign key. The MySQL manual states that a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a delete is then allowed internally to resolve a duplicate key conflict in REPLACE, not an update. -@return > 0 if referenced by a FOREIGN KEY */ -UNIV_INTERN +@return > 0 if referenced by a FOREIGN KEY */ + uint ha_innobase::referenced_by_foreign_key(void) /*========================================*/ { - if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) { + if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) { return(1); } @@ -14411,21 +17785,21 @@ ha_innobase::referenced_by_foreign_key(void) /*******************************************************************//** Frees the foreign key create info for a table stored in InnoDB, if it is non-NULL. 
*/ -UNIV_INTERN + void ha_innobase::free_foreign_key_create_info( /*======================================*/ char* str) /*!< in, own: create info string to free */ { - if (str) { + if (str != NULL) { my_free(str); } } /*******************************************************************//** Tells something additional to the handler about how to do things. -@return 0 or error number */ -UNIV_INTERN +@return 0 or error number */ + int ha_innobase::extra( /*===============*/ @@ -14435,13 +17809,13 @@ ha_innobase::extra( check_trx_exists(ha_thd()); /* Warning: since it is not sure that MySQL calls external_lock - before calling this function, the trx field in prebuilt can be + before calling this function, the trx field in m_prebuilt can be obsolete! */ switch (operation) { case HA_EXTRA_FLUSH: - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); + if (m_prebuilt->blob_heap) { + row_mysql_prebuilt_free_blob_heap(m_prebuilt); } break; case HA_EXTRA_RESET_STATE: @@ -14449,16 +17823,16 @@ ha_innobase::extra( thd_to_trx(ha_thd())->duplicates = 0; break; case HA_EXTRA_NO_KEYREAD: - prebuilt->read_just_key = 0; + m_prebuilt->read_just_key = 0; break; case HA_EXTRA_KEYREAD: - prebuilt->read_just_key = 1; + m_prebuilt->read_just_key = 1; break; case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - prebuilt->keep_other_fields_on_keyread = 1; + m_prebuilt->keep_other_fields_on_keyread = 1; break; - /* IMPORTANT: prebuilt->trx can be obsolete in + /* IMPORTANT: m_prebuilt->trx can be obsolete in this method, because it is not sure that MySQL calls external_lock before this method with the parameters below. We must not invoke update_thd() @@ -14483,29 +17857,49 @@ ha_innobase::extra( return(0); } -/******************************************************************//** -*/ -UNIV_INTERN +/** +MySQL calls this method at the end of each statement. This method +exists for readability only. ha_innobase::reset() doesn't give any +clue about the method. 
*/ + int -ha_innobase::reset() -/*================*/ +ha_innobase::end_stmt() { - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); + if (m_prebuilt->blob_heap) { + row_mysql_prebuilt_free_blob_heap(m_prebuilt); } reset_template(); - ds_mrr.dsmrr_close(); + + m_ds_mrr.dsmrr_close(); /* TODO: This should really be reset in reset_template() but for now it's safer to do it explicitly here. */ /* This is a statement level counter. */ - prebuilt->autoinc_last_value = 0; + m_prebuilt->autoinc_last_value = 0; + + /* This transaction had called ha_innobase::start_stmt() */ + trx_t* trx = m_prebuilt->trx; + + if (trx->lock.start_stmt) { + TrxInInnoDB::end_stmt(trx); + + trx->lock.start_stmt = false; + } return(0); } +/** +MySQL calls this method at the end of each statement */ + +int +ha_innobase::reset() +{ + return(end_stmt()); +} + /******************************************************************//** MySQL calls this function at the start of each SQL statement inside LOCK TABLES. Inside LOCK TABLES the ::external_lock method does not work to @@ -14517,42 +17911,48 @@ procedure. To make the execution more deterministic for binlogging, MySQL-5.0 locks all tables involved in a stored procedure with full explicit table locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the procedure. -@return 0 or error code */ -UNIV_INTERN +@return 0 or error code */ + int ha_innobase::start_stmt( /*====================*/ THD* thd, /*!< in: handle to the user thread */ thr_lock_type lock_type) { - trx_t* trx; + trx_t* trx = m_prebuilt->trx; + DBUG_ENTER("ha_innobase::start_stmt"); update_thd(thd); - trx = prebuilt->trx; + ut_ad(m_prebuilt->table != NULL); - /* Here we release the search latch and the InnoDB thread FIFO ticket - if they were reserved. 
They should have been released already at the - end of the previous statement, but because inside LOCK TABLES the - lock count method does not work to mark the end of a SELECT statement, - that may not be the case. We MUST release the search latch before an - INSERT, for example. */ + TrxInInnoDB trx_in_innodb(trx); - trx_search_latch_release_if_reserved(trx); + if (dict_table_is_intrinsic(m_prebuilt->table)) { + + if (thd_sql_command(thd) == SQLCOM_ALTER_TABLE) { + + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + + DBUG_RETURN(0); + } + + trx = m_prebuilt->trx; innobase_srv_conc_force_exit_innodb(trx); /* Reset the AUTOINC statement level counter for multi-row INSERTs. */ trx->n_autoinc_rows = 0; - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; + m_prebuilt->sql_stat_start = TRUE; + m_prebuilt->hint_need_to_fetch_extra_cols = 0; reset_template(); - if (dict_table_is_temporary(prebuilt->table) - && prebuilt->mysql_has_locked - && prebuilt->select_lock_type == LOCK_NONE) { + if (dict_table_is_temporary(m_prebuilt->table) + && m_mysql_has_locked + && m_prebuilt->select_lock_type == LOCK_NONE) { dberr_t error; switch (thd_sql_command(thd)) { @@ -14560,12 +17960,12 @@ ha_innobase::start_stmt( case SQLCOM_UPDATE: case SQLCOM_DELETE: init_table_handle_for_HANDLER(); - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - error = row_lock_table_for_mysql(prebuilt, NULL, 1); + m_prebuilt->select_lock_type = LOCK_X; + m_prebuilt->stored_select_lock_type = LOCK_X; + error = row_lock_table_for_mysql(m_prebuilt, NULL, 1); if (error != DB_SUCCESS) { - int st = convert_error_code_to_mysql( + int st = convert_error_code_to_mysql( error, 0, thd); DBUG_RETURN(st); } @@ -14573,13 +17973,13 @@ ha_innobase::start_stmt( } } - if (!prebuilt->mysql_has_locked) { + if (!m_mysql_has_locked) { /* This handle is for a temporary table created inside this same LOCK TABLES; since MySQL does NOT call external_lock in this case, we must 
use x-row locks inside InnoDB to be prepared for an update of a row */ - prebuilt->select_lock_type = LOCK_X; + m_prebuilt->select_lock_type = LOCK_X; } else if (trx->isolation_level != TRX_ISO_SERIALIZABLE && thd_sql_command(thd) == SQLCOM_SELECT @@ -14588,18 +17988,19 @@ ha_innobase::start_stmt( /* For other than temporary tables, we obtain no lock for consistent read (plain SELECT). */ - prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; } else { /* Not a consistent read: restore the select_lock_type value. The value of stored_select_lock_type was decided in: 1) ::store_lock(), 2) ::external_lock(), - 3) ::init_table_handle_for_HANDLER(), and - 4) ::transactional_table_lock(). */ + 3) ::init_table_handle_for_HANDLER(). */ - ut_a(prebuilt->stored_select_lock_type != LOCK_NONE_UNSET); - prebuilt->select_lock_type = prebuilt->stored_select_lock_type; + ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET); + + m_prebuilt->select_lock_type = + m_prebuilt->stored_select_lock_type; } *trx->detailed_error = 0; @@ -14610,12 +18011,20 @@ ha_innobase::start_stmt( ++trx->will_lock; } + /* Only do it once per transaction. */ + if (!trx->lock.start_stmt && lock_type != TL_UNLOCK) { + + TrxInInnoDB::begin_stmt(trx); + + trx->lock.start_stmt = true; + } + DBUG_RETURN(0); } /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code -@return InnoDB isolation level */ +@return InnoDB isolation level */ static inline ulint innobase_map_isolation_level( @@ -14642,21 +18051,35 @@ the THD in the handle. We will also use this function to communicate to InnoDB that a new SQL statement has started and that we must store a savepoint to our transaction handle, so that we are able to roll back the SQL statement in case of an error. 
-@return 0 */ -UNIV_INTERN +@return 0 */ + int ha_innobase::external_lock( /*=======================*/ THD* thd, /*!< in: handle to the user thread */ int lock_type) /*!< in: lock type */ { - trx_t* trx; - DBUG_ENTER("ha_innobase::external_lock"); DBUG_PRINT("enter",("lock_type: %d", lock_type)); update_thd(thd); + trx_t* trx = m_prebuilt->trx; + + ut_ad(m_prebuilt->table); + + if (dict_table_is_intrinsic(m_prebuilt->table)) { + + if (thd_sql_command(thd) == SQLCOM_ALTER_TABLE) { + + DBUG_RETURN(HA_ERR_WRONG_COMMAND); + } + + TrxInInnoDB::begin_stmt(trx); + + DBUG_RETURN(0); + } + /* Statement based binlogging does not work in isolation level READ UNCOMMITTED and READ COMMITTED since the necessary locks cannot be taken. In this case, we print an @@ -14669,18 +18092,22 @@ ha_innobase::external_lock( && thd_binlog_format(thd) == BINLOG_FORMAT_STMT && thd_binlog_filter_ok(thd) && thd_sqlcom_can_generate_row_events(thd)) { - bool skip = 0; + + bool skip = false; + /* used by test case */ DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;); + if (!skip) { #ifdef WITH_WSREP if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE) { #endif /* WITH_WSREP */ my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0), - " InnoDB is limited to row-logging when " - "transaction isolation level is " - "READ COMMITTED or READ UNCOMMITTED."); + " InnoDB is limited to row-logging when" + " transaction isolation level is" + " READ COMMITTED or READ UNCOMMITTED."); + DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE); #ifdef WITH_WSREP } @@ -14702,8 +18129,7 @@ ha_innobase::external_lock( || thd_sql_command(thd) == SQLCOM_DROP_INDEX || thd_sql_command(thd) == SQLCOM_DELETE)) { - if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) - { + if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) { ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); @@ -14712,24 +18138,29 @@ ha_innobase::external_lock( ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } - } - 
trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; + m_prebuilt->sql_stat_start = TRUE; + m_prebuilt->hint_need_to_fetch_extra_cols = 0; reset_template(); - switch (prebuilt->table->quiesce) { + switch (m_prebuilt->table->quiesce) { case QUIESCE_START: /* Check for FLUSH TABLE t WITH READ LOCK; */ if (!srv_read_only_mode && thd_sql_command(thd) == SQLCOM_FLUSH && lock_type == F_RDLCK) { - row_quiesce_table_start(prebuilt->table, trx); + if (dict_table_is_discarded(m_prebuilt->table)) { + ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, + ER_TABLESPACE_DISCARDED, + table->s->table_name.str); + + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + row_quiesce_table_start(m_prebuilt->table, trx); /* Use the transaction instance to track UNLOCK TABLES. It can be done via START TRANSACTION; too @@ -14745,7 +18176,7 @@ ha_innobase::external_lock( if (trx->flush_tables > 0 && (lock_type == F_UNLCK || trx_is_interrupted(trx))) { - row_quiesce_table_complete(prebuilt->table, trx); + row_quiesce_table_complete(m_prebuilt->table, trx); ut_a(trx->flush_tables > 0); --trx->flush_tables; @@ -14761,8 +18192,8 @@ ha_innobase::external_lock( /* If this is a SELECT, then it is in UPDATE TABLE ... or SELECT ... FOR UPDATE */ - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; + m_prebuilt->select_lock_type = LOCK_X; + m_prebuilt->stored_select_lock_type = LOCK_X; } if (lock_type != F_UNLCK) { @@ -14773,7 +18204,7 @@ ha_innobase::external_lock( innobase_register_trx(ht, thd, trx); if (trx->isolation_level == TRX_ISO_SERIALIZABLE - && prebuilt->select_lock_type == LOCK_NONE + && m_prebuilt->select_lock_type == LOCK_NONE && thd_test_options( thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { @@ -14785,8 +18216,8 @@ ha_innobase::external_lock( can be serialized also if performed as consistent reads. 
*/ - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; + m_prebuilt->select_lock_type = LOCK_S; + m_prebuilt->stored_select_lock_type = LOCK_S; } /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK @@ -14800,7 +18231,7 @@ ha_innobase::external_lock( can hold in some cases, e.g., at the start of a stored procedure call (SQLCOM_CALL). */ - if (prebuilt->select_lock_type != LOCK_NONE) { + if (m_prebuilt->select_lock_type != LOCK_NONE) { if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES && THDVAR(thd, table_locks) @@ -14808,9 +18239,10 @@ ha_innobase::external_lock( && thd_in_lock_tables(thd)) { dberr_t error = row_lock_table_for_mysql( - prebuilt, NULL, 0); + m_prebuilt, NULL, 0); if (error != DB_SUCCESS) { + DBUG_RETURN( convert_error_code_to_mysql( error, 0, thd)); @@ -14821,28 +18253,34 @@ ha_innobase::external_lock( } trx->n_mysql_tables_in_use++; - prebuilt->mysql_has_locked = TRUE; + m_mysql_has_locked = true; if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { + && (m_prebuilt->select_lock_type != LOCK_NONE + || m_prebuilt->stored_select_lock_type != LOCK_NONE)) { ++trx->will_lock; } + TrxInInnoDB::begin_stmt(trx); + +#ifdef UNIV_DEBUG + if (thd_trx_is_dd_trx(thd)) { + trx->is_dd_trx = true; + } +#endif /* UNIV_DEBUG */ DBUG_RETURN(0); + } else { + + TrxInInnoDB::end_stmt(trx); + + DEBUG_SYNC_C("ha_innobase_end_statement"); } /* MySQL is releasing a table lock */ trx->n_mysql_tables_in_use--; - prebuilt->mysql_has_locked = FALSE; - - /* Release a possible FIFO ticket and search latch. Since we - may reserve the trx_sys->mutex, we have to release the search - system latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); + m_mysql_has_locked = false; innobase_srv_conc_force_exit_innodb(trx); @@ -14852,28 +18290,36 @@ ha_innobase::external_lock( if (trx->n_mysql_tables_in_use == 0) { trx->mysql_n_tables_locked = 0; - prebuilt->used_in_HANDLER = FALSE; + m_prebuilt->used_in_HANDLER = FALSE; if (!thd_test_options( thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { if (trx_is_started(trx)) { + innobase_commit(ht, thd, TRUE); + } else { + /* Since the trx state is TRX_NOT_STARTED, + trx_commit() will not be called. Reset + trx->is_dd_trx here */ + ut_d(trx->is_dd_trx = false); } } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { + && MVCC::is_view_active(trx->read_view)) { - /* At low transaction isolation levels we let - each consistent read set its own snapshot */ + mutex_enter(&trx_sys->mutex); - read_view_close_for_mysql(trx); + trx_sys->mvcc->view_close(trx->read_view, true); + + mutex_exit(&trx_sys->mutex); } } if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { + && lock_type != F_UNLCK + && (m_prebuilt->select_lock_type != LOCK_NONE + || m_prebuilt->stored_select_lock_type != LOCK_NONE)) { ++trx->will_lock; } @@ -14881,100 +18327,6 @@ ha_innobase::external_lock( DBUG_RETURN(0); } -/******************************************************************//** -With this function MySQL request a transactional lock to a table when -user issued query LOCK TABLES..WHERE ENGINE = InnoDB. -@return error code */ -UNIV_INTERN -int -ha_innobase::transactional_table_lock( -/*==================================*/ - THD* thd, /*!< in: handle to the user thread */ - int lock_type) /*!< in: lock type */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::transactional_table_lock"); - DBUG_PRINT("enter",("lock_type: %d", lock_type)); - - /* We do not know if MySQL can call this function before calling - external_lock(). 
To be safe, update the thd of the current table - handle. */ - - update_thd(thd); - - if (!thd_tablespace_op(thd)) { - - if (dict_table_is_discarded(prebuilt->table)) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - } else if (prebuilt->table->ibd_file_missing) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_MISSING, - table->s->table_name.str); - } - - DBUG_RETURN(HA_ERR_CRASHED); - } - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - - reset_template(); - - if (lock_type == F_WRLCK) { - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - } else if (lock_type == F_RDLCK) { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "MySQL is trying to set transactional table lock " - "with corrupted lock type to table %s, lock type " - "%d does not exist.", - table->s->table_name.str, lock_type); - - DBUG_RETURN(HA_ERR_CRASHED); - } - - /* MySQL is setting a new transactional table lock */ - - innobase_register_trx(ht, thd, trx); - - if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) { - dberr_t error; - - error = row_lock_table_for_mysql(prebuilt, NULL, 0); - - if (error != DB_SUCCESS) { - DBUG_RETURN( - convert_error_code_to_mysql( - error, prebuilt->table->flags, thd)); - } - - if (thd_test_options( - thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - /* Store the current undo_no of the transaction - so that we know where to roll back if we have - to roll back the next SQL statement */ - - trx_mark_sql_stat_end(trx); - } - } - - DBUG_RETURN(0); -} - /************************************************************************//** Here we export InnoDB status variables to MySQL. 
*/ static @@ -14999,7 +18351,6 @@ innodb_show_status( THD* thd, /*!< in: the MySQL query thread of the caller */ stat_print_fn* stat_print) { - trx_t* trx; static const char truncated_msg[] = "... truncated...\n"; const long MAX_STATUS_SIZE = 1048576; ulint trx_list_start = ULINT_UNDEFINED; @@ -15016,17 +18367,19 @@ innodb_show_status( DBUG_RETURN(0); } - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); trx_search_latch_release_if_reserved(trx); innobase_srv_conc_force_exit_innodb(trx); + TrxInInnoDB trx_in_innodb(trx); + /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE bytes of text. */ char* str; - ssize_t flen, usable_len; + ssize_t flen; mutex_enter(&srv_monitor_file_mutex); rewind(srv_monitor_file); @@ -15040,6 +18393,8 @@ innodb_show_status( flen = 0; } + ssize_t usable_len; + if (flen > MAX_STATUS_SIZE) { usable_len = MAX_STATUS_SIZE; srv_truncated_status_writes++; @@ -15050,7 +18405,12 @@ innodb_show_status( /* allocate buffer for the string, and read the contents of the temporary file */ - if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) { + /* JAN: TODO: MySQL 5.7 PSI */ + if (!(str = (char*) my_malloc( + usable_len + 1, MYF(0)))) { + /* if (!(str = (char*) my_malloc(PSI_INSTRUMENT_ME, + usable_len + 1, MYF(0)))) { + */ mutex_exit(&srv_monitor_file_mutex); DBUG_RETURN(1); } @@ -15092,163 +18452,339 @@ innodb_show_status( DBUG_RETURN(ret_val); } -/************************************************************************//** -Implements the SHOW MUTEX STATUS command. 
+/** Callback for collecting mutex statistics */ +struct ShowStatus { + + /** For tracking the mutex metrics */ + struct Value { + + /** Constructor + @param[in] name Name of the mutex + @param[in] spins Number of spins + @param[in] os_waits OS waits so far + @param[in] calls Number of calls to enter() */ + Value(const char* name, + ulint spins, + uint64_t waits, + uint64_t calls) + : + m_name(name), + m_spins(spins), + m_waits(waits), + m_calls(calls) + { + /* No op */ + } + + /** Mutex name */ + std::string m_name; + + /** Spins so far */ + ulint m_spins; + + /** Waits so far */ + uint64_t m_waits; + + /** Number of calls so far */ + uint64_t m_calls; + }; + + /** Order by m_waits, in descending order. */ + struct OrderByWaits: public std::binary_function + { + /** @return true if rhs < lhs */ + bool operator()( + const Value& lhs, + const Value& rhs) const + UNIV_NOTHROW + { + return(rhs.m_waits < lhs.m_waits); + } + }; + + typedef std::vector > Values; + + /** Collect the individual latch counts */ + struct GetCount { + typedef latch_meta_t::CounterType::Count Count; + + /** Constructor + @param[in] name Latch name + @param[in,out] values Put the values here */ + GetCount( + const char* name, + Values* values) + UNIV_NOTHROW + : + m_name(name), + m_values(values) + { + /* No op */ + } + + /** Collect the latch metrics. Ignore entries where the + spins and waits are zero. + @param[in] count The latch metrics */ + void operator()(Count* count) + UNIV_NOTHROW + { + if (count->m_spins > 0 || count->m_waits > 0) { + + m_values->push_back(Value( + m_name, + count->m_spins, + count->m_waits, + count->m_calls)); + } + } + + /** The latch name */ + const char* m_name; + + /** For collecting the active mutex stats. 
*/ + Values* m_values; + }; + + /** Constructor */ + ShowStatus() { } + + /** Callback for collecting the stats + @param[in] latch_meta Latch meta data + @return always returns true */ + bool operator()(latch_meta_t& latch_meta) + UNIV_NOTHROW + { + latch_meta_t::CounterType* counter; + + counter = latch_meta.get_counter(); + + GetCount get_count(latch_meta.get_name(), &m_values); + + counter->iterate(get_count); + + return(true); + } + + /** Implements the SHOW MUTEX STATUS command, for mutexes. + The table structure is like so: Engine | Mutex Name | Status + We store the metrics in the "Status" column as: + + spins=N,waits=N,calls=N" + + The user has to parse the dataunfortunately + @param[in,out] hton the innodb handlerton + @param[in,out] thd the MySQL query thread of the caller + @param[in,out] stat_print function for printing statistics + @return true on success. */ + bool to_string( + handlerton* hton, + THD* thd, + stat_print_fn* stat_print) + UNIV_NOTHROW; + + /** For collecting the active mutex stats. */ + Values m_values; +}; + +/** Implements the SHOW MUTEX STATUS command, for mutexes. +The table structure is like so: Engine | Mutex Name | Status +We store the metrics in the "Status" column as: + + spins=N,waits=N,calls=N" + +The user has to parse the dataunfortunately +@param[in,out] hton the innodb handlerton +@param[in,out] thd the MySQL query thread of the caller +@param[in,out] stat_print function for printing statistics +@return true on success. 
*/ +bool +ShowStatus::to_string( + handlerton* hton, + THD* thd, + stat_print_fn* stat_print) + UNIV_NOTHROW +{ + uint hton_name_len = (uint) strlen(innobase_hton_name); + + std::sort(m_values.begin(), m_values.end(), OrderByWaits()); + + Values::iterator end = m_values.end(); + + for (Values::iterator it = m_values.begin(); it != end; ++it) { + + int name_len; + char name_buf[IO_SIZE]; + + name_len = ut_snprintf( + name_buf, sizeof(name_buf), "%s", it->m_name.c_str()); + + int status_len; + char status_buf[IO_SIZE]; + + status_len = ut_snprintf( + status_buf, sizeof(status_buf), + "spins=%lu,waits=%lu,calls=%llu", + static_cast(it->m_spins), + static_cast(it->m_waits), + (ulonglong) it->m_calls); + + if (stat_print(thd, innobase_hton_name, + hton_name_len, + name_buf, static_cast(name_len), + status_buf, static_cast(status_len))) { + + return(false); + } + } + + return(true); +} + +/** Implements the SHOW MUTEX STATUS command, for mutexes. +@param[in,out] hton the innodb handlerton +@param[in,out] thd the MySQL query thread of the caller +@param[in,out] stat_print function for printing statistics @return 0 on success. 
*/ static int -innodb_mutex_show_status( -/*=====================*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread of the - caller */ - stat_print_fn* stat_print) /*!< in: function for printing - statistics */ +innodb_show_mutex_status( + handlerton* hton, + THD* thd, + stat_print_fn* stat_print) { - char buf1[IO_SIZE]; - char buf2[IO_SIZE]; - ib_mutex_t* mutex; - rw_lock_t* lock; - ulint block_mutex_oswait_count = 0; - ulint block_lock_oswait_count = 0; - ib_mutex_t* block_mutex = NULL; - rw_lock_t* block_lock = NULL; -#ifdef UNIV_DEBUG - ulint rw_lock_count= 0; - ulint rw_lock_count_spin_loop= 0; - ulint rw_lock_count_spin_rounds= 0; - ulint rw_lock_count_os_wait= 0; - ulint rw_lock_count_os_yield= 0; - ulonglong rw_lock_wait_time= 0; -#endif /* UNIV_DEBUG */ - uint buf1len; - uint buf2len; - uint hton_name_len; + DBUG_ENTER("innodb_show_mutex_status"); - hton_name_len = (uint) strlen(innobase_hton_name); + ShowStatus collector; - DBUG_ENTER("innodb_mutex_show_status"); DBUG_ASSERT(hton == innodb_hton_ptr); - mutex_enter(&mutex_list_mutex); + mutex_monitor->iterate(collector); - for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; - mutex = UT_LIST_GET_NEXT(list, mutex)) { - if (mutex->count_os_wait == 0) { - continue; - } - - if (buf_pool_is_block_mutex(mutex)) { - block_mutex = mutex; - block_mutex_oswait_count += mutex->count_os_wait; - continue; - } - - buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu", - innobase_basename(mutex->cfile_name), - (ulong) mutex->cline); - buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", - (ulong) mutex->count_os_wait); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&mutex_list_mutex); - DBUG_RETURN(1); - } + if (!collector.to_string(hton, thd, stat_print)) { + DBUG_RETURN(1); } - if (block_mutex) { - buf1len = (uint) my_snprintf(buf1, sizeof buf1, - "combined %s:%lu", - innobase_basename( - 
block_mutex->cfile_name), - (ulong) block_mutex->cline); - buf2len = (uint) my_snprintf(buf2, sizeof buf2, - "os_waits=%lu", - (ulong) block_mutex_oswait_count); + DBUG_RETURN(0); +} - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&mutex_list_mutex); - DBUG_RETURN(1); - } - } +/** Implements the SHOW MUTEX STATUS command. +@param[in,out] hton the innodb handlerton +@param[in,out] thd the MySQL query thread of the caller +@param[in,out] stat_print function for printing statistics +@return 0 on success. */ +static +int +innodb_show_rwlock_status( + handlerton* hton, + THD* thd, + stat_print_fn* stat_print) +{ + DBUG_ENTER("innodb_show_rwlock_status"); - mutex_exit(&mutex_list_mutex); + rw_lock_t* block_rwlock = NULL; + ulint block_rwlock_oswait_count = 0; + uint hton_name_len = (uint) strlen(innobase_hton_name); + + DBUG_ASSERT(hton == innodb_hton_ptr); mutex_enter(&rw_lock_list_mutex); - for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL; - lock = UT_LIST_GET_NEXT(list, lock)) { - if (lock->count_os_wait == 0) { + for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list); + rw_lock != NULL; + rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) { + + if (rw_lock->count_os_wait == 0) { continue; } - if (buf_pool_is_block_lock(lock)) { - block_lock = lock; - block_lock_oswait_count += lock->count_os_wait; + int buf1len; + char buf1[IO_SIZE]; + + if (rw_lock->is_block_lock) { + + block_rwlock = rw_lock; + block_rwlock_oswait_count += rw_lock->count_os_wait; + continue; } - buf1len = (uint) my_snprintf( - buf1, sizeof buf1, "%s:%lu", - innobase_basename(lock->cfile_name), - static_cast(lock->cline)); - buf2len = (uint) my_snprintf( - buf2, sizeof buf2, "os_waits=%lu", - static_cast(lock->count_os_wait)); + buf1len = ut_snprintf( + buf1, sizeof buf1, "rwlock: %s:%lu", + innobase_basename(rw_lock->cfile_name), + static_cast(rw_lock->cline)); + + int buf2len; + char buf2[IO_SIZE]; + + buf2len = ut_snprintf( + buf2, 
sizeof buf2, "waits=%lu", + static_cast(rw_lock->count_os_wait)); if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { + hton_name_len, + buf1, static_cast(buf1len), + buf2, static_cast(buf2len))) { + mutex_exit(&rw_lock_list_mutex); + DBUG_RETURN(1); } } - if (block_lock) { - buf1len = (uint) my_snprintf(buf1, sizeof buf1, - "combined %s:%lu", - innobase_basename( - block_lock->cfile_name), - (ulong) block_lock->cline); - buf2len = (uint) my_snprintf(buf2, sizeof buf2, - "os_waits=%lu", - (ulong) block_lock_oswait_count); + if (block_rwlock != NULL) { + + int buf1len; + char buf1[IO_SIZE]; + + buf1len = ut_snprintf( + buf1, sizeof buf1, "sum rwlock: %s:%lu", + innobase_basename(block_rwlock->cfile_name), + static_cast(block_rwlock->cline)); + + int buf2len; + char buf2[IO_SIZE]; + + buf2len = ut_snprintf( + buf2, sizeof buf2, "waits=%lu", + static_cast(block_rwlock_oswait_count)); if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { + hton_name_len, + buf1, static_cast(buf1len), + buf2, static_cast(buf2len))) { + mutex_exit(&rw_lock_list_mutex); + DBUG_RETURN(1); } } mutex_exit(&rw_lock_list_mutex); -#ifdef UNIV_DEBUG - buf2len = static_cast(my_snprintf(buf2, sizeof buf2, - "count=%lu, spin_waits=%lu, spin_rounds=%lu, " - "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", - (ulong) rw_lock_count, - (ulong) rw_lock_count_spin_loop, - (ulong) rw_lock_count_spin_rounds, - (ulong) rw_lock_count_os_wait, - (ulong) rw_lock_count_os_yield, - (ulong) (rw_lock_wait_time / 1000))); - - if (stat_print(thd, innobase_hton_name, hton_name_len, - STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { - DBUG_RETURN(1); - } -#endif /* UNIV_DEBUG */ - - /* Success */ DBUG_RETURN(0); } +/** Implements the SHOW MUTEX STATUS command. 
+@param[in,out] hton the innodb handlerton +@param[in,out] thd the MySQL query thread of the caller +@param[in,out] stat_print function for printing statistics +@return 0 on success. */ +static +int +innodb_show_latch_status( + handlerton* hton, + THD* thd, + stat_print_fn* stat_print) +{ + int ret = innodb_show_mutex_status(hton, thd, stat_print); + + if (ret != 0) { + return(ret); + } + + return(innodb_show_rwlock_status(hton, thd, stat_print)); +} + /************************************************************************//** Return 0 on success and non-zero on failure. Note: the bool return type seems to be abused here, should be an int. */ @@ -15270,8 +18806,7 @@ innobase_show_status( return(innodb_show_status(hton, thd, stat_print) != 0); case HA_ENGINE_MUTEX: - /* Non-zero return value means there was an error. */ - return(innodb_mutex_show_status(hton, thd, stat_print) != 0); + return(innodb_show_latch_status(hton, thd, stat_print) != 0); case HA_ENGINE_LOGS: /* Not handled */ @@ -15302,18 +18837,26 @@ get_share( ut_ad(share->use_count > 0), !strcmp(share->table_name, table_name)); - if (!share) { + if (share == NULL) { uint length = (uint) strlen(table_name); /* TODO: invoke HASH_MIGRATE if innobase_open_tables grows too big */ - share = (INNOBASE_SHARE*) my_malloc(sizeof(*share)+length+1, - MYF(MY_FAE | MY_ZEROFILL)); + share = reinterpret_cast( + my_malloc( + sizeof(*share) + length + 1, + MYF(MY_FAE | MY_ZEROFILL))); + /* JAN: TODO: MySQL 5.7 PSI + share = reinterpret_cast( + my_malloc(PSI_INSTRUMENT_ME, + sizeof(*share) + length + 1, + MYF(MY_FAE | MY_ZEROFILL))); + */ - share->table_name = (char*) memcpy(share + 1, - table_name, length + 1); + share->table_name = reinterpret_cast( + memcpy(share + 1, table_name, length + 1)); HASH_INSERT(INNOBASE_SHARE, table_name_hash, innobase_open_tables, fold, share); @@ -15326,7 +18869,8 @@ get_share( share->idx_trans_tbl.array_size = 0; } - share->use_count++; + ++share->use_count; + 
mysql_mutex_unlock(&innobase_share_mutex); return(share); @@ -15344,7 +18888,7 @@ free_share( #ifdef UNIV_DEBUG INNOBASE_SHARE* share2; - ulint fold = ut_fold_string(share->table_name); + ulint fold = ut_fold_string(share->table_name); HASH_SEARCH(table_name_hash, innobase_open_tables, fold, INNOBASE_SHARE*, share2, @@ -15354,15 +18898,18 @@ free_share( ut_a(share2 == share); #endif /* UNIV_DEBUG */ - if (!--share->use_count) { + --share->use_count; + + if (share->use_count == 0) { ulint fold = ut_fold_string(share->table_name); HASH_DELETE(INNOBASE_SHARE, table_name_hash, innobase_open_tables, fold, share); + thr_lock_delete(&share->lock); /* Free any memory from index translation table */ - my_free(share->idx_trans_tbl.index_mapping); + ut_free(share->idx_trans_tbl.index_mapping); my_free(share); @@ -15373,36 +18920,58 @@ free_share( mysql_mutex_unlock(&innobase_share_mutex); } +#if 0 +/*********************************************************************//** +Returns number of THR_LOCK locks used for one instance of InnoDB table. +InnoDB no longer relies on THR_LOCK locks so 0 value is returned. +Instead of THR_LOCK locks InnoDB relies on combination of metadata locks +(e.g. for LOCK TABLES and DDL) and its own locking subsystem. +Note that even though this method returns 0, SQL-layer still calls +::store_lock(), ::start_stmt() and ::external_lock() methods for InnoDB +tables. */ + +uint +ha_innobase::lock_count(void) const +/*===============================*/ +{ + return 0; +} +#endif + /*****************************************************************//** -Converts a MySQL table lock stored in the 'lock' field of the handle to -a proper type before storing pointer to the lock into an array of pointers. +Supposed to convert a MySQL table lock stored in the 'lock' field of the +handle to a proper type before storing pointer to the lock into an array +of pointers. 
+In practice, since InnoDB no longer relies on THR_LOCK locks and its +lock_count() method returns 0 it just informs storage engine about type +of THR_LOCK which SQL-layer would have acquired for this specific statement +on this specific table. MySQL also calls this if it wants to reset some table locks to a not-locked state during the processing of an SQL query. An example is that during a SELECT the read lock is released early on the 'const' tables where we only fetch one row. MySQL does not call this when it releases all locks at the end of an SQL statement. -@return pointer to the next element in the 'to' array */ -UNIV_INTERN +@return pointer to the current element in the 'to' array. */ + THR_LOCK_DATA** ha_innobase::store_lock( /*====================*/ THD* thd, /*!< in: user thread handle */ - THR_LOCK_DATA** to, /*!< in: pointer to an array - of pointers to lock structs; - pointer to the 'lock' field - of current handle is stored - next to this array */ - enum thr_lock_type lock_type) /*!< in: lock type to store in + THR_LOCK_DATA** to, /*!< in: pointer to the current + element in an array of pointers + to lock structs; + only used as return value */ + thr_lock_type lock_type) /*!< in: lock type to store in 'lock'; this may also be TL_IGNORE */ { - trx_t* trx; - - /* Note that trx in this function is NOT necessarily prebuilt->trx + /* Note that trx in this function is NOT necessarily m_prebuilt->trx because we call update_thd() later, in ::external_lock()! Failure to understand this caused a serious memory corruption bug in 5.1.11. */ - trx = check_trx_exists(thd); + trx_t* trx = check_trx_exists(thd); + + TrxInInnoDB trx_in_innodb(trx); /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE! 
Be careful to ignore TL_IGNORE if we are going to do something with @@ -15417,12 +18986,16 @@ ha_innobase::store_lock( (enum_tx_isolation) thd_tx_isolation(thd)); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { + && MVCC::is_view_active(trx->read_view)) { /* At low transaction isolation levels we let each consistent read set its own snapshot */ - read_view_close_for_mysql(trx); + mutex_enter(&trx_sys->mutex); + + trx_sys->mvcc->view_close(trx->read_view, true); + + mutex_exit(&trx_sys->mutex); } } @@ -15431,6 +19004,7 @@ ha_innobase::store_lock( const uint sql_command = thd_sql_command(thd); if (srv_read_only_mode + && !dict_table_is_intrinsic(m_prebuilt->table) && (sql_command == SQLCOM_UPDATE || sql_command == SQLCOM_INSERT || sql_command == SQLCOM_REPLACE @@ -15458,16 +19032,16 @@ ha_innobase::store_lock( detected in the function. */ dberr_t err = row_quiesce_set_state( - prebuilt->table, QUIESCE_START, trx); + m_prebuilt->table, QUIESCE_START, trx); ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED); if (trx->isolation_level == TRX_ISO_SERIALIZABLE) { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; + m_prebuilt->select_lock_type = LOCK_S; + m_prebuilt->stored_select_lock_type = LOCK_S; } else { - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->stored_select_lock_type = LOCK_NONE; } /* Check for DROP TABLE */ @@ -15475,7 +19049,7 @@ ha_innobase::store_lock( /* MySQL calls this function in DROP TABLE though this table handle may belong to another thd that is running a query. Let - us in that case skip any changes to the prebuilt struct. */ + us in that case skip any changes to the m_prebuilt struct. 
*/ /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */ } else if ((lock_type == TL_READ && in_lock_tables) @@ -15527,11 +19101,11 @@ ha_innobase::store_lock( MODE in select, then we use consistent read for select. */ - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->stored_select_lock_type = LOCK_NONE; } else { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; + m_prebuilt->select_lock_type = LOCK_S; + m_prebuilt->stored_select_lock_type = LOCK_S; } } else if (lock_type != TL_IGNORE) { @@ -15539,8 +19113,8 @@ ha_innobase::store_lock( /* We set possible LOCK_X value in external_lock, not yet here even if this would be SELECT ... FOR UPDATE */ - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; + m_prebuilt->select_lock_type = LOCK_NONE; + m_prebuilt->stored_select_lock_type = LOCK_NONE; } if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { @@ -15613,12 +19187,19 @@ ha_innobase::store_lock( *to++= &lock; if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { + && (m_prebuilt->select_lock_type != LOCK_NONE + || m_prebuilt->stored_select_lock_type != LOCK_NONE)) { ++trx->will_lock; } +#ifdef UNIV_DEBUG + if (trx->is_dd_trx) { + ut_ad(trx->will_lock == 0 + && m_prebuilt->select_lock_type == LOCK_NONE); + } +#endif /* UNIV_DEBUG */ + return(to); } @@ -15626,8 +19207,8 @@ ha_innobase::store_lock( Read the next autoinc value. Acquire the relevant locks before reading the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked on return and all relevant locks acquired. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ + dberr_t ha_innobase::innobase_get_autoinc( /*==============================*/ @@ -15635,28 +19216,28 @@ ha_innobase::innobase_get_autoinc( { *value = 0; - prebuilt->autoinc_error = innobase_lock_autoinc(); + m_prebuilt->autoinc_error = innobase_lock_autoinc(); - if (prebuilt->autoinc_error == DB_SUCCESS) { + if (m_prebuilt->autoinc_error == DB_SUCCESS) { /* Determine the first value of the interval */ - *value = dict_table_autoinc_read(prebuilt->table); + *value = dict_table_autoinc_read(m_prebuilt->table); /* It should have been initialized during open. */ if (*value == 0) { - prebuilt->autoinc_error = DB_UNSUPPORTED; - dict_table_autoinc_unlock(prebuilt->table); + m_prebuilt->autoinc_error = DB_UNSUPPORTED; + dict_table_autoinc_unlock(m_prebuilt->table); } } - return(prebuilt->autoinc_error); + return(m_prebuilt->autoinc_error); } /*******************************************************************//** This function reads the global auto-inc counter. It doesn't use the AUTOINC lock even if the lock mode is set to TRADITIONAL. 
-@return the autoinc value */ -UNIV_INTERN +@return the autoinc value */ + ulonglong ha_innobase::innobase_peek_autoinc(void) /*====================================*/ @@ -15664,19 +19245,18 @@ ha_innobase::innobase_peek_autoinc(void) ulonglong auto_inc; dict_table_t* innodb_table; - ut_a(prebuilt != NULL); - ut_a(prebuilt->table != NULL); + ut_a(m_prebuilt != NULL); + ut_a(m_prebuilt->table != NULL); - innodb_table = prebuilt->table; + innodb_table = m_prebuilt->table; dict_table_autoinc_lock(innodb_table); auto_inc = dict_table_autoinc_read(innodb_table); if (auto_inc == 0) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: AUTOINC next value generation " - "is disabled for '%s'\n", innodb_table->name); + ib::info() << "AUTOINC next value generation is disabled for" + " '" << innodb_table->name << "'"; } dict_table_autoinc_unlock(innodb_table); @@ -15686,7 +19266,7 @@ ha_innobase::innobase_peek_autoinc(void) /*********************************************************************//** Returns the value of the auto-inc counter in *first_value and ~0 on failure. */ -UNIV_INTERN + void ha_innobase::get_auto_increment( /*============================*/ @@ -15703,7 +19283,7 @@ ha_innobase::get_auto_increment( dberr_t error; ulonglong autoinc = 0; - /* Prepare prebuilt->trx in the table handle */ + /* Prepare m_prebuilt->trx in the table handle */ update_thd(ha_thd()); error = innobase_get_autoinc(&autoinc); @@ -15721,7 +19301,9 @@ ha_innobase::get_auto_increment( called and count down from that as rows are written (see write_row()). */ - trx = prebuilt->trx; + trx = m_prebuilt->trx; + + TrxInInnoDB trx_in_innodb(trx); /* Note: We can't rely on *first_value since some MySQL engines, in particular the partition engine, don't initialize it to 0 when @@ -15730,8 +19312,7 @@ ha_innobase::get_auto_increment( /* We need the upper limit of the col type to check for whether we update the table autoinc counter or not. 
*/ - ulonglong col_max_value = innobase_get_int_col_max_value( - table->next_number_field); + ulonglong col_max_value = innobase_get_int_col_max_value(table->next_number_field); /* Called for the first time ? */ if (trx->n_autoinc_rows == 0) { @@ -15747,19 +19328,19 @@ ha_innobase::get_auto_increment( set_if_bigger(*first_value, autoinc); /* Not in the middle of a mult-row INSERT. */ - } else if (prebuilt->autoinc_last_value == 0) { + } else if (m_prebuilt->autoinc_last_value == 0) { set_if_bigger(*first_value, autoinc); } - if (*first_value > col_max_value) - { - /* Out of range number. Let handler::update_auto_increment() - take care of this */ - prebuilt->autoinc_last_value = 0; - dict_table_autoinc_unlock(prebuilt->table); - *nb_reserved_values= 0; - return; - } + if (*first_value > col_max_value) { + /* Out of range number. Let handler::update_auto_increment() + take care of this */ + m_prebuilt->autoinc_last_value = 0; + dict_table_autoinc_unlock(m_prebuilt->table); + *nb_reserved_values= 0; + return; + } + *nb_reserved_values = trx->n_autoinc_rows; /* With old style AUTOINC locking we only update the table's @@ -15770,23 +19351,24 @@ ha_innobase::get_auto_increment( current = *first_value; - if (prebuilt->autoinc_increment != increment) { + if (m_prebuilt->autoinc_increment != increment) { WSREP_DEBUG("autoinc decrease: %llu -> %llu\n" "THD: %ld, current: %llu, autoinc: %llu", - prebuilt->autoinc_increment, + m_prebuilt->autoinc_increment, increment, thd_get_thread_id(ha_thd()), current, autoinc); - if (!wsrep_on(ha_thd())) - { - current = autoinc - prebuilt->autoinc_increment; + + if (!wsrep_on(ha_thd())) { + current = autoinc - m_prebuilt->autoinc_increment; } current = innobase_next_autoinc( current, 1, increment, offset, col_max_value); - dict_table_autoinc_initialize(prebuilt->table, current); + dict_table_autoinc_initialize( + m_prebuilt->table, current); *first_value = current; } @@ -15796,69 +19378,35 @@ ha_innobase::get_auto_increment( current, 
*nb_reserved_values, increment, offset, col_max_value); - prebuilt->autoinc_last_value = next_value; + m_prebuilt->autoinc_last_value = next_value; - if (prebuilt->autoinc_last_value < *first_value) { + if (m_prebuilt->autoinc_last_value < *first_value) { *first_value = (~(ulonglong) 0); } else { /* Update the table autoinc variable */ dict_table_autoinc_update_if_greater( - prebuilt->table, prebuilt->autoinc_last_value); + m_prebuilt->table, + m_prebuilt->autoinc_last_value); } } else { /* This will force write_row() into attempting an update of the table's AUTOINC counter. */ - prebuilt->autoinc_last_value = 0; + m_prebuilt->autoinc_last_value = 0; } /* The increment to be used to increase the AUTOINC value, we use this in write_row() and update_row() to increase the autoinc counter for columns that are filled by the user. We need the offset and the increment. */ - prebuilt->autoinc_offset = offset; - prebuilt->autoinc_increment = increment; + m_prebuilt->autoinc_offset = offset; + m_prebuilt->autoinc_increment = increment; - dict_table_autoinc_unlock(prebuilt->table); -} - -/*******************************************************************//** -Reset the auto-increment counter to the given value, i.e. the next row -inserted will get the given value. This is called e.g. after TRUNCATE -is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is -returned by storage engines that don't support this operation. -@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::reset_auto_increment( -/*==============================*/ - ulonglong value) /*!< in: new value for table autoinc */ -{ - DBUG_ENTER("ha_innobase::reset_auto_increment"); - - dberr_t error; - - update_thd(ha_thd()); - - error = row_lock_table_autoinc_for_mysql(prebuilt); - - if (error != DB_SUCCESS) { - DBUG_RETURN(convert_error_code_to_mysql( - error, prebuilt->table->flags, user_thd)); - } - - /* The next value can never be 0. 
*/ - if (value == 0) { - value = 1; - } - - innobase_reset_autoinc(value); - - DBUG_RETURN(0); + dict_table_autoinc_unlock(m_prebuilt->table); } /*******************************************************************//** See comment in handler.cc */ -UNIV_INTERN + bool ha_innobase::get_error_message( /*===========================*/ @@ -15878,24 +19426,21 @@ ha_innobase::get_error_message( return(FALSE); } -/*******************************************************************//** - Retrieves the names of the table and the key for which there was a - duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY. +/** Retrieves the names of the table and the key for which there was a +duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY. - If any of the names is not available, then this method will return - false and will not change any of child_table_name or child_key_name. +If any of the names is not available, then this method will return +false and will not change any of child_table_name or child_key_name. - @param child_table_name[out] Table name - @param child_table_name_len[in] Table name buffer size - @param child_key_name[out] Key name - @param child_key_name_len[in] Key name buffer size +@param[out] child_table_name Table name +@param[in] child_table_name_len Table name buffer size +@param[out] child_key_name Key name +@param[in] child_key_name_len Key name buffer size - @retval true table and key names were available - and were written into the corresponding - out parameters. - @retval false table and key names were not available, - the out parameters were not touched. -*/ +@retval true table and key names were available and were written into the +corresponding out parameters. +@retval false table and key names were not available, the out parameters +were not touched. 
*/ bool ha_innobase::get_foreign_dup_key( /*=============================*/ @@ -15906,10 +19451,10 @@ ha_innobase::get_foreign_dup_key( { const dict_index_t* err_index; - ut_a(prebuilt->trx != NULL); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); + ut_a(m_prebuilt->trx != NULL); + ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); - err_index = trx_get_error_info(prebuilt->trx); + err_index = trx_get_error_info(m_prebuilt->trx); if (err_index == NULL) { return(false); @@ -15918,20 +19463,24 @@ ha_innobase::get_foreign_dup_key( /* copy table name (and convert from filename-safe encoding to system_charset_info) */ - char* p; - p = strchr(err_index->table->name, '/'); + char* p = strchr(err_index->table->name.m_name, '/'); + /* strip ".../" prefix if any */ if (p != NULL) { p++; } else { - p = err_index->table->name; + p = err_index->table->name.m_name; } - uint len; + + size_t len; + len = filename_to_tablename(p, child_table_name, child_table_name_len); + child_table_name[len] = '\0'; /* copy index name */ - ut_snprintf(child_key_name, child_key_name_len, "%s", err_index->name); + ut_snprintf(child_key_name, child_key_name_len, "%s", + err_index->name()); return(true); } @@ -15940,8 +19489,8 @@ ha_innobase::get_foreign_dup_key( Compares two 'refs'. A 'ref' is the (internal) primary key value of the row. If there is no explicitly declared non-null unique key or a primary key, then InnoDB internally uses the row id as the primary key. 
-@return < 0 if ref1 < ref2, 0 if equal, else > 0 */ -UNIV_INTERN +@return < 0 if ref1 < ref2, 0 if equal, else > 0 */ + int ha_innobase::cmp_ref( /*=================*/ @@ -15958,7 +19507,7 @@ ha_innobase::cmp_ref( uint len2; int result; - if (prebuilt->clust_index_was_generated) { + if (m_prebuilt->clust_index_was_generated) { /* The 'ref' is an InnoDB row id */ return(memcmp(ref1, ref2, DATA_ROW_ID_LEN)); @@ -15970,7 +19519,7 @@ ha_innobase::cmp_ref( key_part = table->key_info[table->s->primary_key].key_part; key_part_end = key_part - + table->key_info[table->s->primary_key].user_defined_key_parts; + + table->key_info[table->s->primary_key].user_defined_key_parts; for (; key_part != key_part_end; ++key_part) { field = key_part->field; @@ -16007,8 +19556,8 @@ ha_innobase::cmp_ref( /*******************************************************************//** Ask InnoDB if a query to a table can be cached. -@return TRUE if query caching of the table is permitted */ -UNIV_INTERN +@return TRUE if query caching of the table is permitted */ + my_bool ha_innobase::register_query_cache_table( /*====================================*/ @@ -16023,34 +19572,13 @@ ha_innobase::register_query_cache_table( is permitted */ ulonglong *engine_data) /*!< in/out: data to call_back */ { - *call_back = innobase_query_caching_of_table_permitted; *engine_data = 0; - return(innobase_query_caching_of_table_permitted(thd, table_key, - key_length, - engine_data)); -} + *call_back = innobase_query_caching_of_table_permitted; -/*******************************************************************//** -Get the bin log name. */ -UNIV_INTERN -const char* -ha_innobase::get_mysql_bin_log_name() -/*=================================*/ -{ - return(trx_sys_mysql_bin_log_name); -} - -/*******************************************************************//** -Get the bin log offset (or file position). 
*/ -UNIV_INTERN -ulonglong -ha_innobase::get_mysql_bin_log_pos() -/*================================*/ -{ - /* trx... is ib_int64_t, which is a typedef for a 64-bit integer - (__int64 or longlong) so it's ok to cast it to ulonglong. */ - - return(trx_sys_mysql_bin_log_pos); + return(innobase_query_caching_of_table_permitted( + thd, table_key, + static_cast(key_length), + engine_data)); } /******************************************************************//** @@ -16058,8 +19586,7 @@ This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. The function finds charset information and returns length of prefix_len characters in the index field in bytes. -@return number of bytes occupied by the first n characters */ -UNIV_INTERN +@return number of bytes occupied by the first n characters */ ulint innobase_get_at_most_n_mbchars( /*===========================*/ @@ -16112,12 +19639,13 @@ innobase_get_at_most_n_mbchars( if (char_length > data_len) { char_length = data_len; } + } else if (data_len < prefix_len) { + + char_length = data_len; + } else { - if (data_len < prefix_len) { - char_length = data_len; - } else { - char_length = prefix_len; - } + + char_length = prefix_len; } return(char_length); @@ -16125,7 +19653,7 @@ innobase_get_at_most_n_mbchars( /*******************************************************************//** This function is used to prepare an X/Open XA distributed transaction. -@return 0 or error number */ +@return 0 or error number */ static int innobase_xa_prepare( @@ -16138,23 +19666,11 @@ innobase_xa_prepare( false - the current SQL statement ended */ { - int error = 0; trx_t* trx = check_trx_exists(thd); DBUG_ASSERT(hton == innodb_hton_ptr); - /* we use support_xa value as it was seen at transaction start - time, not the current session variable value. 
Any possible changes - to the session variable take effect only in the next transaction */ - if (!trx->support_xa) { - -#ifdef WITH_WSREP - thd_get_xid(thd, (MYSQL_XID*) &trx->xid); -#endif // WITH_WSREP - return(0); - } - - thd_get_xid(thd, (MYSQL_XID*) &trx->xid); + thd_get_xid(thd, (MYSQL_XID*) trx->xid); /* Release a possible FIFO ticket and search latch. Since we will reserve the trx_sys->mutex, we have to release the search system @@ -16164,10 +19680,20 @@ innobase_xa_prepare( innobase_srv_conc_force_exit_innodb(trx); + TrxInInnoDB trx_in_innodb(trx); + + if (trx_in_innodb.is_aborted()) { + + innobase_rollback(hton, thd, prepare_trx); + + return(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, thd)); + } + if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - sql_print_error("Transaction not registered for MySQL 2PC, " - "but transaction is active"); + sql_print_error("Transaction not registered for MySQL 2PC," + " but transaction is active"); } if (prepare_trx @@ -16178,9 +19704,18 @@ innobase_xa_prepare( ut_ad(trx_is_registered_for_2pc(trx)); - trx_prepare_for_mysql(trx); + dberr_t err = trx_prepare_for_mysql(trx); + + ut_ad(err == DB_SUCCESS || err == DB_FORCED_ABORT); + + if (err == DB_FORCED_ABORT) { + + innobase_rollback(hton, thd, prepare_trx); + + return(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, thd)); + } - error = 0; } else { /* We just mark the SQL statement ended and do not do a transaction prepare */ @@ -16197,12 +19732,30 @@ innobase_xa_prepare( trx_mark_sql_stat_end(trx); } - return(error); + if (thd_sql_command(thd) != SQLCOM_XA_PREPARE + && (prepare_trx + || !thd_test_options( + thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + + /* For mysqlbackup to work the order of transactions in binlog + and InnoDB must be the same. Consider the situation + + thread1> prepare; write to binlog; ... + + thread2> prepare; write to binlog; commit + thread1> ... 
commit + + The server guarantees that writes to the binary log + and commits are in the same order, so we do not have + to handle this case. */ + } + + return(0); } /*******************************************************************//** This function is used to recover X/Open XA distributed transactions. -@return number of prepared transactions stored in xid_list */ +@return number of prepared transactions stored in xid_list */ static int innobase_xa_recover( @@ -16224,7 +19777,7 @@ innobase_xa_recover( /*******************************************************************//** This function is used to commit one X/Open XA distributed transaction which is in the prepared state -@return 0 or error number */ +@return 0 or error number */ static int innobase_commit_by_xid( @@ -16232,15 +19785,20 @@ innobase_commit_by_xid( handlerton* hton, XID* xid) /*!< in: X/Open XA transaction identification */ { - trx_t* trx; - DBUG_ASSERT(hton == innodb_hton_ptr); - trx = trx_get_trx_by_xid(xid); + trx_t* trx = trx_get_trx_by_xid(xid); + + if (trx != NULL) { + TrxInInnoDB trx_in_innodb(trx); - if (trx) { innobase_commit_low(trx); + ut_ad(trx->mysql_thd == NULL); + /* use cases are: disconnected xa, slave xa, recovery */ + trx_deregister_from_2pc(trx); + ut_ad(!trx->will_lock); /* trx cache requirement */ trx_free_for_background(trx); + return(XA_OK); } else { return(XAER_NOTA); @@ -16250,7 +19808,7 @@ innobase_commit_by_xid( /*******************************************************************//** This function is used to rollback one X/Open XA distributed transaction which is in the prepared state -@return 0 or error number */ +@return 0 or error number */ static int innobase_rollback_by_xid( @@ -16259,21 +19817,27 @@ innobase_rollback_by_xid( XID* xid) /*!< in: X/Open XA transaction identification */ { - trx_t* trx; - DBUG_ASSERT(hton == innodb_hton_ptr); - trx = trx_get_trx_by_xid(xid); + trx_t* trx = trx_get_trx_by_xid(xid); + + if (trx != NULL) { + TrxInInnoDB 
trx_in_innodb(trx); - if (trx) { int ret = innobase_rollback_trx(trx); + + trx_deregister_from_2pc(trx); + ut_ad(!trx->will_lock); trx_free_for_background(trx); + return(ret); } else { return(XAER_NOTA); } } +#ifdef INNOBASE_CURSOR_VIEW + /*******************************************************************//** Create a consistent view for a cursor based on current transaction which is created if the corresponding MySQL thread still lacks one. @@ -16328,10 +19892,8 @@ innobase_set_cursor_view( read_cursor_set_for_mysql(check_trx_exists(thd), (cursor_view_t*) curview); } +#endif /* INNOBASE_CURSOR_VIEW */ -/*******************************************************************//** -*/ -UNIV_INTERN bool ha_innobase::check_if_incompatible_data( /*====================================*/ @@ -16344,7 +19906,7 @@ ha_innobase::check_if_incompatible_data( param_new = info->option_struct; param_old = table->s->option_struct; - innobase_copy_frm_flags_from_create_info(prebuilt->table, info); + innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info); if (table_changes != IS_EQUAL_YES) { @@ -16352,8 +19914,8 @@ ha_innobase::check_if_incompatible_data( } /* Check that auto_increment value was not changed */ - if ((info->used_fields & HA_CREATE_USED_AUTO) && - info->auto_increment_value != 0) { + if ((info->used_fields & HA_CREATE_USED_AUTO) + && info->auto_increment_value != 0) { return(COMPATIBLE_DATA_NO); } @@ -16396,6 +19958,7 @@ innodb_io_capacity_max_update( from check function */ { ulong in_val = *static_cast(save); + if (in_val < srv_io_capacity) { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, @@ -16406,7 +19969,7 @@ innodb_io_capacity_max_update( srv_io_capacity = in_val; push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, + ER_WRONG_ARGUMENTS, "Setting innodb_io_capacity to %lu", srv_io_capacity); } @@ -16435,8 +19998,8 @@ innodb_io_capacity_update( push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, 
ER_WRONG_ARGUMENTS, "Setting innodb_io_capacity to %lu" - " higher than innodb_io_capacity_max %lu", - in_val, srv_max_io_capacity); + " higher than innodb_io_capacity_max %lu", + in_val, srv_max_io_capacity); srv_max_io_capacity = in_val * 2; @@ -16516,16 +20079,9 @@ innodb_max_dirty_pages_pct_lwm_update( srv_max_dirty_pages_pct_lwm = in_val; } -UNIV_INTERN -void -ha_innobase::set_partition_owner_stats(ha_statistics *stats) -{ - ha_partition_stats= stats; -} - /************************************************************//** Validate the file format name and return its corresponding id. -@return valid file format id */ +@return valid file format id */ static uint innobase_file_format_name_lookup( @@ -16592,7 +20148,7 @@ innobase_file_format_validate_and_set( /*************************************************************//** Check if it is a valid file format. This function is registered as a callback with MySQL. -@return 0 for valid file format */ +@return 0 for valid file format */ static int innodb_file_format_name_validate( @@ -16654,6 +20210,10 @@ innodb_file_format_name_update( ut_a(var_ptr != NULL); ut_a(save != NULL); + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, deprecated_file_format); + format_name = *static_cast(save); if (format_name) { @@ -16673,7 +20233,7 @@ innodb_file_format_name_update( /*************************************************************//** Check if valid argument to innodb_file_format_max. This function is registered as a callback with MySQL. 
-@return 0 for valid file format */ +@return 0 for valid file format */ static int innodb_file_format_max_validate( @@ -16713,9 +20273,9 @@ innodb_file_format_max_validate( push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, - "InnoDB: invalid innodb_file_format_max " - "value; can be any format up to %s " - "or equivalent id of %d", + "InnoDB: invalid innodb_file_format_max" + " value; can be any format up to %s" + " or equivalent id of %d", trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX), UNIV_FORMAT_MAX); } @@ -16747,6 +20307,10 @@ innodb_file_format_max_update( ut_a(save != NULL); ut_a(var_ptr != NULL); + + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, deprecated_file_format_max); + format_name_in = *static_cast(save); if (!format_name_in) { @@ -16769,13 +20333,29 @@ innodb_file_format_max_update( /* Update the max format id in the system tablespace. */ if (trx_sys_file_format_max_set(format_id, format_name_out)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " [Info] InnoDB: the file format in the system " - "tablespace is now set to %s.\n", *format_name_out); + ib::info() << "The file format in the system tablespace is now" + " set to " << *format_name_out << "."; } } +/** Update innodb_large_prefix. +@param[in,out] thd MySQL client connection +@param[out] var_ptr current value +@param[in] save to-be-assigned value */ +static +void +innodb_large_prefix_update( + THD* thd, + st_mysql_sys_var*, + void* var_ptr, + const void* save) +{ + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, deprecated_large_prefix); + + *static_cast(var_ptr) = *static_cast(save); +} + /*************************************************************//** Check whether valid argument given to innobase_*_stopword_table. This function is registered as a callback with MySQL. 
@@ -16819,6 +20399,32 @@ innodb_stopword_table_validate( return(ret); } +/** Update the system variable innodb_buffer_pool_size using the "saved" +value. This function is registered as a callback with MySQL. +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ +static +void +innodb_buffer_pool_size_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + longlong in_val = *static_cast(save); + + ut_snprintf(export_vars.innodb_buffer_pool_resize_status, + sizeof(export_vars.innodb_buffer_pool_resize_status), + "Requested to resize buffer pool."); + + os_event_set(srv_buf_resize_event); + + ib::info() << export_vars.innodb_buffer_pool_resize_status + << " (new size: " << in_val << " bytes)"; +} + /*************************************************************//** Check whether valid argument given to "innodb_fts_internal_tbl_name" This function is registered as a callback with MySQL. @@ -16871,44 +20477,6 @@ innodb_internal_table_validate( return(ret); } -/****************************************************************//** -Update global variable "fts_internal_tbl_name" with the "saved" -stopword table name value. This function is registered as a callback -with MySQL. 
*/ -static -void -innodb_internal_table_update( -/*=========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - const char* table_name; - char* old; - - ut_a(save != NULL); - ut_a(var_ptr != NULL); - - table_name = *static_cast(save); - old = *(char**) var_ptr; - - if (table_name) { - *(char**) var_ptr = my_strdup(table_name, MYF(0)); - } else { - *(char**) var_ptr = NULL; - } - - if (old) { - my_free(old); - } - - fts_internal_tbl_name = *(char**) var_ptr; -} - /****************************************************************//** Update the system variable innodb_adaptive_hash_index using the "saved" value. This function is registered as a callback with MySQL. */ @@ -16927,7 +20495,7 @@ innodb_adaptive_hash_index_update( if (*(my_bool*) save) { btr_search_enable(); } else { - btr_search_disable(); + btr_search_disable(true); } } @@ -16990,9 +20558,9 @@ innodb_change_buffer_max_size_update( const void* save) /*!< in: immediate result from check function */ { - innobase_change_buffer_max_size = + srv_change_buffer_max_size = (*static_cast(save)); - ibuf_max_size_update(innobase_change_buffer_max_size); + ibuf_max_size_update(srv_change_buffer_max_size); } #ifdef UNIV_DEBUG @@ -17015,9 +20583,8 @@ innodb_save_page_no( { srv_saved_page_number_debug = *static_cast(save); - ib_logf(IB_LOG_LEVEL_INFO, - "Saving InnoDB page number: %lu", - srv_saved_page_number_debug); + ib::info() << "Saving InnoDB page number: " + << srv_saved_page_number_debug; } /****************************************************************//** @@ -17034,33 +20601,45 @@ innodb_make_page_dirty( const void* save) /*!< in: immediate result from check function */ { - mtr_t mtr; - ulong space_id = *static_cast(save); + mtr_t mtr; + ulong space_id = *static_cast(save); + fil_space_t* space = 
fil_space_acquire_silent(space_id); - mtr_start(&mtr); + if (space == NULL) { + return; + } - buf_block_t* block = buf_page_get( - space_id, 0, srv_saved_page_number_debug, RW_X_LATCH, &mtr); + if (srv_saved_page_number_debug > space->size) { + fil_space_release(space); + return; + } + + mtr.start(); + mtr.set_named_space(space); + + buf_block_t* block = buf_page_get( + page_id_t(space_id, srv_saved_page_number_debug), + page_size_t(space->flags), RW_X_LATCH, &mtr); + + if (block != NULL) { + byte* page = block->frame; + + ib::info() << "Dirtying page: " << page_id_t( + page_get_space_id(page), page_get_page_no(page)); - if (block) { - byte* page = block->frame; - ib_logf(IB_LOG_LEVEL_INFO, - "Dirtying page:%lu of space:%lu", - page_get_page_no(page), - page_get_space_id(page)); mlog_write_ulint(page + FIL_PAGE_TYPE, fil_page_get_type(page), MLOG_2BYTES, &mtr); } - mtr_commit(&mtr); + mtr.commit(); + fil_space_release(space); } #endif // UNIV_DEBUG - /*************************************************************//** Find the corresponding ibuf_use_t value that indexes into innobase_change_buffering_values[] array for the input change buffering option name. 
-@return corresponding IBUF_USE_* value for the input variable +@return corresponding IBUF_USE_* value for the input variable name, or IBUF_USE_COUNT if not able to find a match */ static ibuf_use_t @@ -17069,14 +20648,14 @@ innodb_find_change_buffering_value( const char* input_name) /*!< in: input change buffering option name */ { - ulint use; + for (ulint i = 0; + i < UT_ARR_SIZE(innobase_change_buffering_values); + ++i) { - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { /* found a match */ if (!innobase_strcasecmp( - input_name, innobase_change_buffering_values[use])) { - return((ibuf_use_t) use); + input_name, innobase_change_buffering_values[i])) { + return(static_cast(i)); } } @@ -17087,7 +20666,7 @@ innodb_find_change_buffering_value( /*************************************************************//** Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. -@return 0 for valid innodb_change_buffering */ +@return 0 for valid innodb_change_buffering */ static int innodb_change_buffering_validate( @@ -17160,7 +20739,7 @@ innodb_change_buffering_update( /*************************************************************//** Just emit a warning that the usage of the variable is deprecated. -@return 0 */ +@return 0 */ static void innodb_stats_sample_pages_update( @@ -17173,19 +20752,16 @@ innodb_stats_sample_pages_update( const void* save) /*!< in: immediate result from check function */ { -#define STATS_SAMPLE_PAGES_DEPRECATED_MSG \ - "Using innodb_stats_sample_pages is deprecated and " \ - "the variable may be removed in future releases. " \ - "Please use innodb_stats_transient_sample_pages " \ - "instead." + + const char* STATS_SAMPLE_PAGES_DEPRECATED_MSG = + "Using innodb_stats_sample_pages is deprecated and" + " the variable may be removed in future releases." 
+ " Please use innodb_stats_transient_sample_pages instead."; push_warning(thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: %s\n", - STATS_SAMPLE_PAGES_DEPRECATED_MSG); + ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG; srv_stats_transient_sample_pages = *static_cast(save); @@ -17220,11 +20796,15 @@ innodb_monitor_set_option( exisitng monitor counter (status variable), make special processing to remember existing counter value. */ - if (monitor_info->monitor_type - & MONITOR_EXISTING) { + if (monitor_info->monitor_type & MONITOR_EXISTING) { srv_mon_process_existing_counter( monitor_id, MONITOR_TURN_ON); } + + if (MONITOR_IS_ON(MONITOR_LATCHES)) { + + mutex_monitor->enable(); + } break; case MONITOR_TURN_OFF: @@ -17235,14 +20815,25 @@ innodb_monitor_set_option( MONITOR_OFF(monitor_id); MONITOR_SET_OFF(monitor_id); + + if (!MONITOR_IS_ON(MONITOR_LATCHES)) { + + mutex_monitor->disable(); + } break; case MONITOR_RESET_VALUE: srv_mon_reset(monitor_id); + + if (monitor_id == (MONITOR_LATCHES)) { + + mutex_monitor->reset(); + } break; case MONITOR_RESET_ALL_VALUE: srv_mon_reset_all(monitor_id); + mutex_monitor->reset(); break; default: @@ -17315,7 +20906,7 @@ innodb_monitor_update_wildcard( /*************************************************************//** Given a configuration variable name, find corresponding monitor counter and return its monitor ID if found. -@return monitor ID if found, MONITOR_NO_MATCH if there is no match */ +@return monitor ID if found, MONITOR_NO_MATCH if there is no match */ static ulint innodb_monitor_id_by_name_get( @@ -17346,7 +20937,7 @@ innodb_monitor_id_by_name_get( /*************************************************************//** Validate that the passed in monitor name matches at least one monitor counter name with wildcard compare. 
-@return TRUE if at least one monitor name matches */ +@return TRUE if at least one monitor name matches */ static ibool innodb_monitor_validate_wildcard_name( @@ -17365,7 +20956,7 @@ innodb_monitor_validate_wildcard_name( /*************************************************************//** Validate the passed in monitor name, find and save the corresponding monitor name in the function parameter "save". -@return 0 if monitor name is valid */ +@return 0 if monitor name is valid */ static int innodb_monitor_valid_byname( @@ -17428,7 +21019,7 @@ innodb_monitor_valid_byname( /*************************************************************//** Validate passed-in "value" is a valid monitor counter name. This function is registered as a callback with MySQL. -@return 0 for valid name */ +@return 0 for valid name */ static int innodb_monitor_validate( @@ -17456,7 +21047,12 @@ innodb_monitor_validate( by InnoDB, so we can access it in another callback function innodb_monitor_update() and free it appropriately */ if (name) { - monitor_name = my_strdup(name, MYF(0)); + /* JAN: TODO: MySQL 5.7 PSI + monitor_name = my_strdup(PSI_INSTRUMENT_ME, + name, MYF(0)); + */ + monitor_name = my_strdup( + name, MYF(0)); } else { return(1); } @@ -17524,14 +21120,14 @@ innodb_monitor_update( push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, ER_NO_DEFAULT, - "Default value is not defined for " - "this set option. Please specify " - "correct counter or module name."); + "Default value is not defined for" + " this set option. Please specify" + " correct counter or module name."); } else { sql_print_error( - "Default value is not defined for " - "this set option. Please specify " - "correct counter or module name.\n"); + "Default value is not defined for" + " this set option. Please specify" + " correct counter or module name.\n"); } if (var_ptr) { @@ -17574,7 +21170,7 @@ exit: been turned on, we will set err_monitor. 
Print related information */ if (err_monitor) { - sql_print_warning("Monitor %s is already enabled.", + sql_print_warning("InnoDB: Monitor %s is already enabled.", srv_mon_get_name((monitor_id_t) err_monitor)); } @@ -17585,13 +21181,13 @@ exit: return; } -#ifdef __WIN__ +#ifdef _WIN32 /*************************************************************//** Validate if passed-in "value" is a valid value for innodb_buffer_pool_filename. On Windows, file names with colon (:) are not allowed. -@return 0 for valid name */ +@return 0 for valid name */ static int innodb_srv_buf_dump_filename_validate( @@ -17603,16 +21199,15 @@ innodb_srv_buf_dump_filename_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming string */ { - const char* buf_name; char buff[OS_FILE_MAX_PATH]; - int len= sizeof(buff); + int len = sizeof(buff); ut_a(save != NULL); ut_a(value != NULL); - buf_name = value->val_str(value, buff, &len); + const char* buf_name = value->val_str(value, buff, &len); - if (buf_name) { + if (buf_name != NULL) { if (is_filename_allowed(buf_name, len, FALSE)){ *static_cast(save) = buf_name; return(0); @@ -17620,17 +21215,17 @@ innodb_srv_buf_dump_filename_validate( push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, - "InnoDB: innodb_buffer_pool_filename " - "cannot have colon (:) in the file name."); + "InnoDB: innodb_buffer_pool_filename" + " cannot have colon (:) in the file name."); } } return(1); } -#else /* __WIN__ */ +#else /* _WIN32 */ # define innodb_srv_buf_dump_filename_validate NULL -#endif /* __WIN__ */ +#endif /* _WIN32 */ #ifdef UNIV_DEBUG static char* srv_buffer_pool_evict; @@ -17799,6 +21394,15 @@ innodb_defragment_frequency_update( (ulonglong) (1000000.0 / srv_defragment_frequency)); } +static inline char *my_strtok_r(char *str, const char *delim, char **saveptr) +{ +#if defined _WIN32 + return strtok_s(str, delim, saveptr); +#else + return strtok_r(str, delim, saveptr); +#endif +} + 
/****************************************************************//** Parse and enable InnoDB monitor counters during server startup. User can list the monitor counters/groups to be enable by specifying @@ -17820,9 +21424,9 @@ innodb_enable_monitor_at_startup( and/or counter group name, and calling innodb_monitor_update() if successfully updated. Please note that the "str" would be changed by strtok_r() as it walks through it. */ - for (char* option = strtok_r(str, sep, &last); + for (char* option = my_strtok_r(str, sep, &last); option; - option = strtok_r(NULL, sep, &last)) { + option = my_strtok_r(NULL, sep, &last)) { ulint ret; char* option_name; @@ -17853,6 +21457,7 @@ show_innodb_vars( innodb_export_status(); var->type = SHOW_ARRAY; var->value = (char*) &innodb_status_variables; + //var->scope = SHOW_SCOPE_GLOBAL; return(0); } @@ -17863,7 +21468,6 @@ system default primary index name 'GEN_CLUST_INDEX'. If a name matches, this function pushes an warning message to the client, and returns true. @return true if the index name matches the reserved name */ -UNIV_INTERN bool innobase_index_name_is_reserved( /*============================*/ @@ -17884,10 +21488,10 @@ innobase_index_name_is_reserved( push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_NAME_FOR_INDEX, - "Cannot Create Index with name " - "'%s'. The name is reserved " - "for the system default primary " - "index.", + "Cannot Create Index with name" + " '%s'. 
The name is reserved" + " for the system default primary" + " index.", innobase_index_reserve_name); my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), @@ -17902,35 +21506,28 @@ innobase_index_name_is_reserved( /*********************************************************************** Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id +of m_prebuilt->fts_doc_id @return the relevance ranking value */ -UNIV_INTERN float innobase_fts_retrieve_ranking( /*============================*/ FT_INFO * fts_hdl) /*!< in: FTS handler */ { - row_prebuilt_t* ft_prebuilt; fts_result_t* result; + row_prebuilt_t* ft_prebuilt; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; + result = reinterpret_cast(fts_hdl)->ft_result; - ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt; + ft_prebuilt = reinterpret_cast(fts_hdl)->ft_prebuilt; - if (ft_prebuilt->read_just_key) { - fts_ranking_t* ranking = - rbt_value(fts_ranking_t, result->current); - return(ranking->rank); - } + fts_ranking_t* ranking = rbt_value(fts_ranking_t, result->current); + ft_prebuilt->fts_doc_id= ranking->doc_id; - /* Retrieve the ranking value for doc_id with value of - prebuilt->fts_doc_id */ - return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id)); + return(ranking->rank); } /*********************************************************************** Free the memory for the FTS handler */ -UNIV_INTERN void innobase_fts_close_ranking( /*=======================*/ @@ -17938,7 +21535,7 @@ innobase_fts_close_ranking( { fts_result_t* result; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; + result = reinterpret_cast(fts_hdl)->ft_result; fts_query_free_result(result); @@ -17949,9 +21546,8 @@ innobase_fts_close_ranking( /*********************************************************************** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id +of m_prebuilt->fts_doc_id @return the relevance ranking value */ -UNIV_INTERN float innobase_fts_find_ranking( 
/*======================*/ @@ -17959,22 +21555,43 @@ innobase_fts_find_ranking( uchar* record, /*!< in: Unused */ uint len) /*!< in: Unused */ { - row_prebuilt_t* ft_prebuilt; fts_result_t* result; + row_prebuilt_t* ft_prebuilt; - ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; + ft_prebuilt = reinterpret_cast(fts_hdl)->ft_prebuilt; + result = reinterpret_cast(fts_hdl)->ft_result; /* Retrieve the ranking value for doc_id with value of - prebuilt->fts_doc_id */ + m_prebuilt->fts_doc_id */ return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id)); } #ifdef UNIV_DEBUG +static my_bool innodb_background_drop_list_empty = TRUE; static my_bool innodb_purge_run_now = TRUE; static my_bool innodb_purge_stop_now = TRUE; static my_bool innodb_log_checkpoint_now = TRUE; static my_bool innodb_buf_flush_list_now = TRUE; +static uint innodb_merge_threshold_set_all_debug + = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; + +/** Wait for the background drop list to become empty. */ +static +void +wait_background_drop_list_empty( + THD* thd /*!< in: thread handle */ + MY_ATTRIBUTE((unused)), + struct st_mysql_sys_var* var /*!< in: pointer to system + variable */ + MY_ATTRIBUTE((unused)), + void* var_ptr /*!< out: where the formal + string goes */ + MY_ATTRIBUTE((unused)), + const void* save) /*!< in: immediate result from + check function */ +{ + row_wait_for_background_drop_list_empty(); +} /****************************************************************//** Set the purge state to RUN. If purge is disabled then it @@ -18040,12 +21657,20 @@ checkpoint_now_set( check function */ { if (*(my_bool*) save) { - while (log_sys->last_checkpoint_lsn < log_sys->lsn) { + while (log_sys->last_checkpoint_lsn + + SIZE_OF_MLOG_CHECKPOINT + + (log_sys->append_on_checkpoint != NULL + ? 
log_sys->append_on_checkpoint->size() : 0) + < log_sys->lsn) { log_make_checkpoint_at(LSN_MAX, TRUE); - fil_flush_file_spaces(FIL_LOG); + fil_flush_file_spaces(FIL_TYPE_LOG); + } + + dberr_t err = fil_write_flushed_lsn(log_sys->lsn); + + if (err != DB_SUCCESS) { + ib::warn() << "Checkpoint set failed " << err; } - fil_write_flushed_lsn_to_data_files(log_sys->lsn, 0); - fil_flush_file_spaces(FIL_TABLESPACE); } } @@ -18067,10 +21692,29 @@ buf_flush_list_now_set( check function */ { if (*(my_bool*) save) { - buf_flush_list(ULINT_MAX, LSN_MAX, NULL); - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + buf_flush_sync_all_buf_pools(); } } + +/** Override current MERGE_THRESHOLD setting for all indexes at dictionary +now. +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ +static +void +innodb_merge_threshold_set_all_debug_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + innodb_merge_threshold_set_all_debug + = (*static_cast(save)); + dict_set_merge_threshold_all_debug( + innodb_merge_threshold_set_all_debug); +} #endif /* UNIV_DEBUG */ /*********************************************************************** @@ -18101,17 +21745,19 @@ Find and Retrieve the FTS doc_id for the current result row ulonglong innobase_fts_retrieve_docid( /*========================*/ - FT_INFO_EXT * fts_hdl) /*!< in: FTS handler */ + FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */ { - row_prebuilt_t* ft_prebuilt; fts_result_t* result; + row_prebuilt_t* ft_prebuilt; - ft_prebuilt = ((NEW_FT_INFO *)fts_hdl)->ft_prebuilt; - result = ((NEW_FT_INFO *)fts_hdl)->ft_result; + ft_prebuilt = reinterpret_cast(fts_hdl)->ft_prebuilt; + result = reinterpret_cast(fts_hdl)->ft_result; if (ft_prebuilt->read_just_key) { + fts_ranking_t* ranking = rbt_value(fts_ranking_t, result->current); + return(ranking->doc_id); } @@ -18124,9 +21770,9 @@ Find 
and retrieve the size of the current result ulonglong innobase_fts_count_matches( /*=======================*/ - FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */ + FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */ { - NEW_FT_INFO* handle = (NEW_FT_INFO *) fts_hdl; + NEW_FT_INFO* handle = reinterpret_cast(fts_hdl); if (handle->ft_result->rankings_by_id != 0) { return rbt_size(handle->ft_result->rankings_by_id); @@ -18182,13 +21828,13 @@ void buffer_pool_load_now( /*=================*/ THD* thd /*!< in: thread handle */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), struct st_mysql_sys_var* var /*!< in: pointer to system variable */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), void* var_ptr /*!< out: where the formal string goes */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), const void* save) /*!< in: immediate result from check function */ { @@ -18205,13 +21851,13 @@ void buffer_pool_load_abort( /*===================*/ THD* thd /*!< in: thread handle */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), struct st_mysql_sys_var* var /*!< in: pointer to system variable */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), void* var_ptr /*!< out: where the formal string goes */ - __attribute__((unused)), + MY_ATTRIBUTE((unused)), const void* save) /*!< in: immediate result from check function */ { @@ -18220,10 +21866,56 @@ buffer_pool_load_abort( } } +/****************************************************************//** +Update the system variable innodb_log_write_ahead_size using the "saved" +value. This function is registered as a callback with MySQL. 
*/ +static +void +innodb_log_write_ahead_size_update( +/*===============================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + ulong val = OS_FILE_LOG_BLOCK_SIZE; + ulong in_val = *static_cast(save); + + while (val < in_val) { + val = val * 2; + } + + if (val > UNIV_PAGE_SIZE) { + val = UNIV_PAGE_SIZE; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "innodb_log_write_ahead_size cannot" + " be set higher than innodb_page_size."); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Setting innodb_log_write_ahead_size" + " to %lu", + UNIV_PAGE_SIZE); + } else if (val != in_val) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "innodb_log_write_ahead_size should be" + " set 2^n value and larger than 512."); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Setting innodb_log_write_ahead_size" + " to %lu", + val); + } + + srv_log_write_ahead_size = val; +} + /** Update innodb_status_output or innodb_status_output_locks, which control InnoDB "status monitor" output to the error log. -@param[in] thd thread handle -@param[in] var system variable @param[out] var_ptr current value @param[in] save to-be-assigned value */ static @@ -18308,6 +22000,28 @@ innodb_encrypt_tables_update( fil_crypt_set_encrypt_tables(*static_cast(save)); } +/** Update the innodb_log_checksums parameter. 
+@param[in] thd thread handle +@param[in] var system variable +@param[out] var_ptr current value +@param[in] save immediate result from check function */ +static +void +innodb_log_checksums_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save) +{ + my_bool check = *static_cast(var_ptr) + = *static_cast(save); + + /* Make sure we are the only log user */ + mutex_enter(&log_sys->mutex); + innodb_log_checksums_func_update(check); + mutex_exit(&log_sys->mutex); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -18318,7 +22032,10 @@ static struct st_mysql_storage_engine innobase_storage_engine= #ifdef WITH_WSREP void -wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) +wsrep_abort_slave_trx( +/*==================*/ + wsrep_seqno_t bf_seqno, + wsrep_seqno_t victim_seqno) { WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " "caused by:\n\t" @@ -18334,6 +22051,7 @@ This function is used to kill one transaction in BF. 
*/ UNIV_INTERN int wsrep_innobase_kill_one_trx( +/*========================*/ void * const bf_thd_ptr, const trx_t * const bf_trx, trx_t *victim_trx, @@ -18389,7 +22107,7 @@ wsrep_innobase_kill_one_trx( DBUG_RETURN(0); } - if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) { + if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { WSREP_DEBUG("withdraw for BF trx: %llu, state: %d", (longlong) victim_trx->id, wsrep_thd_get_conflict_state(thd)); @@ -18470,10 +22188,12 @@ wsrep_innobase_kill_one_trx( (ulonglong) victim_trx->id); victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + if (victim_trx->lock.wait_lock) { WSREP_DEBUG("victim has wait flag: %ld", thd_get_thread_id(thd)); lock_t* wait_lock = victim_trx->lock.wait_lock; + if (wait_lock) { WSREP_DEBUG("canceling wait lock"); victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; @@ -18548,6 +22268,7 @@ wsrep_innobase_kill_one_trx( static int wsrep_abort_transaction( +/*====================*/ handlerton* hton, THD *bf_thd, THD *victim_thd, @@ -18583,32 +22304,44 @@ wsrep_abort_transaction( DBUG_RETURN(-1); } -static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid) +static +int +innobase_wsrep_set_checkpoint( +/*==========================*/ + handlerton* hton, + const XID* xid) { DBUG_ASSERT(hton == innodb_hton_ptr); + if (wsrep_is_wsrep_xid(xid)) { mtr_t mtr; mtr_start(&mtr); trx_sysf_t* sys_header = trx_sysf_get(&mtr); trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr); mtr_commit(&mtr); - innobase_flush_logs(hton); + innobase_flush_logs(hton, false); return 0; } else { return 1; } } -static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) +static +int +innobase_wsrep_get_checkpoint( +/*==========================*/ + handlerton* hton, + XID* xid) { DBUG_ASSERT(hton == innodb_hton_ptr); trx_sys_read_wsrep_checkpoint(xid); return 0; } -static void +static +void wsrep_fake_trx_id( -/*==================*/ +/*==============*/ handlerton *hton, THD *thd) /*!< in: user thread handle */ { @@ 
-18625,34 +22358,39 @@ wsrep_fake_trx_id( static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, PLUGIN_VAR_RQCMDARG, - "The algorithm InnoDB uses for page checksumming. Possible values are " - "CRC32 (hardware accelerated if the CPU supports it) " - "write crc32, allow any of the other checksums to match when reading; " - "STRICT_CRC32 " - "write crc32, do not allow other algorithms to match when reading; " - "INNODB " - "write a software calculated checksum, allow any other checksums " - "to match when reading; " - "STRICT_INNODB " - "write a software calculated checksum, do not allow other algorithms " - "to match when reading; " - "NONE " - "write a constant magic number, do not do any checksum verification " - "when reading (same as innodb_checksums=OFF); " - "STRICT_NONE " - "write a constant magic number, do not allow values other than that " - "magic number when reading; " - "Files updated when this option is set to crc32 or strict_crc32 will " - "not be readable by MySQL versions older than 5.6.3", - NULL, NULL, SRV_CHECKSUM_ALGORITHM_INNODB, + "The algorithm InnoDB uses for page checksumming. 
Possible values are" + " CRC32 (hardware accelerated if the CPU supports it)" + " write crc32, allow any of the other checksums to match when reading;" + " STRICT_CRC32" + " write crc32, do not allow other algorithms to match when reading;" + " INNODB" + " write a software calculated checksum, allow any other checksums" + " to match when reading;" + " STRICT_INNODB" + " write a software calculated checksum, do not allow other algorithms" + " to match when reading;" + " NONE" + " write a constant magic number, do not do any checksum verification" + " when reading (same as innodb_checksums=OFF);" + " STRICT_NONE" + " write a constant magic number, do not allow values other than that" + " magic number when reading;" + " Files updated when this option is set to crc32 or strict_crc32 will" + " not be readable by MySQL versions older than 5.6.3", + NULL, NULL, SRV_CHECKSUM_ALGORITHM_CRC32, &innodb_checksum_algorithm_typelib); +static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums, + PLUGIN_VAR_RQCMDARG, + "Whether to compute and require checksums for InnoDB redo log blocks", + NULL, innodb_log_checksums_update, TRUE); + static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting " - "this to OFF. " - "Enable InnoDB checksums validation (enabled by default). " - "Disable with --skip-innodb-checksums.", + "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting" + " this to OFF." + " Enable InnoDB checksums validation (enabled by default)." + " Disable with --skip-innodb-checksums.", NULL, NULL, TRUE); static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, @@ -18662,8 +22400,8 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB doublewrite buffer (enabled by default). 
" - "Disable with --skip-innodb-doublewrite.", + "Enable InnoDB doublewrite buffer (enabled by default)." + " Disable with --skip-innodb-doublewrite.", NULL, NULL, TRUE); static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes, @@ -18700,6 +22438,12 @@ static MYSQL_SYSVAR_ULONG(idle_flush_pct, NULL, NULL, 100, 0, 100, 0); #ifdef UNIV_DEBUG +static MYSQL_SYSVAR_BOOL(background_drop_list_empty, + innodb_background_drop_list_empty, + PLUGIN_VAR_OPCMDARG, + "Wait for the background drop list to become empty", + NULL, wait_background_drop_list_empty, FALSE); + static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now, PLUGIN_VAR_OPCMDARG, "Set purge state to RUN", @@ -18719,6 +22463,14 @@ static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now, PLUGIN_VAR_OPCMDARG, "Force dirty page flush now", NULL, buf_flush_list_now_set, FALSE); + +static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug, + innodb_merge_threshold_set_all_debug, + PLUGIN_VAR_RQCMDARG, + "Override current MERGE_THRESHOLD setting for all indexes at dictionary" + " cache by the specified value dynamically, at the time.", + NULL, innodb_merge_threshold_set_all_debug_update, + DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0); #endif /* UNIV_DEBUG */ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, @@ -18731,9 +22483,9 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Purge threads can be from 1 to 32. Default is 1.", + "Purge threads can be from 1 to 32. Default is 4.", NULL, NULL, - 1, /* Default setting */ + 4, /* Default setting */ 1, /* Minimum value */ 32, 0); /* Maximum value */ @@ -18747,8 +22499,8 @@ static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size, static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown, PLUGIN_VAR_OPCMDARG, - "Speeds up the shutdown process of the InnoDB storage engine. 
Possible " - "values are 0, 1 (faster) or 2 (fastest - crash-like).", + "Speeds up the shutdown process of the InnoDB storage engine. Possible" + " values are 0, 1 (faster) or 2 (fastest - crash-like).", NULL, NULL, 1, 0, 2, 0); static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table, @@ -18760,7 +22512,7 @@ static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name, PLUGIN_VAR_RQCMDARG, "File format to use for new tables in .ibd files.", innodb_file_format_name_validate, - innodb_file_format_name_update, "Antelope"); + innodb_file_format_name_update, innodb_file_format_default); /* "innobase_file_format_check" decides whether we would continue booting the server if the file format stamped on the system @@ -18781,7 +22533,7 @@ static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max, PLUGIN_VAR_OPCMDARG, "The highest file format in the tablespace.", innodb_file_format_max_validate, - innodb_file_format_max_update, "Antelope"); + innodb_file_format_max_update, innodb_file_format_max_default); static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, @@ -18816,7 +22568,7 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix, PLUGIN_VAR_NOCMDARG, "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.", - NULL, NULL, FALSE); + NULL, innodb_large_prefix_update, TRUE); static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -18825,35 +22577,30 @@ static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted, static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. This option may be removed in future releases. " - "Please use READ COMMITTED transaction isolation level instead. 
" - "Force InnoDB to not use next-key locking, to use only row-level locking.", + "DEPRECATED. This option may be removed in future releases." + " Please use READ COMMITTED transaction isolation level instead." + " Force InnoDB to not use next-key locking, to use only row-level locking.", NULL, NULL, FALSE); -#ifdef UNIV_LOG_ARCHIVE -static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Where full logs should be archived.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE); -#endif /* UNIV_LOG_ARCHIVE */ - static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Path to InnoDB log files.", NULL, NULL, NULL); +static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Page cleaner threads can be from 1 to 64. 
Default is 4.", + NULL, NULL, 4, 1, 64, 0); + static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, PLUGIN_VAR_RQCMDARG, "Percentage of dirty pages allowed in bufferpool.", - NULL, innodb_max_dirty_pages_pct_update, 75.0, 0.001, 99.999, 0); + NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0); static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm, srv_max_dirty_pages_pct_lwm, PLUGIN_VAR_RQCMDARG, "Percentage of dirty pages at which flushing kicks in.", - NULL, innodb_max_dirty_pages_pct_lwm_update, 0.001, 0.000, 99.999, 0); + NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0); static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm, srv_adaptive_flushing_lwm, @@ -18866,6 +22613,11 @@ static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing, "Attempt flushing dirty pages to avoid IO bursts at checkpoints.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync, + PLUGIN_VAR_NOCMDARG, + "Allow IO bursts at the checkpoints ignoring io_capacity setting.", + NULL, NULL, TRUE); + static MYSQL_SYSVAR_ULONG(flushing_avg_loops, srv_flushing_avg_loops, PLUGIN_VAR_RQCMDARG, @@ -18897,8 +22649,8 @@ static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file, static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata, PLUGIN_VAR_OPCMDARG, - "Enable statistics gathering for metadata commands such as " - "SHOW TABLE STATUS for tables that use transient statistics (off by default)", + "Enable statistics gathering for metadata commands such as" + " SHOW TABLE STATUS for tables that use transient statistics (off by default)", NULL, NULL, FALSE); static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages, @@ -18909,29 +22661,29 @@ static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pag static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages, srv_stats_transient_sample_pages, PLUGIN_VAR_RQCMDARG, - "The number of leaf index pages to sample when 
calculating transient " - "statistics (if persistent statistics are not used, default 8)", + "The number of leaf index pages to sample when calculating transient" + " statistics (if persistent statistics are not used, default 8)", NULL, NULL, 8, 1, ~0ULL, 0); static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent, PLUGIN_VAR_OPCMDARG, - "InnoDB persistent statistics enabled for all tables unless overridden " - "at table level", + "InnoDB persistent statistics enabled for all tables unless overridden" + " at table level", NULL, NULL, TRUE); static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc, PLUGIN_VAR_OPCMDARG, - "InnoDB automatic recalculation of persistent statistics enabled for all " - "tables unless overridden at table level (automatic recalculation is only " - "done when InnoDB decides that the table has changed too much and needs a " - "new statistics)", + "InnoDB automatic recalculation of persistent statistics enabled for all" + " tables unless overridden at table level (automatic recalculation is only" + " done when InnoDB decides that the table has changed too much and needs a" + " new statistics)", NULL, NULL, TRUE); static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages, srv_stats_persistent_sample_pages, PLUGIN_VAR_RQCMDARG, - "The number of leaf index pages to sample when calculating persistent " - "statistics (by ANALYZE, default 20)", + "The number of leaf index pages to sample when calculating persistent" + " statistics (by ANALYZE, default 20)", NULL, NULL, 20, 1, ~0ULL, 0); static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter, @@ -18946,14 +22698,22 @@ static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional, static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, - "Enable InnoDB adaptive hash index (enabled by default). 
" - "Disable with --skip-innodb-adaptive-hash-index.", - NULL, innodb_adaptive_hash_index_update, TRUE); + "Enable InnoDB adaptive hash index (enabled by default). " + " Disable with --skip-innodb-adaptive-hash-index.", + NULL, innodb_adaptive_hash_index_update, true); + +/** Number of distinct partitions of AHI. +Each partition is protected by its own latch and so we have parts number +of latches protecting complete search system. */ +static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Number of InnoDB Adapative Hash Index Partitions. (default = 8). ", + NULL, NULL, 8, 1, 512, 0); static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, PLUGIN_VAR_RQCMDARG, - "Replication thread delay (ms) on the slave server if " - "innodb_thread_concurrency is reached (0 by default)", + "Replication thread delay (ms) on the slave server if" + " innodb_thread_concurrency is reached (0 by default)", NULL, NULL, 0, 0, ~0UL, 0); static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, @@ -18971,23 +22731,49 @@ static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages, " compression algorithm doesn't change.", NULL, NULL, FALSE); -static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. This option may be removed in future releases, " - "together with the option innodb_use_sys_malloc and with the InnoDB's " - "internal memory allocator. " - "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", - NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024); - -static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, +static MYSQL_SYSVAR_ULONG(autoextend_increment, + sys_tablespace_auto_extend_increment, PLUGIN_VAR_RQCMDARG, "Data file autoextend increment in megabytes", NULL, NULL, 64L, 1L, 1000L, 0); +/** Validate the requested buffer pool size. 
Also, reserve the necessary +memory needed for buffer pool resize. +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] save immediate result for update function +@param[in] value incoming string +@return 0 on success, 1 on failure. +*/ +static +int +innodb_buffer_pool_size_validate( + THD* thd, + struct st_mysql_sys_var* var, + void* save, + struct st_mysql_value* value); + +/* If the default value of innodb_buffer_pool_size is increased to be more than +BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default +can be removed and 8 used instead. The problem with the current setup is that +with 128MiB default buffer pool size and 8 instances by default we would emit +a warning when no options are specified. */ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); + innodb_buffer_pool_size_validate, + innodb_buffer_pool_size_update, + static_cast(srv_buf_pool_def_size), + static_cast(srv_buf_pool_min_size), + LLONG_MAX, 1024*1024L); + +static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of a single memory chunk within each buffer pool instance" + " for resizing buffer pool. Online buffer pool resizing happens" + " at this granularity. 
0 means disable resizing buffer pool.", + NULL, NULL, + 128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024); #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks, @@ -19001,10 +22787,10 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size, NULL, NULL, 120, 1, 127, 0); #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ -static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, +static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of buffer pool instances, set to higher value on high-end machines to increase scalability", - NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L); + NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0); static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, @@ -19019,12 +22805,12 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now, static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown, PLUGIN_VAR_RQCMDARG, "Dump the buffer pool into a file named @@innodb_buffer_pool_filename", - NULL, NULL, FALSE); + NULL, NULL, TRUE); static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct, PLUGIN_VAR_RQCMDARG, - "Dump only the hottest N% of each buffer pool, defaults to 100", - NULL, NULL, 100, 1, 100, 0); + "Dump only the hottest N% of each buffer pool, defaults to 25", + NULL, NULL, 25, 1, 100, 0); #ifdef UNIV_DEBUG static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict, @@ -19047,7 +22833,7 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort, static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Load the buffer pool from a file named @@innodb_buffer_pool_filename", - NULL, NULL, FALSE); + NULL, NULL, TRUE); static 
MYSQL_SYSVAR_BOOL(defragment, srv_defragment, PLUGIN_VAR_RQCMDARG, @@ -19126,10 +22912,10 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter, "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket", NULL, NULL, 5000L, 1L, ~0UL, 0); -static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, - "Number of file I/O threads in InnoDB.", - NULL, NULL, 4, 4, 64, 0); +static MYSQL_SYSVAR_LONG(fill_factor, innobase_fill_factor, + PLUGIN_VAR_RQCMDARG, + "Percentage of B-tree page filled during bulk insert", + NULL, NULL, 100, 10, 100, 0); static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print, PLUGIN_VAR_OPCMDARG, @@ -19142,10 +22928,10 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, NULL, NULL, FALSE); static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, - PLUGIN_VAR_NOCMDARG, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, - innodb_internal_table_update, NULL); + NULL, NULL); static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -19172,7 +22958,6 @@ static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size, "InnoDB Fulltext search maximum token size in characters", NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0); - static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize, PLUGIN_VAR_OPCMDARG, "InnoDB Fulltext search number of words to optimize for each optimize table call ", @@ -19217,7 +23002,7 @@ static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery, static MYSQL_SYSVAR_ULONG(force_recovery_crash, srv_force_recovery_crash, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Kills the server during crash recovery.", - NULL, NULL, 0, 0, 10, 0); + NULL, NULL, 0, 0, 100, 0); #endif /* !DBUG_OFF 
*/ static MYSQL_SYSVAR_ULONG(page_size, srv_page_size, @@ -19234,19 +23019,19 @@ static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size, static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Size of each log file in a log group.", - NULL, NULL, 48*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 48*1024*1024L, 4*1024*1024L, LLONG_MAX, 1024*1024L); static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of log files in the log group. InnoDB writes to the files in a circular fashion.", NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0); -/* Note that the default and minimum values are set to 0 to -detect if the option is passed and print deprecation message */ -static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.", - NULL, NULL, 0, 0, 10, 0); +static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size, + PLUGIN_VAR_RQCMDARG, + "Redo log write ahead unit size to avoid read-on-write," + " it should match the OS cache block IO size", + NULL, innodb_log_write_ahead_size_update, + 8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE); static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct, PLUGIN_VAR_RQCMDARG, @@ -19280,7 +23065,6 @@ static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency, "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. 
Value 0 will disable the thread throttling.", NULL, NULL, 0, 0, 1000, 0); -#ifdef HAVE_ATOMIC_BUILTINS static MYSQL_SYSVAR_ULONG( adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay, PLUGIN_VAR_RQCMDARG, @@ -19289,7 +23073,6 @@ static MYSQL_SYSVAR_ULONG( 150000, /* Default setting */ 0, /* Minimum value */ 1000000, 0); /* Maximum value */ -#endif /* HAVE_ATOMIC_BUILTINS */ static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization, srv_prefix_index_cluster_optimization, @@ -19299,8 +23082,8 @@ static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization, static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay, PLUGIN_VAR_RQCMDARG, - "Time of innodb thread sleeping before joining InnoDB queue (usec). " - "Value 0 disable a sleep", + "Time of innodb thread sleeping before joining InnoDB queue (usec)." + " Value 0 disable a sleep", NULL, NULL, 10000L, 0L, @@ -19311,10 +23094,15 @@ static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path, "Path to individual files and their sizes.", NULL, NULL, NULL); +static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path to files and their sizes making temp-tablespace.", + NULL, NULL, NULL); + static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Directory where undo tablespace files live, this path can be absolute.", - NULL, NULL, "."); + NULL, NULL, NULL); static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -19322,7 +23110,7 @@ static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces, NULL, NULL, 0L, /* Default seting */ 0L, /* Minimum value */ - 126L, 0); /* Maximum value */ + 95L, 0); /* Maximum value */ static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs, PLUGIN_VAR_OPCMDARG, @@ -19332,6 +23120,27 @@ static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs, 1, /* Minimum value */ TRX_SYS_N_RSEGS, 0); /* Maximum value */ +static 
MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size, + PLUGIN_VAR_OPCMDARG, + "Maximum size of UNDO tablespace in MB (If UNDO tablespace grows" + " beyond this size it will be truncated in due course). ", + NULL, NULL, + 1024 * 1024 * 1024L, + 10 * 1024 * 1024L, + ~0ULL, 0); + +static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency, + srv_purge_rseg_truncate_frequency, + PLUGIN_VAR_OPCMDARG, + "Dictates rate at which UNDO records are purged. Value N means" + " purge rollback segment(s) on every Nth iteration of purge invocation", + NULL, NULL, 128, 1, 128, 0); + +static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate, + PLUGIN_VAR_OPCMDARG, + "Enable or Disable Truncate of UNDO tablespace.", + NULL, NULL, FALSE); + /* Alias for innodb_undo_logs, this config variable is deprecated. */ static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs, PLUGIN_VAR_OPCMDARG, @@ -19343,11 +23152,10 @@ static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs, static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The AUTOINC lock modes supported by InnoDB: " - "0 => Old style AUTOINC locking (for backward" - " compatibility) " - "1 => New style AUTOINC locking " - "2 => No AUTOINC locking (unsafe for SBR)", + "The AUTOINC lock modes supported by InnoDB:" + " 0 => Old style AUTOINC locking (for backward compatibility);" + " 1 => New style AUTOINC locking;" + " 2 => No AUTOINC locking (unsafe for SBR)", NULL, NULL, AUTOINC_NEW_STYLE_LOCKING, /* Default setting */ AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ @@ -19357,24 +23165,17 @@ static MYSQL_SYSVAR_STR(version, innodb_version_str, PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY, "InnoDB version", NULL, NULL, INNODB_VERSION_STR); -static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. 
This option may be removed in future releases, " - "together with the InnoDB's internal memory allocator. " - "Use OS memory allocator instead of InnoDB's internal memory allocator", - NULL, NULL, TRUE); - static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); -#ifdef HAVE_LIBNUMA +#if defined(HAVE_LIBNUMA) && defined(WITH_NUMA) static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", NULL, NULL, FALSE); -#endif // HAVE_LIBNUMA +#endif /* HAVE_LIBNUMA && WITH_NUMA */ static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -19409,13 +23210,13 @@ static MYSQL_SYSVAR_ULONG(api_bk_commit_interval, ib_bk_commit_interval, static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, PLUGIN_VAR_RQCMDARG, - "Buffer changes to reduce random access: " - "OFF, ON, inserting, deleting, changing, or purging.", + "Buffer changes to reduce random access:" + " OFF, ON, inserting, deleting, changing, or purging.", innodb_change_buffering_validate, innodb_change_buffering_update, "all"); static MYSQL_SYSVAR_UINT(change_buffer_max_size, - innobase_change_buffer_max_size, + srv_change_buffer_max_size, PLUGIN_VAR_RQCMDARG, "Maximum on-disk size of change buffer in terms of percentage" " of the buffer pool.", @@ -19424,9 +23225,9 @@ static MYSQL_SYSVAR_UINT(change_buffer_max_size, static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, PLUGIN_VAR_RQCMDARG, - "Specifies how InnoDB index statistics collection code should " - "treat NULLs. Possible values are NULLS_EQUAL (default), " - "NULLS_UNEQUAL and NULLS_IGNORED", + "Specifies how InnoDB index statistics collection code should" + " treat NULLs. 
Possible values are NULLS_EQUAL (default)," + " NULLS_UNEQUAL and NULLS_IGNORED", NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG @@ -19440,6 +23241,11 @@ static MYSQL_SYSVAR_BOOL(disable_background_merge, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG, "Disable change buffering merges by the master thread", NULL, NULL, FALSE); + +static MYSQL_SYSVAR_ENUM(compress_debug, srv_debug_compress, + PLUGIN_VAR_RQCMDARG, + "Compress all tables, without specifying the COMPRESS table attribute", + NULL, NULL, Compression::NONE, &innodb_debug_compress_typelib); #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency, @@ -19491,8 +23297,8 @@ static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, - "Number of pages that must be accessed sequentially for InnoDB to " - "trigger a readahead.", + "Number of pages that must be accessed sequentially for InnoDB to" + " trigger a readahead.", NULL, NULL, 56, 0, 64, 0); static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter, @@ -19553,10 +23359,18 @@ static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode, static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled, PLUGIN_VAR_OPCMDARG, - "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, " - "may have negative impact on performance (off by default)", + "Enable INFORMATION_SCHEMA.innodb_cmp_per_index," + " may have negative impact on performance (off by default)", NULL, innodb_cmp_per_index_update, FALSE); +static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format, + PLUGIN_VAR_RQCMDARG, + "The default ROW FORMAT for all innodb tables created without explicit" + " ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC." 
+ " The ROW_FORMAT value COMPRESSED is not allowed", + NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC, + &innodb_default_row_format_typelib); + #ifdef UNIV_DEBUG static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT, @@ -19570,9 +23384,9 @@ static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug, static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT, - "Pause actual purging any delete-marked records, but merely update the purge view. " - "It is to create artificially the situation the purge view have been updated " - "but the each purges were not done yet.", + "Pause actual purging any delete-marked records, but merely update the purge view." + " It is to create artificially the situation the purge view have been updated" + " but the each purges were not done yet.", NULL, NULL, FALSE); static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, @@ -19584,12 +23398,40 @@ static MYSQL_SYSVAR_ULONG(saved_page_number_debug, srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG, "An InnoDB page number.", NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0); -#endif /* UNIV_DEBUG */ + +static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug, + buf_disable_resize_buffer_pool_debug, PLUGIN_VAR_NOCMDARG, + "Disable resizing buffer pool to make assertion code not expensive.", + NULL, NULL, TRUE); + +static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug, + innodb_page_cleaner_disabled_debug, + PLUGIN_VAR_OPCMDARG, + "Disable page cleaner", + NULL, buf_flush_page_cleaner_disabled_debug_update, FALSE); + +static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable the sync debug checks", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug, + innodb_dict_stats_disabled_debug, + PLUGIN_VAR_OPCMDARG, + "Disable dict_stats thread", + NULL, dict_stats_disabled_debug_update, FALSE); + +static 
MYSQL_SYSVAR_BOOL(master_thread_disabled_debug, + srv_master_thread_disabled_debug, + PLUGIN_VAR_OPCMDARG, + "Disable master thread", + NULL, srv_master_thread_disabled_debug_update, FALSE); static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures, PLUGIN_VAR_NOCMDARG, "Simulate compression failures.", NULL, NULL, 0, 0, 99, 0); +#endif /* UNIV_DEBUG */ static MYSQL_SYSVAR_BOOL(force_primary_key, srv_force_primary_key, @@ -19754,11 +23596,11 @@ static MYSQL_SYSVAR_BOOL(instrument_semaphores, srv_instrument_semaphores, 0, 0, FALSE); static struct st_mysql_sys_var* innobase_system_variables[]= { - MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), MYSQL_SYSVAR(api_bk_commit_interval), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_chunk_size), MYSQL_SYSVAR(buffer_pool_instances), MYSQL_SYSVAR(buffer_pool_filename), MYSQL_SYSVAR(buffer_pool_dump_now), @@ -19779,11 +23621,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(lru_scan_depth), MYSQL_SYSVAR(flush_neighbors), MYSQL_SYSVAR(checksum_algorithm), + MYSQL_SYSVAR(log_checksums), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(commit_concurrency), MYSQL_SYSVAR(concurrency_tickets), MYSQL_SYSVAR(compression_level), MYSQL_SYSVAR(data_file_path), + MYSQL_SYSVAR(temp_data_file_path), MYSQL_SYSVAR(data_home_dir), MYSQL_SYSVAR(doublewrite), MYSQL_SYSVAR(use_atomic_writes), @@ -19792,7 +23636,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(api_enable_mdl), MYSQL_SYSVAR(api_disable_rowlock), MYSQL_SYSVAR(fast_shutdown), - MYSQL_SYSVAR(file_io_threads), MYSQL_SYSVAR(read_io_threads), MYSQL_SYSVAR(write_io_threads), MYSQL_SYSVAR(file_per_table), @@ -19806,6 +23649,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #ifndef DBUG_OFF MYSQL_SYSVAR(force_recovery_crash), #endif /* !DBUG_OFF */ + MYSQL_SYSVAR(fill_factor), MYSQL_SYSVAR(ft_cache_size), 
MYSQL_SYSVAR(ft_total_cache_size), MYSQL_SYSVAR(ft_result_cache_limit), @@ -19818,24 +23662,21 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(force_load_corrupted), MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(lock_wait_timeout), -#ifdef UNIV_LOG_ARCHIVE - MYSQL_SYSVAR(log_arch_dir), - MYSQL_SYSVAR(log_archive), -#endif /* UNIV_LOG_ARCHIVE */ MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(log_buffer_size), MYSQL_SYSVAR(log_file_size), MYSQL_SYSVAR(log_files_in_group), + MYSQL_SYSVAR(log_write_ahead_size), MYSQL_SYSVAR(log_group_home_dir), MYSQL_SYSVAR(log_compressed_pages), MYSQL_SYSVAR(max_dirty_pages_pct), MYSQL_SYSVAR(max_dirty_pages_pct_lwm), MYSQL_SYSVAR(adaptive_flushing_lwm), MYSQL_SYSVAR(adaptive_flushing), + MYSQL_SYSVAR(flush_sync), MYSQL_SYSVAR(flushing_avg_loops), MYSQL_SYSVAR(max_purge_lag), MYSQL_SYSVAR(max_purge_lag_delay), - MYSQL_SYSVAR(mirrored_log_groups), MYSQL_SYSVAR(old_blocks_pct), MYSQL_SYSVAR(old_blocks_time), MYSQL_SYSVAR(open_files), @@ -19855,6 +23696,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_modified_counter), MYSQL_SYSVAR(stats_traditional), MYSQL_SYSVAR(adaptive_hash_index), + MYSQL_SYSVAR(adaptive_hash_index_parts), MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(replication_delay), MYSQL_SYSVAR(status_file), @@ -19866,18 +23708,16 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(spin_wait_delay), MYSQL_SYSVAR(table_locks), MYSQL_SYSVAR(thread_concurrency), -#ifdef HAVE_ATOMIC_BUILTINS MYSQL_SYSVAR(adaptive_max_sleep_delay), -#endif /* HAVE_ATOMIC_BUILTINS */ MYSQL_SYSVAR(prefix_index_cluster_optimization), MYSQL_SYSVAR(thread_sleep_delay), + MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(version), - MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), -#ifdef HAVE_LIBNUMA +#if defined(HAVE_LIBNUMA) && defined(WITH_NUMA) MYSQL_SYSVAR(numa_interleave), -#endif // HAVE_LIBNUMA +#endif /* HAVE_LIBNUMA && 
WITH_NUMA */ MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(change_buffer_max_size), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG @@ -19892,6 +23732,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(read_only), MYSQL_SYSVAR(io_capacity), MYSQL_SYSVAR(io_capacity_max), + MYSQL_SYSVAR(page_cleaners), MYSQL_SYSVAR(idle_flush_pct), MYSQL_SYSVAR(monitor_enable), MYSQL_SYSVAR(monitor_disable), @@ -19900,10 +23741,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(purge_threads), MYSQL_SYSVAR(purge_batch_size), #ifdef UNIV_DEBUG + MYSQL_SYSVAR(background_drop_list_empty), MYSQL_SYSVAR(purge_run_now), MYSQL_SYSVAR(purge_stop_now), MYSQL_SYSVAR(log_checkpoint_now), MYSQL_SYSVAR(buf_flush_list_now), + MYSQL_SYSVAR(merge_threshold_set_all_debug), #endif /* UNIV_DEBUG */ #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG MYSQL_SYSVAR(page_hash_locks), @@ -19914,21 +23757,30 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(print_all_deadlocks), MYSQL_SYSVAR(cmp_per_index_enabled), MYSQL_SYSVAR(undo_logs), + MYSQL_SYSVAR(max_undo_log_size), + MYSQL_SYSVAR(purge_rseg_truncate_frequency), + MYSQL_SYSVAR(undo_log_truncate), MYSQL_SYSVAR(rollback_segments), MYSQL_SYSVAR(undo_directory), MYSQL_SYSVAR(undo_tablespaces), MYSQL_SYSVAR(sync_array_size), MYSQL_SYSVAR(compression_failure_threshold_pct), MYSQL_SYSVAR(compression_pad_pct_max), - MYSQL_SYSVAR(simulate_comp_failures), + MYSQL_SYSVAR(default_row_format), #ifdef UNIV_DEBUG + MYSQL_SYSVAR(simulate_comp_failures), MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), + MYSQL_SYSVAR(compress_debug), + MYSQL_SYSVAR(disable_resize_buffer_pool_debug), + MYSQL_SYSVAR(page_cleaner_disabled_debug), + MYSQL_SYSVAR(dict_stats_disabled_debug), + MYSQL_SYSVAR(master_thread_disabled_debug), + 
MYSQL_SYSVAR(sync_debug), #endif /* UNIV_DEBUG */ - MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(force_primary_key), MYSQL_SYSVAR(fatal_semaphore_wait_threshold), /* Table page compression feature */ @@ -19987,6 +23839,7 @@ i_s_innodb_cmp_per_index_reset, i_s_innodb_buffer_page, i_s_innodb_buffer_page_lru, i_s_innodb_buffer_stats, +i_s_innodb_temp_table_info, i_s_innodb_metrics, i_s_innodb_ft_default_stopword, i_s_innodb_ft_deleted, @@ -20003,6 +23856,7 @@ i_s_innodb_sys_foreign, i_s_innodb_sys_foreign_cols, i_s_innodb_sys_tablespaces, i_s_innodb_sys_datafiles, +i_s_innodb_sys_virtual, i_s_innodb_mutexes, i_s_innodb_sys_semaphore_waits, i_s_innodb_tablespaces_encryption, @@ -20042,204 +23896,460 @@ innobase_undo_logs_init_default_max() = static_cast(srv_available_undo_logs); } -#ifdef UNIV_COMPILE_TEST_FUNCS - -struct innobase_convert_name_test_t { - char* buf; - ulint buflen; - const char* id; - ulint idlen; - void* thd; - ibool file_id; - - const char* expected; -}; - -void -test_innobase_convert_name() -{ - char buf[1024]; - ulint i; - - innobase_convert_name_test_t test_input[] = { - {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""}, - {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""}, - - {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""}, - {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""}, - {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""}, - - {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"cd\""}, - {buf, 17, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"cd\""}, - {buf, 16, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"c\""}, - {buf, 15, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"\""}, - {buf, 14, 
"ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\""}, - {buf, 13, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\""}, - {buf, 12, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#a\""}, - {buf, 11, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#\""}, - {buf, 10, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50\""}, - - {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""}, - {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""}, - {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""}, - {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""}, - {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."}, - {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."}, - {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""}, - {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""}, - {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""}, - /* XXX probably "" is a better result in this case - {buf, 1, "ab/cd", 5, NULL, TRUE, "."}, - */ - {buf, 0, "ab/cd", 5, NULL, TRUE, ""}, - }; - - for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) { - - char* end; - ibool ok = TRUE; - size_t res_len; - - fprintf(stderr, "TESTING %lu, %s, %lu, %s\n", - test_input[i].buflen, - test_input[i].id, - test_input[i].idlen, - test_input[i].expected); - - end = innobase_convert_name( - test_input[i].buf, - test_input[i].buflen, - test_input[i].id, - test_input[i].idlen, - test_input[i].thd, - test_input[i].file_id); - - res_len = (size_t) (end - test_input[i].buf); - - if (res_len != strlen(test_input[i].expected)) { - - fprintf(stderr, "unexpected len of the result: %u, " - "expected: %u\n", (unsigned) res_len, - (unsigned) strlen(test_input[i].expected)); - ok = FALSE; - } - - if (memcmp(test_input[i].buf, - test_input[i].expected, - strlen(test_input[i].expected)) != 0 - || !ok) { - - fprintf(stderr, "unexpected result: %.*s, " - "expected: %s\n", (int) res_len, - test_input[i].buf, - test_input[i].expected); - ok = FALSE; - } - - if (ok) { - fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len, - buf); - } else { - fprintf(stderr, "FAILED\n\n"); - return; - } - } -} - -#endif /* 
UNIV_COMPILE_TEST_FUNCS */ - /**************************************************************************** * DS-MRR implementation ***************************************************************************/ /** - * Multi Range Read interface, DS-MRR calls - */ - -int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, - HANDLER_BUFFER *buf) +Multi Range Read interface, DS-MRR calls */ +int +ha_innobase::multi_range_read_init( + RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, + uint mode, + HANDLER_BUFFER* buf) { - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); + return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param, + n_ranges, mode, buf)); } -int ha_innobase::multi_range_read_next(range_id_t *range_info) +int +ha_innobase::multi_range_read_next( + range_id_t* range_info) { - return ds_mrr.dsmrr_next(range_info); + return(m_ds_mrr.dsmrr_next(range_info)); } -ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, - uint n_ranges, uint *bufsz, - uint *flags, - Cost_estimate *cost) +ha_rows +ha_innobase::multi_range_read_info_const( + uint keyno, + RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, + uint* bufsz, + uint* flags, + Cost_estimate* cost) { - /* See comments in ha_myisam::multi_range_read_info_const */ - ds_mrr.init(this, table); + /* See comments in ha_myisam::multi_range_read_info_const */ + m_ds_mrr.init(this, table); - if (prebuilt->select_lock_type != LOCK_NONE) - *flags |= HA_MRR_USE_DEFAULT_IMPL; + if (m_prebuilt->select_lock_type != LOCK_NONE) { + *flags |= HA_MRR_USE_DEFAULT_IMPL; + } - ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, - bufsz, flags, cost); - return res; + ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, + bufsz, flags, cost); + return res; } -ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, - uint keys, uint 
key_parts, - uint *bufsz, uint *flags, - Cost_estimate *cost) +ha_rows +ha_innobase::multi_range_read_info( + uint keyno, + uint n_ranges, + uint keys, + uint key_parts, + uint* bufsz, + uint* flags, + Cost_estimate* cost) { - ds_mrr.init(this, table); - ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, - flags, cost); - return res; + m_ds_mrr.init(this, table); + ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, + flags, cost); + return res; } -int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, - size_t size) +int +ha_innobase::multi_range_read_explain_info( + uint mrr_mode, + char *str, + size_t size) { - return ds_mrr.dsmrr_explain_info(mrr_mode, str, size); + return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size); } /** - * Index Condition Pushdown interface implementation - */ +Index Condition Pushdown interface implementation */ /*************************************************************//** InnoDB index push-down condition check @return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ -UNIV_INTERN -enum icp_result +ICP_RESULT innobase_index_cond( /*================*/ void* file) /*!< in/out: pointer to ha_innobase */ { - return handler_index_cond_check(file); + return handler_index_cond_check(file); } +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Get the computed value by supplying the base column values. 
+@param[in,out] table the table whose virtual column template to be built */ +void +innobase_init_vc_templ( + dict_table_t* table) +{ + char dbname[MAX_DATABASE_NAME_LEN + 1]; + char tbname[MAX_TABLE_NAME_LEN + 1]; + char* name = table->name.m_name; + ulint dbnamelen = dict_get_db_name_len(name); + ulint tbnamelen = strlen(name) - dbnamelen - 1; + char t_dbname[MAX_DATABASE_NAME_LEN + 1]; + char t_tbname[MAX_TABLE_NAME_LEN + 1]; + + mutex_enter(&dict_sys->mutex); + + if (table->vc_templ != NULL) { + mutex_exit(&dict_sys->mutex); + + return; + } + + strncpy(dbname, name, dbnamelen); + dbname[dbnamelen] = 0; + strncpy(tbname, name + dbnamelen + 1, tbnamelen); + tbname[tbnamelen] =0; + + /* For partition table, remove the partition name and use the + "main" table name to build the template */ +#ifdef _WIN32 + char* is_part = strstr(tbname, "#p#"); +#else + char* is_part = strstr(tbname, "#P#"); +#endif /* _WIN32 */ + + if (is_part != NULL) { + *is_part = '\0'; + tbnamelen = is_part - tbname; + } + + table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); + table->vc_templ->vtempl = NULL; + + dbnamelen = filename_to_tablename(dbname, t_dbname, + MAX_DATABASE_NAME_LEN + 1); + tbnamelen = filename_to_tablename(tbname, t_tbname, + MAX_TABLE_NAME_LEN + 1); + +#ifdef UNIV_DEBUG + // bool ret = +#endif /* UNIV_DEBUG */ + + /* JAN: TODO: MySQL: 5.7 virtual columsn + handler::my_prepare_gcolumn_template( + thd, t_dbname, t_tbname, + &innobase_build_v_templ_callback, + static_cast(table)); + ut_ad(!ret); + */ + mutex_exit(&dict_sys->mutex); +} + +/** Change dbname and table name in table->vc_templ. +@param[in,out] table the table whose virtual column template +dbname and tbname to be renamed. 
*/ +void +innobase_rename_vc_templ( + dict_table_t* table) +{ + char dbname[MAX_DATABASE_NAME_LEN + 1]; + char tbname[MAX_DATABASE_NAME_LEN + 1]; + char* name = table->name.m_name; + ulint dbnamelen = dict_get_db_name_len(name); + ulint tbnamelen = strlen(name) - dbnamelen - 1; + char t_dbname[MAX_DATABASE_NAME_LEN + 1]; + char t_tbname[MAX_TABLE_NAME_LEN + 1]; + + strncpy(dbname, name, dbnamelen); + dbname[dbnamelen] = 0; + strncpy(tbname, name + dbnamelen + 1, tbnamelen); + tbname[tbnamelen] =0; + + /* For partition table, remove the partition name and use the + "main" table name to build the template */ +#ifdef _WIN32 + char* is_part = strstr(tbname, "#p#"); +#else + char* is_part = strstr(tbname, "#P#"); +#endif /* _WIN32 */ + + if (is_part != NULL) { + *is_part = '\0'; + tbnamelen = is_part - tbname; + } + + dbnamelen = filename_to_tablename(dbname, t_dbname, + MAX_DATABASE_NAME_LEN + 1); + tbnamelen = filename_to_tablename(tbname, t_tbname, + MAX_TABLE_NAME_LEN + 1); + + table->vc_templ->db_name = t_dbname; + table->vc_templ->tb_name = t_tbname; +} + +/** Get the updated parent field value from the update vector for the +given col_no. +@param[in] foreign foreign key information +@param[in] update updated parent vector. 
+@param[in] col_no column position of the table +@return updated field from the parent update vector, else NULL */ +static +dfield_t* +innobase_get_field_from_update_vector( + dict_foreign_t* foreign, + upd_t* update, + ulint col_no) +{ + dict_table_t* parent_table = foreign->referenced_table; + dict_index_t* parent_index = foreign->referenced_index; + ulint parent_field_no; + ulint parent_col_no; + ulint prefix_col_no; + + for (ulint i = 0; i < foreign->n_fields; i++) { + + parent_col_no = dict_index_get_nth_col_no(parent_index, i); + parent_field_no = dict_table_get_nth_col_pos( + parent_table, parent_col_no, &prefix_col_no); + + for (ulint j = 0; j < update->n_fields; j++) { + upd_field_t* parent_ufield + = &update->fields[j]; + + if (parent_ufield->field_no == parent_field_no + && parent_col_no == col_no) { + return(&parent_ufield->new_val); + } + } + } + + return (NULL); +} + +/** Get the computed value by supplying the base column values. +@param[in,out] row the data row +@param[in] col virtual column +@param[in] index index +@param[in,out] local_heap heap memory for processing large data etc. +@param[in,out] heap memory heap that copies the actual index row +@param[in] ifield index field +@param[in] thd MySQL thread handle +@param[in,out] mysql_table mysql table object +@param[in] old_table during ALTER TABLE, this is the old table + or NULL. 
+@param[in] parent_update update vector for the parent row +@param[in] foreign foreign key information +@return the field filled with computed value, or NULL if just want +to store the value in passed in "my_rec" */ +dfield_t* +innobase_get_computed_value( + const dtuple_t* row, + const dict_v_col_t* col, + const dict_index_t* index, + mem_heap_t** local_heap, + mem_heap_t* heap, + const dict_field_t* ifield, + THD* thd, + TABLE* mysql_table, + const dict_table_t* old_table, + upd_t* parent_update, + dict_foreign_t* foreign) +{ + byte rec_buf1[REC_VERSION_56_MAX_INDEX_COL_LEN]; + byte rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN]; + byte* mysql_rec; + byte* buf; + dfield_t* field; + ulint len; + + const page_size_t page_size = (old_table == NULL) + ? dict_table_page_size(index->table) + : dict_table_page_size(old_table); + + ulint ret = 0; + + ut_ad(index->table->vc_templ); + ut_ad(thd != NULL); + + const mysql_row_templ_t* + vctempl = index->table->vc_templ->vtempl[ + index->table->vc_templ->n_col + col->v_pos]; + + if (!heap || index->table->vc_templ->rec_len + >= REC_VERSION_56_MAX_INDEX_COL_LEN) { + if (*local_heap == NULL) { + *local_heap = mem_heap_create(UNIV_PAGE_SIZE); + } + + mysql_rec = static_cast(mem_heap_alloc( + *local_heap, index->table->vc_templ->rec_len)); + buf = static_cast(mem_heap_alloc( + *local_heap, index->table->vc_templ->rec_len)); + } else { + mysql_rec = rec_buf1; + buf = rec_buf2; + } + + for (ulint i = 0; i < col->num_base; i++) { + dict_col_t* base_col = col->base_col[i]; + const dfield_t* row_field = NULL; + ulint col_no = base_col->ind; + const mysql_row_templ_t* templ + = index->table->vc_templ->vtempl[col_no]; + const byte* data; + + if (parent_update != NULL) { + /** Get the updated field from update vector + of the parent table. 
*/ + row_field = innobase_get_field_from_update_vector( + foreign, parent_update, col_no); + } + + if (row_field == NULL) { + row_field = dtuple_get_nth_field(row, col_no); + } + + data = static_cast(row_field->data); + len = row_field->len; + + if (row_field->ext) { + if (*local_heap == NULL) { + *local_heap = mem_heap_create(UNIV_PAGE_SIZE); + } + + data = btr_copy_externally_stored_field( + &len, data, page_size, + dfield_get_len(row_field), *local_heap); + } + + if (len == UNIV_SQL_NULL) { + mysql_rec[templ->mysql_null_byte_offset] + |= (byte) templ->mysql_null_bit_mask; + memcpy(mysql_rec + templ->mysql_col_offset, + static_cast( + index->table->vc_templ->default_rec + + templ->mysql_col_offset), + templ->mysql_col_len); + } else { + + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, index, templ->clust_rec_field_no, + (const byte*)data, len); + + if (templ->mysql_null_bit_mask) { + /* It is a nullable column with a + non-NULL value */ + mysql_rec[templ->mysql_null_byte_offset] + &= ~(byte) templ->mysql_null_bit_mask; + } + } + } + + field = dtuple_get_nth_v_field(row, col->v_pos); + + /* Bitmap for specifying which virtual columns the server + should evaluate */ + MY_BITMAP column_map; + my_bitmap_map col_map_storage[bitmap_buffer_size(REC_MAX_N_FIELDS)]; + + bitmap_init(&column_map, col_map_storage, REC_MAX_N_FIELDS, false); + + /* Specify the column the server should evaluate */ + bitmap_set_bit(&column_map, col->m_col.ind); + + if (mysql_table == NULL) { + if (vctempl->type == DATA_BLOB) { + ulint max_len; + + if (vctempl->mysql_col_len - 8 == 1) { + /* This is for TINYBLOB only, which needs + only 1 byte, other BLOBs won't be affected */ + max_len = 255; + } else { + max_len = DICT_MAX_FIELD_LEN_BY_FORMAT( + index->table) + 1; + } + + byte* blob_mem = static_cast( + mem_heap_alloc(heap, max_len)); + + row_mysql_store_blob_ref( + mysql_rec + vctempl->mysql_col_offset, + vctempl->mysql_col_len, blob_mem, max_len); + } + + 
ret = handler::my_eval_gcolumn_expr_with_open( + thd, index->table->vc_templ->db_name.c_str(), + index->table->vc_templ->tb_name.c_str(), &column_map, + (uchar *)mysql_rec); + } else { + ret = handler::my_eval_gcolumn_expr( + thd, mysql_table, &column_map, + (uchar *)mysql_rec); + } + + if (ret != 0) { +#ifdef INNODB_VIRTUAL_DEBUG + ib::warn() << "Compute virtual column values failed "; + fputs("InnoDB: Cannot compute value for following record ", + stderr); + dtuple_print(stderr, row); +#endif /* INNODB_VIRTUAL_DEBUG */ + return(NULL); + } + + /* we just want to store the data in passed in MySQL record */ + if (ret != 0) { + return(NULL); + } + + if (vctempl->mysql_null_bit_mask + && (mysql_rec[vctempl->mysql_null_byte_offset] + & vctempl->mysql_null_bit_mask)) { + dfield_set_null(field); + field->type.prtype |= DATA_VIRTUAL; + return(field); + } + + row_mysql_store_col_in_innobase_format( + field, buf, + TRUE, mysql_rec + vctempl->mysql_col_offset, + vctempl->mysql_col_len, dict_table_is_comp(index->table)); + field->type.prtype |= DATA_VIRTUAL; + + ulint max_prefix = col->m_col.max_prefix; + + if (max_prefix && ifield + && (ifield->prefix_len == 0 + || ifield->prefix_len > col->m_col.max_prefix)) { + max_prefix = ifield->prefix_len; + } + + /* If this is a prefix index, we only need a portion of the field */ + if (max_prefix) { + len = dtype_get_at_most_n_mbchars( + col->m_col.prtype, + col->m_col.mbminmaxlen, + max_prefix, + field->len, + static_cast(dfield_get_data(field))); + dfield_set_len(field, len); + } + + if (heap) { + dfield_dup(field, heap); + } + + return(field); +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /** Attempt to push down an index condition. 
-* @param[in] keyno MySQL key number -* @param[in] idx_cond Index condition to be checked -* @return Part of idx_cond which the handler will not evaluate -*/ -UNIV_INTERN +@param[in] keyno MySQL key number +@param[in] idx_cond Index condition to be checked +@return Part of idx_cond which the handler will not evaluate */ + class Item* ha_innobase::idx_cond_push( uint keyno, @@ -20263,10 +24373,9 @@ errmsg-utf8.txt directly as is. Push a warning message to the client, it is a wrapper around: void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, + THD *thd, Sql_condition::enum_condition_level level, uint code, const char *format, ...); */ -UNIV_INTERN void ib_senderrf( /*========*/ @@ -20275,7 +24384,8 @@ ib_senderrf( ib_uint32_t code, /*!< MySQL error code */ ...) /*!< Args */ { - va_list args; + va_list args; + char* str = NULL; const char* format = innobase_get_err_msg(code); /* If the caller wants to push a message to the client then @@ -20288,27 +24398,32 @@ ib_senderrf( va_start(args, code); - myf l=0; + myf l = Sql_condition::WARN_LEVEL_NOTE; - switch(level) { + switch (level) { case IB_LOG_LEVEL_INFO: - l = ME_JUST_INFO; + l = ME_JUST_INFO; break; case IB_LOG_LEVEL_WARN: - l = ME_JUST_WARNING; + l = ME_JUST_WARNING; break; case IB_LOG_LEVEL_ERROR: + sd_notifyf(0, "STATUS=InnoDB: Error: %s", str); + l = 0; + break; case IB_LOG_LEVEL_FATAL: l = 0; + sd_notifyf(0, "STATUS=InnoDB: Fatal: %s", str); break; default: l = 0; break; } - my_printv_error(code, format, MYF(l), args); + my_printv_error(code, format, MYF(l), args); va_end(args); + free(str); if (level == IB_LOG_LEVEL_FATAL) { ut_error; @@ -20323,10 +24438,9 @@ must be: "Some string ... %s". 
Push a warning message to the client, it is a wrapper around: void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, + THD *thd, Sql_condition::enum_condition_level level, uint code, const char *format, ...); */ -UNIV_INTERN void ib_errf( /*====*/ @@ -20336,7 +24450,7 @@ ib_errf( const char* format, /*!< printf format */ ...) /*!< Args */ { - char* str; + char* str = NULL; va_list args; /* If the caller wants to push a message to the client then @@ -20347,9 +24461,15 @@ ib_errf( va_start(args, format); -#ifdef __WIN__ +#ifdef _WIN32 int size = _vscprintf(format, args) + 1; - str = static_cast(malloc(size)); + if (size > 0) { + str = static_cast(malloc(size)); + } + if (str == NULL) { + va_end(args); + return; /* Watch for Out-Of-Memory */ + } str[size - 1] = 0x0; vsnprintf(str, size, format, args); #elif HAVE_VASPRINTF @@ -20361,8 +24481,12 @@ ib_errf( #else /* Use a fixed length string. */ str = static_cast(malloc(BUFSIZ)); + if (str == NULL) { + va_end(args); + return; /* Watch for Out-Of-Memory */ + } my_vsnprintf(str, BUFSIZ, format, args); -#endif /* __WIN__ */ +#endif /* _WIN32 */ ib_senderrf(thd, level, code, str); @@ -20370,62 +24494,39 @@ ib_errf( free(str); } -/******************************************************************//** -Write a message to the MySQL log, prefixed with "InnoDB: " */ -UNIV_INTERN -void -ib_logf( -/*====*/ - ib_log_level_t level, /*!< in: warning level */ - const char* format, /*!< printf format */ - ...) 
/*!< Args */ -{ - char* str; - va_list args; +/* Keep the first 16 characters as-is, since the url is sometimes used +as an offset from this.*/ +const char* TROUBLESHOOTING_MSG = + "Please refer to " REFMAN "innodb-troubleshooting.html" + " for how to resolve the issue."; - va_start(args, format); +const char* TROUBLESHOOT_DATADICT_MSG = + "Please refer to " REFMAN "innodb-troubleshooting-datadict.html" + " for how to resolve the issue."; -#ifdef __WIN__ - int size = _vscprintf(format, args) + 1; - str = static_cast(malloc(size)); - str[size - 1] = 0x0; - vsnprintf(str, size, format, args); -#elif HAVE_VASPRINTF - if (vasprintf(&str, format, args) == -1) { - /* In case of failure use a fixed length string */ - str = static_cast(malloc(BUFSIZ)); - my_vsnprintf(str, BUFSIZ, format, args); - } -#else - /* Use a fixed length string. */ - str = static_cast(malloc(BUFSIZ)); - my_vsnprintf(str, BUFSIZ, format, args); -#endif /* __WIN__ */ +const char* BUG_REPORT_MSG = + "Submit a detailed bug report to http://bugs.mysql.com"; - switch(level) { - case IB_LOG_LEVEL_INFO: - sql_print_information("InnoDB: %s", str); - break; - case IB_LOG_LEVEL_WARN: - sql_print_warning("InnoDB: %s", str); - break; - case IB_LOG_LEVEL_ERROR: - sql_print_error("InnoDB: %s", str); - sd_notifyf(0, "STATUS=InnoDB: Error: %s", str); - break; - case IB_LOG_LEVEL_FATAL: - sql_print_error("InnoDB: %s", str); - sd_notifyf(0, "STATUS=InnoDB: Fatal: %s", str); - break; - } +const char* FORCE_RECOVERY_MSG = + "Please refer to " REFMAN "forcing-innodb-recovery.html" + " for information about forcing recovery."; - va_end(args); - free(str); +const char* ERROR_CREATING_MSG = + "Please refer to " REFMAN "error-creating-innodb.html"; - if (level == IB_LOG_LEVEL_FATAL) { - ut_error; - } -} +const char* OPERATING_SYSTEM_ERROR_MSG = + "Some operating system error numbers are described at" + " " REFMAN "operating-system-error-codes.html"; + +const char* FOREIGN_KEY_CONSTRAINTS_MSG = + "Please refer to " REFMAN 
"innodb-foreign-key-constraints.html" + " for correct foreign key definition."; + +const char* SET_TRANSACTION_MSG = + "Please refer to " REFMAN "set-transaction.html"; + +const char* INNODB_PARAMETERS_MSG = + "Please refer to " REFMAN "innodb-parameters.html"; /********************************************************************** Converts an identifier from my_charset_filename to UTF-8 charset. @@ -20441,8 +24542,9 @@ innobase_convert_to_filename_charset( CHARSET_INFO* cs_to = &my_charset_filename; CHARSET_INFO* cs_from = system_charset_info; - return(strconvert( cs_from, from, strlen(from), cs_to, to, - static_cast(len), &errors)); + return(static_cast(strconvert( + cs_from, from, strlen(from), + cs_to, to, static_cast(len), &errors))); } /********************************************************************** @@ -20459,13 +24561,13 @@ innobase_convert_to_system_charset( CHARSET_INFO* cs1 = &my_charset_filename; CHARSET_INFO* cs2 = system_charset_info; - return(strconvert(cs1, from, strlen(from), cs2, to, - static_cast(len), errors)); + return(static_cast(strconvert( + cs1, from, strlen(from), + cs2, to, static_cast(len), errors))); } /********************************************************************** Issue a warning that the row is too big. */ -UNIV_INTERN void ib_warn_row_too_big(const dict_table_t* table) { @@ -20479,10 +24581,6 @@ ib_warn_row_too_big(const dict_table_t* table) THD* thd = current_thd; - if (thd == NULL) { - return; - } - push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW, "Row size too large (> %lu). Changing some columns to TEXT" @@ -20493,6 +24591,98 @@ ib_warn_row_too_big(const dict_table_t* table) , prefix ? DICT_MAX_FIXED_COL_LEN : 0); } +/** Validate the requested buffer pool size. Also, reserve the necessary +memory needed for buffer pool resize. 
+@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] save immediate result for update function +@param[in] value incoming string +@return 0 on success, 1 on failure. +*/ +static +int +innodb_buffer_pool_size_validate( + THD* thd, + struct st_mysql_sys_var* var, + void* save, + struct st_mysql_value* value) +{ + longlong intbuf; + + + value->val_int(value, &intbuf); + + if (!srv_was_started) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Cannot update innodb_buffer_pool_size," + " because InnoDB is not started."); + return(1); + } + +#ifdef UNIV_DEBUG + if (buf_disable_resize_buffer_pool_debug == TRUE) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Cannot update innodb_buffer_pool_size," + " because innodb_disable_resize_buffer_pool_debug" + " is set."); + ib::warn() << "Cannot update innodb_buffer_pool_size," + " because innodb_disable_resize_buffer_pool_debug" + " is set."; + return(1); + } +#endif /* UNIV_DEBUG */ + + + buf_pool_mutex_enter_all(); + + if (srv_buf_pool_old_size != srv_buf_pool_size) { + buf_pool_mutex_exit_all(); + my_printf_error(ER_WRONG_ARGUMENTS, + "Another buffer pool resize is already in progress.", MYF(0)); + return(1); + } + + if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) { + buf_pool_mutex_exit_all(); + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Cannot update innodb_buffer_pool_size" + " to less than 1GB if" + " innodb_buffer_pool_instances > 1."); + return(1); + } + + ulint requested_buf_pool_size + = buf_pool_size_align(static_cast(intbuf)); + + *static_cast(save) = requested_buf_pool_size; + + if (srv_buf_pool_size == requested_buf_pool_size) { + buf_pool_mutex_exit_all(); + /* nothing to do */ + return(0); + } + + srv_buf_pool_size = requested_buf_pool_size; + buf_pool_mutex_exit_all(); + + if (intbuf != static_cast(requested_buf_pool_size)) { + char buf[64]; + 
int len = 64; + value->val_str(value, buf, &len); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_TRUNCATED_WRONG_VALUE, + "Truncated incorrect %-.32s value: '%-.128s'", + mysql_sysvar_buffer_pool_size.name, + value->val_str(value, buf, &len)); + } + + return(0); +} + /*************************************************************//** Check for a valid value of innobase_compression_algorithm. @return 0 for valid innodb_compression_algorithm. */ @@ -20653,15 +24843,17 @@ ib_push_warning( thd = current_thd; } - va_start(args, format); - buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME)); - vsprintf(buf,format, args); + if (thd) { + va_start(args, format); + buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME)); + vsprintf(buf,format, args); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - convert_error_code_to_mysql((dberr_t)error, 0, thd), - buf); - my_free(buf); - va_end(args); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + convert_error_code_to_mysql((dberr_t)error, 0, thd), + buf); + my_free(buf); + va_end(args); + } } /********************************************************************//** diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 478187e0b23..b436453e610 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -17,43 +17,54 @@ this program; if not, write to the Free Software Foundation, Inc., *****************************************************************************/ -/* - This file is based on ha_berkeley.h of MySQL distribution +/* The InnoDB handler: the interface between MySQL and InnoDB. */ - This file defines the Innodb handler: the interface between MySQL and - Innodb -*/ +/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default +system clustered index when there is no primary key. 
*/ +extern const char innobase_index_reserve_name[]; -#include "dict0stats.h" +/* "innodb_file_per_table" tablespace name is reserved by InnoDB in order +to explicitly create a file_per_table tablespace for the table. */ +extern const char reserved_file_per_table_space_name[]; -/* Structure defines translation table between mysql index and innodb +/* "innodb_system" tablespace name is reserved by InnoDB for the +system tablespace which uses space_id 0 and stores extra types of +system pages like UNDO and doublewrite. */ +extern const char reserved_system_space_name[]; + +/* "innodb_temporary" tablespace name is reserved by InnoDB for the +predefined shared temporary tablespace. */ +extern const char reserved_temporary_space_name[]; + +/* Structure defines translation table between mysql index and InnoDB index structures */ struct innodb_idx_translate_t { + ulint index_count; /*!< number of valid index entries in the index_mapping array */ + ulint array_size; /*!< array size of index_mapping */ + dict_index_t** index_mapping; /*!< index pointer array directly - maps to index in Innodb from MySQL + maps to index in InnoDB from MySQL array index */ }; - /** InnoDB table share */ typedef struct st_innobase_share { - THR_LOCK lock; /*!< MySQL lock protecting - this structure */ - const char* table_name; /*!< InnoDB table name */ - uint use_count; /*!< reference count, - incremented in get_share() - and decremented in - free_share() */ - void* table_name_hash;/*!< hash table chain node */ - innodb_idx_translate_t idx_trans_tbl; /*!< index translation - table between MySQL and - Innodb */ + THR_LOCK lock; + const char* table_name; /*!< InnoDB table name */ + uint use_count; /*!< reference count, + incremented in get_share() + and decremented in + free_share() */ + void* table_name_hash; + /*!< hash table chain node */ + innodb_idx_translate_t + idx_trans_tbl; /*!< index translation table between + MySQL and InnoDB */ } INNOBASE_SHARE; - /** Prebuilt structures in an 
InnoDB table handle used within MySQL */ struct row_prebuilt_t; @@ -73,205 +84,262 @@ struct ha_table_option_struct uint encryption; /*!< DEFAULT, ON, OFF */ ulonglong encryption_key_id; /*!< encryption key id */ }; - - +/* JAN: TODO: MySQL 5.7 handler.h */ +struct st_handler_tablename +{ + const char *db; + const char *tablename; +}; /** The class defining a handle to an Innodb table */ class ha_innobase: public handler { - row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used - to save CPU time with prebuilt data - structures*/ - THD* user_thd; /*!< the thread handle of the user - currently using the handle; this is - set in external_lock function */ - THR_LOCK_DATA lock; - INNOBASE_SHARE* share; /*!< information for MySQL - table locking */ - - uchar* upd_buf; /*!< buffer used in updates */ - ulint upd_buf_size; /*!< the size of upd_buf in bytes */ - Table_flags int_table_flags; - uint primary_key; - ulong start_of_scan; /*!< this is set to 1 when we are - starting a table scan but have not - yet fetched any row, else 0 */ - uint last_match_mode;/* match mode of the latest search: - ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX, - or undefined */ - uint num_write_row; /*!< number of write_row() calls */ - - ha_statistics* ha_partition_stats; /*!< stats of the partition owner - handler (if there is one) */ - uint store_key_val_for_row(uint keynr, char* buff, uint buff_len, - const uchar* record); - inline void update_thd(THD* thd); - void update_thd(); - int change_active_index(uint keynr); - int general_fetch(uchar* buf, uint direction, uint match_mode); - dberr_t innobase_lock_autoinc(); - ulonglong innobase_peek_autoinc(); - dberr_t innobase_set_max_autoinc(ulonglong auto_inc); - dberr_t innobase_reset_autoinc(ulonglong auto_inc); - dberr_t innobase_get_autoinc(ulonglong* value); - void innobase_initialize_autoinc(); - dict_index_t* innobase_get_index(uint keynr); - -#ifdef WITH_WSREP - int wsrep_append_keys(THD *thd, bool shared, - const uchar* record0, const 
uchar* record1); -#endif - /* Init values for the class: */ - public: - ha_innobase(handlerton *hton, TABLE_SHARE *table_arg); +public: + ha_innobase(handlerton* hton, TABLE_SHARE* table_arg); ~ha_innobase(); - /* - Get the row type from the storage engine. If this method returns - ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. - */ + + /** Get the row type from the storage engine. If this method returns + ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. */ enum row_type get_row_type() const; const char* table_type() const; + const char* index_type(uint key_number); + const char** bas_ext() const; + Table_flags table_flags() const; + ulong index_flags(uint idx, uint part, bool all_parts) const; + uint max_supported_keys() const; + uint max_supported_key_length() const; + uint max_supported_key_part_length() const; + const key_map* keys_to_use_for_scanning(); + /** Opens dictionary table object using table name. For partition, we need to + try alternative lower/upper case names to support moving data files across + platforms. 
+ @param[in] table_name name of the table/partition + @param[in] norm_name normalized name of the table/partition + @param[in] is_partition if this is a partition of a table + @param[in] ignore_err error to ignore for loading dictionary object + @return dictionary table object or NULL if not found */ + static dict_table_t* open_dict_table( + const char* table_name, + const char* norm_name, + bool is_partition, + dict_err_ignore_t ignore_err); + int open(const char *name, int mode, uint test_if_locked); + handler* clone(const char *name, MEM_ROOT *mem_root); + int close(void); + double scan_time(); + double read_time(uint index, uint ranges, ha_rows rows); + longlong get_memory_buffer_size() const; + int delete_all_rows(); + int write_row(uchar * buf); + int update_row(const uchar * old_data, uchar * new_data); + int delete_row(const uchar * buf); + bool was_semi_consistent_read(); + void try_semi_consistent_read(bool yes); + void unlock_row(); int index_init(uint index, bool sorted); + int index_end(); - int index_read(uchar * buf, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(uchar * buf, uint index, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); + + int index_read( + uchar* buf, + const uchar* key, + uint key_len, + ha_rkey_function find_flag); + int index_read_last(uchar * buf, const uchar * key, uint key_len); + int index_next(uchar * buf); + int index_next_same(uchar * buf, const uchar *key, uint keylen); + int index_prev(uchar * buf); + int index_first(uchar * buf); + int index_last(uchar * buf); int rnd_init(bool scan); + int rnd_end(); + int rnd_next(uchar *buf); + int rnd_pos(uchar * buf, uchar *pos); int ft_init(); + void ft_end(); - FT_INFO *ft_init_ext(uint flags, uint inx, String* key); + + FT_INFO* ft_init_ext(uint flags, uint inx, String* key); + + FT_INFO* ft_init_ext_with_hints( + uint inx, + String* key, + void* hints); + //Ft_hints* hints); + int ft_read(uchar* buf); + int 
enable_indexes(uint mode); + int disable_indexes(uint mode); + void position(const uchar *record); + int info(uint); + int analyze(THD* thd,HA_CHECK_OPT* check_opt); + int optimize(THD* thd,HA_CHECK_OPT* check_opt); + int discard_or_import_tablespace(my_bool discard); - int extra(enum ha_extra_function operation); + + int extra(ha_extra_function operation); + int reset(); + int external_lock(THD *thd, int lock_type); - int transactional_table_lock(THD *thd, int lock_type); + int start_stmt(THD *thd, thr_lock_type lock_type); + void position(uchar *record); - ha_rows records_in_range(uint inx, key_range *min_key, key_range - *max_key); + + ha_rows records_in_range( + uint inx, + key_range* min_key, + key_range* max_key); + ha_rows estimate_rows_upper_bound(); + // JAN: TODO: MySQL 5.7 + // int records(ha_rows* num_rows); + void update_create_info(HA_CREATE_INFO* create_info); - int parse_table_name(const char*name, - HA_CREATE_INFO* create_info, - ulint flags, - ulint flags2, - char* norm_name, - char* temp_path, - char* remote_path); + + int create( + const char* name, + TABLE* form, + HA_CREATE_INFO* create_info); + const char* check_table_options(THD *thd, TABLE* table, HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); - int create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info); + int truncate(); + int delete_table(const char *name); + int rename_table(const char* from, const char* to); int defragment_table(const char* name, const char* index_name, bool async); int check(THD* thd, HA_CHECK_OPT* check_opt); char* update_table_comment(const char* comment); + char* get_foreign_key_create_info(); + int get_foreign_key_list(THD *thd, List *f_key_list); - int get_parent_foreign_key_list(THD *thd, - List *f_key_list); + + int get_parent_foreign_key_list( + THD* thd, + List* f_key_list); + int get_cascade_foreign_key_table_list( + THD* thd, + List* fk_table_list); + + bool can_switch_engines(); + uint 
referenced_by_foreign_key(); + void free_foreign_key_create_info(char* str); - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); + + //uint lock_count(void) const; + + THR_LOCK_DATA** store_lock( + THD* thd, + THR_LOCK_DATA** to, + thr_lock_type lock_type); + void init_table_handle_for_HANDLER(); - virtual void get_auto_increment(ulonglong offset, ulonglong increment, - ulonglong nb_desired_values, - ulonglong *first_value, - ulonglong *nb_reserved_values); + + virtual void get_auto_increment( + ulonglong offset, + ulonglong increment, + ulonglong nb_desired_values, + ulonglong* first_value, + ulonglong* nb_reserved_values); int reset_auto_increment(ulonglong value); virtual bool get_error_message(int error, String *buf); + virtual bool get_foreign_dup_key(char*, uint, char*, uint); + uint8 table_cache_type(); - /* - ask handler about permission to cache table during query registration + + /** + Ask handler about permission to cache table during query registration */ - my_bool register_query_cache_table(THD *thd, char *table_key, - uint key_length, - qc_engine_callback *call_back, - ulonglong *engine_data); - static const char *get_mysql_bin_log_name(); - static ulonglong get_mysql_bin_log_pos(); + my_bool register_query_cache_table( + THD* thd, + char* table_key, + uint key_length, + qc_engine_callback* call_back, + ulonglong* engine_data); + bool primary_key_is_clustered(); - int cmp_ref(const uchar *ref1, const uchar *ref2); + + int cmp_ref(const uchar* ref1, const uchar* ref2); + /** On-line ALTER TABLE interface @see handler0alter.cc @{ */ /** Check if InnoDB supports a particular alter table in-place - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. 
- @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported - @retval HA_ALTER_INPLACE_NO_LOCK Supported + @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported + @retval HA_ALTER_INPLACE_NO_LOCK Supported @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE - Supported, but requires lock - during main phase and exclusive - lock during prepare phase. + Supported, but requires lock during main phase and + exclusive lock during prepare phase. @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE - Supported, prepare phase - requires exclusive lock. - */ + Supported, prepare phase requires exclusive lock. */ enum_alter_inplace_result check_if_supported_inplace_alter( TABLE* altered_table, Alter_inplace_info* ha_alter_info); + /** Allows InnoDB to update internal structures with concurrent writes blocked (provided that check_if_supported_inplace_alter() did not return HA_ALTER_INPLACE_NO_LOCK). This will be invoked before inplace_alter_table(). - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. - @retval true Failure - @retval false Success + @retval true Failure + @retval false Success */ bool prepare_inplace_alter_table( TABLE* altered_table, @@ -282,12 +350,12 @@ class ha_innobase: public handler The level of concurrency allowed during this operation depends on the return value from check_if_supported_inplace_alter(). - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. 
- @retval true Failure - @retval false Success + @retval true Failure + @retval false Success */ bool inplace_alter_table( TABLE* altered_table, @@ -300,22 +368,121 @@ class ha_innobase: public handler inplace_alter_table() and thus might be higher than during prepare_inplace_alter_table(). (E.g concurrent writes were blocked during prepare, but might not be during commit). - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. - @param commit true => Commit, false => Rollback. - @retval true Failure - @retval false Success + @param commit true => Commit, false => Rollback. + @retval true Failure + @retval false Success */ bool commit_inplace_alter_table( TABLE* altered_table, Alter_inplace_info* ha_alter_info, bool commit); /** @} */ - void set_partition_owner_stats(ha_statistics *stats); - bool check_if_incompatible_data(HA_CREATE_INFO *info, - uint table_changes); -private: + + bool check_if_incompatible_data( + HA_CREATE_INFO* info, + uint table_changes); + + /** @name Multi Range Read interface @{ */ + + /** Initialize multi range read @see DsMrr_impl::dsmrr_init + @param seq + @param seq_init_param + @param n_ranges + @param mode + @param buf */ + int multi_range_read_init( + RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, + uint mode, + HANDLER_BUFFER* buf); + + /** Process next multi range read @see DsMrr_impl::dsmrr_next + @param range_info */ + int multi_range_read_next(range_id_t *range_info); + + /** Initialize multi range read and get information. 
+ @see ha_myisam::multi_range_read_info_const + @see DsMrr_impl::dsmrr_info_const + @param keyno + @param seq + @param seq_init_param + @param n_ranges + @param bufsz + @param flags + @param cost */ + ha_rows multi_range_read_info_const( + uint keyno, + RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, + uint* bufsz, + uint* flags, + Cost_estimate* cost); + + /** Initialize multi range read and get information. + @see DsMrr_impl::dsmrr_info + @param keyno + @param seq + @param seq_init_param + @param n_ranges + @param bufsz + @param flags + @param cost */ + ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint* bufsz, uint* flags, + Cost_estimate* cost); + + int multi_range_read_explain_info(uint mrr_mode, + char *str, size_t size); + + /** Attempt to push down an index condition. + @param[in] keyno MySQL key number + @param[in] idx_cond Index condition to be checked + @return idx_cond if pushed; NULL if not pushed */ + Item* idx_cond_push(uint keyno, Item* idx_cond); + /* @} */ + + /* A helper function for index_cond_func_innodb: */ + bool is_thd_killed(); + +protected: + + /** + MySQL calls this method at the end of each statement. This method + exists for readability only, called from reset(). The name reset() + doesn't give any clue that it is called at the end of a statement. */ + int end_stmt(); + + dberr_t innobase_get_autoinc(ulonglong* value); + void innobase_initialize_autoinc(); + dberr_t innobase_lock_autoinc(); + ulonglong innobase_peek_autoinc(); + dberr_t innobase_set_max_autoinc(ulonglong auto_inc); + dberr_t innobase_reset_autoinc(ulonglong auto_inc); + + /** Resets a query execution 'template'. + @see build_template() */ + void reset_template(); + + /** Write Row Interface optimized for Intrinsic table. 
*/ + int intrinsic_table_write_row(uchar* record); + +protected: + inline void update_thd(THD* thd); + void update_thd(); + + int general_fetch(uchar* buf, uint direction, uint match_mode); + int change_active_index(uint keynr); + dict_index_t* innobase_get_index(uint keynr); + +#ifdef WITH_WSREP + int wsrep_append_keys(THD *thd, bool shared, + const uchar* record0, const uchar* record1); +#endif /** Builds a 'template' to the prebuilt struct. The template is used in fast retrieval of just those column @@ -323,73 +490,56 @@ private: @param whole_row true if access is needed to a whole row, false if accessing individual fields is enough */ void build_template(bool whole_row); - /** Resets a query execution 'template'. - @see build_template() */ - inline void reset_template(); - int info_low(uint, bool); + virtual int info_low(uint, bool); -public: - /** @name Multi Range Read interface @{ */ - /** Initialize multi range read @see DsMrr_impl::dsmrr_init - * @param seq - * @param seq_init_param - * @param n_ranges - * @param mode - * @param buf - */ - int multi_range_read_init(RANGE_SEQ_IF* seq, - void* seq_init_param, - uint n_ranges, uint mode, - HANDLER_BUFFER* buf); - /** Process next multi range read @see DsMrr_impl::dsmrr_next - * @param range_info - */ - int multi_range_read_next(range_id_t *range_info); - /** Initialize multi range read and get information. - * @see ha_myisam::multi_range_read_info_const - * @see DsMrr_impl::dsmrr_info_const - * @param keyno - * @param seq - * @param seq_init_param - * @param n_ranges - * @param bufsz - * @param flags - * @param cost - */ - ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF* seq, - void* seq_init_param, - uint n_ranges, uint* bufsz, - uint* flags, Cost_estimate* cost); - /** Initialize multi range read and get information. 
- * @see DsMrr_impl::dsmrr_info - * @param keyno - * @param seq - * @param seq_init_param - * @param n_ranges - * @param bufsz - * @param flags - * @param cost - */ - ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint* bufsz, uint* flags, - Cost_estimate* cost); - - int multi_range_read_explain_info(uint mrr_mode, char *str, - size_t size); - /** Attempt to push down an index condition. - * @param[in] keyno MySQL key number - * @param[in] idx_cond Index condition to be checked - * @return idx_cond if pushed; NULL if not pushed - */ - class Item* idx_cond_push(uint keyno, class Item* idx_cond); - -private: /** The multi range read session object */ - DsMrr_impl ds_mrr; - /* @} */ + DsMrr_impl m_ds_mrr; + + /** Save CPU time with prebuilt/cached data structures */ + row_prebuilt_t* m_prebuilt; + + /** prebuilt pointer for the right prebuilt. For native + partitioning, points to the current partition prebuilt. */ + row_prebuilt_t** m_prebuilt_ptr; + + /** Thread handle of the user currently using the handler; + this is set in external_lock function */ + THD* m_user_thd; + + THR_LOCK_DATA lock; + + /** information for MySQL table locking */ + INNOBASE_SHARE* m_share; + + /** buffer used in updates */ + uchar* m_upd_buf; + + /** the size of upd_buf in bytes */ + ulint m_upd_buf_size; + + /** Flags that specify the handler instance (table) capability. 
*/ + Table_flags m_int_table_flags; + + /** Index into the server's primary key meta-data table->key_info{} */ + uint m_primary_key; + + /** this is set to 1 when we are starting a table scan but have + not yet fetched any row, else false */ + bool m_start_of_scan; + + /*!< match mode of the latest search: ROW_SEL_EXACT, + ROW_SEL_EXACT_PREFIX, or undefined */ + uint m_last_match_mode; + + /** number of write_row() calls */ + uint m_num_write_row; + + /** If mysql has locked with external_lock() */ + bool m_mysql_has_locked; }; + /* Some accessor functions which the InnoDB plugin needs, but which can not be added to mysql/plugin.h as part of the public interface; the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ @@ -399,70 +549,50 @@ the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ #endif LEX_STRING* thd_query_string(MYSQL_THD thd); +size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t buflen); extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); -/** - Check if a user thread is a replication slave thread - @param thd user thread - @retval 0 the user thread is not a replication slave thread - @retval 1 the user thread is a replication slave thread -*/ +/** Check if a user thread is a replication slave thread +@param thd user thread +@retval 0 the user thread is not a replication slave thread +@retval 1 the user thread is a replication slave thread */ int thd_slave_thread(const MYSQL_THD thd); -/** - Check if a user thread is running a non-transactional update - @param thd user thread - @retval 0 the user thread is not running a non-transactional update - @retval 1 the user thread is running a non-transactional update -*/ +/** Check if a user thread is running a non-transactional update +@param thd user thread +@retval 0 the user thread is not running a non-transactional update +@retval 1 the user thread is running a non-transactional update */ int thd_non_transactional_update(const MYSQL_THD thd); -/** - 
Get the user thread's binary logging format - @param thd user thread - @return Value to be used as index into the binlog_format_names array -*/ +/** Get the user thread's binary logging format +@param thd user thread +@return Value to be used as index into the binlog_format_names array */ int thd_binlog_format(const MYSQL_THD thd); -/** - Mark transaction to rollback and mark error as fatal to a sub-statement. - @param thd Thread handle - @param all TRUE <=> rollback main transaction. -*/ -void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); - -/** - Check if binary logging is filtered for thread's current db. - @param thd Thread handle - @retval 1 the query is not filtered, 0 otherwise. -*/ +/** Check if binary logging is filtered for thread's current db. +@param thd Thread handle +@retval 1 the query is not filtered, 0 otherwise. */ bool thd_binlog_filter_ok(const MYSQL_THD thd); -/** - Check if the query may generate row changes which - may end up in the binary. - @param thd Thread handle - @return 1 the query may generate row changes, 0 otherwise. +/** Check if the query may generate row changes which may end up in the binary. +@param thd Thread handle +@retval 1 the query may generate row changes, 0 otherwise. */ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd); -/** - Gets information on the durability property requested by - a thread. - @param thd Thread handle - @return a durability property. -*/ -enum durability_properties thd_get_durability_property(const MYSQL_THD thd); +/** Gets information on the durability property requested by a thread. +@param thd Thread handle +@return a durability property. */ +durability_properties thd_get_durability_property(const MYSQL_THD thd); /** Is strict sql_mode set. -@param thd Thread object -@return True if sql_mode has strict mode (all or trans), false otherwise. 
-*/ -bool thd_is_strict_mode(const MYSQL_THD thd) -MY_ATTRIBUTE((nonnull)); +@param thd Thread object +@return True if sql_mode has strict mode (all or trans), false otherwise. */ +bool thd_is_strict_mode(const MYSQL_THD thd); + } /* extern "C" */ /** Get the file name and position of the MySQL binlog corresponding to the @@ -470,13 +600,41 @@ MY_ATTRIBUTE((nonnull)); */ extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file); +/** Get the partition_info working copy. +@param thd Thread object. +@return NULL or pointer to partition_info working copy. */ +/* JAN: TODO: MySQL 5.7 Partitioning +partition_info* +thd_get_work_part_info( + THD* thd); +*/ + +struct trx_t; #ifdef WITH_WSREP #include +//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2); + +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd); + + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); #endif extern const struct _ft_vft ft_vft_result; -/* Structure Returned by ha_innobase::ft_init_ext() */ +#define FTS_NGRAM_PARSER_NAME "ngram" + +/** Structure Returned by ha_innobase::ft_init_ext() */ typedef struct new_ft_info { struct _ft_vft *please; @@ -485,49 +643,533 @@ typedef struct new_ft_info fts_result_t* ft_result; } NEW_FT_INFO; -/*********************************************************************//** +/** Allocates an InnoDB transaction for a MySQL handler object. 
-@return InnoDB transaction handle */ +@return InnoDB transaction handle */ trx_t* innobase_trx_allocate( -/*==================*/ MYSQL_THD thd); /*!< in: user thread handle */ +/** Match index columns between MySQL and InnoDB. +This function checks whether the index column information +is consistent between KEY info from mysql and that from innodb index. +@param[in] key_info Index info from mysql +@param[in] index_info Index info from InnoDB +@return true if all column types match. */ +bool +innobase_match_index_columns( + const KEY* key_info, + const dict_index_t* index_info); + /*********************************************************************//** This function checks each index name for a table against reserved system default primary index name 'GEN_CLUST_INDEX'. If a name matches, this function pushes an warning message to the client, and returns true. @return true if the index name matches the reserved name */ -UNIV_INTERN bool innobase_index_name_is_reserved( -/*============================*/ - THD* thd, /*!< in/out: MySQL connection */ - const KEY* key_info, /*!< in: Indexes to be created */ - ulint num_of_keys) /*!< in: Number of indexes to - be created. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + THD* thd, /*!< in/out: MySQL connection */ + const KEY* key_info, /*!< in: Indexes to be + created */ + ulint num_of_keys) /*!< in: Number of indexes to + be created. */ + MY_ATTRIBUTE((warn_unused_result)); + +extern const char reserved_file_per_table_space_name[]; -/*****************************************************************//** #ifdef WITH_WSREP -extern "C" int wsrep_trx_is_aborting(void *thd_ptr); +//extern "C" int wsrep_trx_is_aborting(void *thd_ptr); #endif -Determines InnoDB table flags. -@retval true if successful, false if error */ -UNIV_INTERN + +/** Check if the explicit tablespace targeted is file_per_table. +@param[in] create_info Metadata for the table to create. 
+@return true if the table is intended to use a file_per_table tablespace. */ +UNIV_INLINE bool -innobase_table_flags( -/*=================*/ - const TABLE* form, /*!< in: table */ - const HA_CREATE_INFO* create_info, /*!< in: information - on table columns and indexes */ - THD* thd, /*!< in: connection */ - bool use_tablespace, /*!< in: whether to create - outside system tablespace */ - ulint* flags, /*!< out: DICT_TF flags */ - ulint* flags2) /*!< out: DICT_TF2 flags */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); +tablespace_is_file_per_table( + const HA_CREATE_INFO* create_info) +{ + return(create_info->tablespace != NULL + && (0 == strcmp(create_info->tablespace, + reserved_file_per_table_space_name))); +} + +/** Check if table will be explicitly put in an existing shared general +or system tablespace. +@param[in] create_info Metadata for the table to create. +@return true if the table will use a shared general or system tablespace. */ +UNIV_INLINE +bool +tablespace_is_shared_space( +const HA_CREATE_INFO* create_info) +{ + return(create_info->tablespace != NULL + && create_info->tablespace[0] != '\0' + && (0 != strcmp(create_info->tablespace, + reserved_file_per_table_space_name))); +} + +/** Check if table will be explicitly put in a general tablespace. +@param[in] create_info Metadata for the table to create. +@return true if the table will use a general tablespace. */ +UNIV_INLINE +bool +tablespace_is_general_space( + const HA_CREATE_INFO* create_info) +{ + return(create_info->tablespace != NULL + && create_info->tablespace[0] != '\0' + && (0 != strcmp(create_info->tablespace, + reserved_file_per_table_space_name)) + && (0 != strcmp(create_info->tablespace, + reserved_temporary_space_name)) + && (0 != strcmp(create_info->tablespace, + reserved_system_space_name))); +} + +/** Parse hint for table and its indexes, and update the information +in dictionary. 
+@param[in] thd Connection thread +@param[in,out] table Target table +@param[in] table_share Table definition */ +void +innobase_parse_hint_from_comment( + THD* thd, + dict_table_t* table, + const TABLE_SHARE* table_share); + +/** Class for handling create table information. */ +class create_table_info_t +{ +public: + /** Constructor. + Used in two ways: + - all but file_per_table is used, when creating the table. + - all but name/path is used, when validating options and using flags. */ + create_table_info_t( + THD* thd, + TABLE* form, + HA_CREATE_INFO* create_info, + char* table_name, + char* temp_path, + char* remote_path, + char* tablespace) + :m_thd(thd), + m_form(form), + m_create_info(create_info), + m_table_name(table_name), + m_temp_path(temp_path), + m_remote_path(remote_path), + m_tablespace(tablespace), + m_innodb_file_per_table(srv_file_per_table) + {} + + /** Initialize the object. */ + int initialize(); + + /** Set m_tablespace_type. */ + void set_tablespace_type(bool table_being_altered_is_file_per_table); + + /** Create the internal innodb table. */ + int create_table(); + + /** Update the internal data dictionary. */ + int create_table_update_dict(); + + /** Validates the create options. Checks that the options + KEY_BLOCK_SIZE, ROW_FORMAT, DATA DIRECTORY, TEMPORARY & TABLESPACE + are compatible with each other and other settings. + These CREATE OPTIONS are not validated here unless innodb_strict_mode + is on. With strict mode, this function will report each problem it + finds using a custom message with error code + ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message. + @return NULL if valid, string name of bad option if not. */ + const char* create_options_are_invalid(); + + /** Validates engine specific table options not handled by + SQL-parser. + @return NULL if valid, string name of bad option if not. */ + const char* check_table_options(); + + /** Validate DATA DIRECTORY option. 
*/ + bool create_option_data_directory_is_valid(); + + /** Validate TABLESPACE option. */ + bool create_option_tablespace_is_valid(); + + /** Validate COMPRESSION option. */ + bool create_option_compression_is_valid(); + + /** Prepare to create a table. */ + int prepare_create_table(const char* name); + + void allocate_trx(); + + /** Determines InnoDB table flags. + If strict_mode=OFF, this will adjust the flags to what should be assumed. + @retval true if successful, false if error */ + bool innobase_table_flags(); + + /** Set flags and append '/' to remote path if necessary. */ + void set_remote_path_flags(); + + /** Get table flags. */ + ulint flags() const + { return(m_flags); } + + /** Get table flags2. */ + ulint flags2() const + { return(m_flags2); } + + /** Get trx. */ + trx_t* trx() const + { return(m_trx); } + + /** Return table name. */ + const char* table_name() const + { return(m_table_name); } + + THD* thd() const + { return(m_thd); } + + inline bool is_intrinsic_temp_table() const + { + /* DICT_TF2_INTRINSIC implies DICT_TF2_TEMPORARY */ + ut_ad(!(m_flags2 & DICT_TF2_INTRINSIC) + || (m_flags2 & DICT_TF2_TEMPORARY)); + return((m_flags2 & DICT_TF2_INTRINSIC) != 0); + } + + /** Normalizes a table name string. + A normalized name consists of the database name catenated to '/' and + table name. An example: test/mytable. On Windows normalization puts + both the database name and the table name always to lower case if + "set_lower_case" is set to true. + @param[in,out] norm_name Buffer to return the normalized name in. + @param[in] name Table name string. + @param[in] set_lower_case True if we want to set name to lower + case. */ + static void normalize_table_name_low( + char* norm_name, + const char* name, + ibool set_lower_case); + +private: + /** Parses the table name into normal name and either temp path or + remote path if needed.*/ + int + parse_table_name( + const char* name); + + /** Create the internal innodb table definition. 
*/ + int create_table_def(); + + /** Connection thread handle. */ + THD* m_thd; + + /** InnoDB transaction handle. */ + trx_t* m_trx; + + /** Information on table columns and indexes. */ + const TABLE* m_form; + + /** Create options. */ + HA_CREATE_INFO* m_create_info; + + /** Table name */ + char* m_table_name; + /** If this is a table explicitly created by the user with the + TEMPORARY keyword, then this parameter is the dir path where the + table should be placed if we create an .ibd file for it + (no .ibd extension in the path, though). + Otherwise this is a zero length-string */ + char* m_temp_path; + + /** Remote path (DATA DIRECTORY) or zero length-string */ + char* m_remote_path; + + /** Tablespace name or zero length-string. */ + char* m_tablespace; + + /** Local copy of srv_file_per_table. */ + bool m_innodb_file_per_table; + + /** Allow file_per_table for this table either because: + 1) the setting innodb_file_per_table=on, + 2) it was explicitly requested by tablespace=innodb_file_per_table. + 3) the table being altered is currently file_per_table */ + bool m_allow_file_per_table; + + /** After all considerations, this shows whether we will actually + create a table and tablespace using file-per-table. 
*/ + bool m_use_file_per_table; + + /** Using DATA DIRECTORY */ + bool m_use_data_dir; + + /** Using a Shared General Tablespace */ + bool m_use_shared_space; + + /** Table flags */ + ulint m_flags; + + /** Table flags2 */ + ulint m_flags2; +}; + +/** +Retrieve the FTS Relevance Ranking result for doc with doc_id +of prebuilt->fts_doc_id +@return the relevance ranking value */ +float +innobase_fts_retrieve_ranking( + FT_INFO* fts_hdl); /*!< in: FTS handler */ + +/** +Find and Retrieve the FTS Relevance Ranking result for doc with doc_id +of prebuilt->fts_doc_id +@return the relevance ranking value */ +float +innobase_fts_find_ranking( + FT_INFO* fts_hdl, /*!< in: FTS handler */ + uchar* record, /*!< in: Unused */ + uint len); /*!< in: Unused */ + +/** +Free the memory for the FTS handler */ +void +innobase_fts_close_ranking( + FT_INFO* fts_hdl); /*!< in: FTS handler */ + +/** +Initialize the table FTS stopword list +@return TRUE if success */ +ibool +innobase_fts_load_stopword( +/*=======================*/ + dict_table_t* table, /*!< in: Table has the FTS */ + trx_t* trx, /*!< in: transaction */ + THD* thd) /*!< in: current thread */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Some defines for innobase_fts_check_doc_id_index() return value */ +enum fts_doc_id_index_enum { + FTS_INCORRECT_DOC_ID_INDEX, + FTS_EXIST_DOC_ID_INDEX, + FTS_NOT_EXIST_DOC_ID_INDEX +}; + +/** +Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME +on the Doc ID column. +@return the status of the FTS_DOC_ID index */ +fts_doc_id_index_enum +innobase_fts_check_doc_id_index( + const dict_table_t* table, /*!< in: table definition */ + const TABLE* altered_table, /*!< in: MySQL table + that is being altered */ + ulint* fts_doc_col_no) /*!< out: The column number for + Doc ID */ + MY_ATTRIBUTE((warn_unused_result)); + +/** +Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME +on the Doc ID column in MySQL create index definition. 
+@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index, +FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */ +fts_doc_id_index_enum +innobase_fts_check_doc_id_index_in_def( + ulint n_key, /*!< in: Number of keys */ + const KEY* key_info) /*!< in: Key definitions */ + MY_ATTRIBUTE((warn_unused_result)); + +/** +@return version of the extended FTS API */ +uint +innobase_fts_get_version(); + +/** +@return Which part of the extended FTS API is supported */ +ulonglong +innobase_fts_flags(); + +/** +Find and Retrieve the FTS doc_id for the current result row +@return the document ID */ +ulonglong +innobase_fts_retrieve_docid( +/*========================*/ + FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ + +/** +Find and retrieve the size of the current result +@return number of matching rows */ +ulonglong +innobase_fts_count_matches( +/*=======================*/ + FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ + +/** +Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. +Those flags are stored in .frm file and end up in the MySQL table object, +but are frequently used inside InnoDB so we keep their copies into the +InnoDB table object. */ +void +innobase_copy_frm_flags_from_create_info( + dict_table_t* innodb_table, /*!< in/out: InnoDB table */ + const HA_CREATE_INFO* create_info); /*!< in: create info */ + +/** +Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object. +Those flags are stored in .frm file and end up in the MySQL table object, +but are frequently used inside InnoDB so we keep their copies into the +InnoDB table object. 
*/ +void +innobase_copy_frm_flags_from_table_share( + dict_table_t* innodb_table, /*!< in/out: InnoDB table */ + const TABLE_SHARE* table_share); /*!< in: table share */ + +/** Set up base columns for virtual column +@param[in] table the InnoDB table +@param[in] field MySQL field +@param[in,out] v_col virtual column to be set up */ +void +innodb_base_col_setup( + dict_table_t* table, + const Field* field, + dict_v_col_t* v_col); + +/** Set up base columns for stored column +@param[in] table InnoDB table +@param[in] field MySQL field +@param[in,out] s_col stored column */ +void +innodb_base_col_setup_for_stored( + const dict_table_t* table, + const Field* field, + dict_s_col_t* s_col); + +/** whether this is a stored column */ +// JAN: TODO: MySQL 5.7 virtual fields +//#define innobase_is_s_fld(field) ((field)->gcol_info && (field)->stored_in_db) +#define innobase_is_s_fld(field) (field == NULL) +// JAN: TODO: MySQL 5.7 virtual fields +/** whether this is a computed virtual column */ +//#define innobase_is_v_fld(field) ((field)->gcol_info && !(field)->stored_in_db) +#define innobase_is_v_fld(field) (field == NULL) + +/** Release temporary latches. +Call this function when mysqld passes control to the client. That is to +avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more +documentation, see handler.cc. +@param[in] hton Handlerton. +@param[in] thd MySQL thread. +@return 0 */ +int +innobase_release_temporary_latches( + handlerton* hton, + THD* thd); + +/** Always normalize table name to lower case on Windows */ +#ifdef _WIN32 +#define normalize_table_name(norm_name, name) \ + create_table_info_t::normalize_table_name_low(norm_name, name, TRUE) +#else +#define normalize_table_name(norm_name, name) \ + create_table_info_t::normalize_table_name_low(norm_name, name, FALSE) +#endif /* _WIN32 */ + +/** Obtain the InnoDB transaction of a MySQL thread. +@param[in,out] thd MySQL thread handler. 
+@return reference to transaction pointer */ +trx_t*& thd_to_trx(THD* thd); + +/** Converts an InnoDB error code to a MySQL error code. +Also tells to MySQL about a possible transaction rollback inside InnoDB caused +by a lock wait timeout or a deadlock. +@param[in] error InnoDB error code. +@param[in] flags InnoDB table flags or 0. +@param[in] thd MySQL thread or NULL. +@return MySQL error code */ +int +convert_error_code_to_mysql( + dberr_t error, + ulint flags, + THD* thd); + +/** Converts a search mode flag understood by MySQL to a flag understood +by InnoDB. +@param[in] find_flag MySQL search mode flag. +@return InnoDB search mode flag. */ +page_cur_mode_t +convert_search_mode_to_innobase( + enum ha_rkey_function find_flag); + +/** Commits a transaction in an InnoDB database. +@param[in] trx Transaction handle. */ +void +innobase_commit_low( + trx_t* trx); + +extern my_bool innobase_stats_on_metadata; + +/** Calculate Record Per Key value. +Need to exclude the NULL value if innodb_stats_method is set to "nulls_ignored" +@param[in] index InnoDB index. +@param[in] i The column we are calculating rec per key. +@param[in] records Estimated total records. 
+@return estimated record per key value */ +/* JAN: TODO: MySQL 5.7 */ +typedef float rec_per_key_t; +rec_per_key_t +innodb_rec_per_key( + dict_index_t* index, + ulint i, + ha_rows records); + +/** Build template for the virtual columns and their base columns +@param[in] table MySQL TABLE +@param[in] ib_table InnoDB dict_table_t +@param[in,out] s_templ InnoDB template structure +@param[in] add_v new virtual columns added along with + add index call +@param[in] locked true if innobase_share_mutex is held +@param[in] share_tbl_name original MySQL table name */ +void +innobase_build_v_templ( + const TABLE* table, + const dict_table_t* ib_table, + dict_vcol_templ_t* s_templ, + const dict_add_v_col_t* add_v, + bool locked, + const char* share_tbl_name); + +/** callback used by MySQL server layer to initialized +the table virtual columns' template +@param[in] table MySQL TABLE +@param[in,out] ib_table InnoDB dict_table_t */ +void +innobase_build_v_templ_callback( + const TABLE* table, + void* ib_table); + +/** Callback function definition, used by MySQL server layer to initialized +the table virtual columns' template */ +typedef void (*my_gcolumn_templatecallback_t)(const TABLE*, void*); + +/********************************************************************//** +Helper function to push frm mismatch error to error log and +if needed to sql-layer. */ +UNIV_INTERN +void +ib_push_frm_error( +/*==============*/ + THD* thd, /*!< in: MySQL thd */ + dict_table_t* ib_table, /*!< in: InnoDB table */ + TABLE* table, /*!< in: MySQL table */ + ulint n_keys, /*!< in: InnoDB #keys */ + bool push_warning); /*!< in: print warning ? */ /*****************************************************************//** Validates the create options. We may build on this function @@ -545,146 +1187,3 @@ create_options_are_invalid( HA_CREATE_INFO* create_info, /*!< in: create info. 
*/ bool use_tablespace) /*!< in: srv_file_per_table */ MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_retrieve_ranking( -/*==========================*/ - FT_INFO* fts_hdl); /*!< in: FTS handler */ - -/*********************************************************************//** -Find and Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_find_ranking( -/*======================*/ - FT_INFO* fts_hdl, /*!< in: FTS handler */ - uchar* record, /*!< in: Unused */ - uint len); /*!< in: Unused */ -/*********************************************************************//** -Free the memory for the FTS handler */ -UNIV_INTERN -void -innobase_fts_close_ranking( -/*=======================*/ - FT_INFO* fts_hdl) /*!< in: FTS handler */ - MY_ATTRIBUTE((nonnull)); -/*****************************************************************//** -Initialize the table FTS stopword list -@return TRUE if success */ -UNIV_INTERN -ibool -innobase_fts_load_stopword( -/*=======================*/ - dict_table_t* table, /*!< in: Table has the FTS */ - trx_t* trx, /*!< in: transaction */ - THD* thd) /*!< in: current thread */ - MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); - -/** Some defines for innobase_fts_check_doc_id_index() return value */ -enum fts_doc_id_index_enum { - FTS_INCORRECT_DOC_ID_INDEX, - FTS_EXIST_DOC_ID_INDEX, - FTS_NOT_EXIST_DOC_ID_INDEX -}; - -/*******************************************************************//** -Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME -on the Doc ID column. 
-@return the status of the FTS_DOC_ID index */ -UNIV_INTERN -enum fts_doc_id_index_enum -innobase_fts_check_doc_id_index( -/*============================*/ - const dict_table_t* table, /*!< in: table definition */ - const TABLE* altered_table, /*!< in: MySQL table - that is being altered */ - ulint* fts_doc_col_no) /*!< out: The column number for - Doc ID */ - MY_ATTRIBUTE((warn_unused_result)); - -/*******************************************************************//** -Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME -on the Doc ID column in MySQL create index definition. -@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index, -FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */ -UNIV_INTERN -enum fts_doc_id_index_enum -innobase_fts_check_doc_id_index_in_def( -/*===================================*/ - ulint n_key, /*!< in: Number of keys */ - const KEY* key_info) /*!< in: Key definitions */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************** -@return version of the extended FTS API */ -uint -innobase_fts_get_version(); - -/*********************************************************************** -@return Which part of the extended FTS API is supported */ -ulonglong -innobase_fts_flags(); - -/*********************************************************************** -Find and Retrieve the FTS doc_id for the current result row -@return the document ID */ -ulonglong -innobase_fts_retrieve_docid( -/*============================*/ - FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ - -/*********************************************************************** -Find and retrieve the size of the current result -@return number of matching rows */ -ulonglong -innobase_fts_count_matches( -/*============================*/ - FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ - -/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default -system clustered index 
when there is no primary key. */ -extern const char innobase_index_reserve_name[]; - -/*********************************************************************//** -Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. -Those flags are stored in .frm file and end up in the MySQL table object, -but are frequently used inside InnoDB so we keep their copies into the -InnoDB table object. */ -UNIV_INTERN -void -innobase_copy_frm_flags_from_create_info( -/*=====================================*/ - dict_table_t* innodb_table, /*!< in/out: InnoDB table */ - const HA_CREATE_INFO* create_info); /*!< in: create info */ - -/*********************************************************************//** -Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object. -Those flags are stored in .frm file and end up in the MySQL table object, -but are frequently used inside InnoDB so we keep their copies into the -InnoDB table object. */ -UNIV_INTERN -void -innobase_copy_frm_flags_from_table_share( -/*=====================================*/ - dict_table_t* innodb_table, /*!< in/out: InnoDB table */ - const TABLE_SHARE* table_share); /*!< in: table share */ - -/********************************************************************//** -Helper function to push frm mismatch error to error log and -if needed to sql-layer. */ -UNIV_INTERN -void -ib_push_frm_error( -/*==============*/ - THD* thd, /*!< in: MySQL thd */ - dict_table_t* ib_table, /*!< in: InnoDB table */ - TABLE* table, /*!< in: MySQL table */ - ulint n_keys, /*!< in: InnoDB #keys */ - bool push_warning); /*!< in: print warning ? */ diff --git a/storage/innobase/handler/ha_innopart.cc b/storage/innobase/handler/ha_innopart.cc new file mode 100644 index 00000000000..5fd02dfa016 --- /dev/null +++ b/storage/innobase/handler/ha_innopart.cc @@ -0,0 +1,4461 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. 
All rights reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/** @file ha_innopart.cc +Code for native partitioning in InnoDB. + +Created Nov 22, 2013 Mattias Jonsson */ + +#include "univ.i" + +/* Include necessary SQL headers */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Include necessary InnoDB headers */ +#include "btr0sea.h" +#include "dict0dict.h" +#include "dict0stats.h" +#include "lock0lock.h" +#include "row0import.h" +#include "row0merge.h" +#include "row0mysql.h" +#include "row0quiesce.h" +#include "row0sel.h" +#include "row0ins.h" +#include "row0upd.h" +#include "fsp0sysspace.h" +#include "ut0ut.h" + +#include "ha_innodb.h" +#include "ha_innopart.h" +#include "partition_info.h" +#include "key.h" + +#define INSIDE_HA_INNOPART_CC + +/* To be backwards compatible we also fold partition separator on windows. 
*/ +#ifdef _WIN32 +const char* part_sep = "#p#"; +const char* sub_sep = "#sp#"; +#else +const char* part_sep = "#P#"; +const char* sub_sep = "#SP#"; +#endif /* _WIN32 */ + +/* Partition separator for *nix platforms */ +const char* part_sep_nix = "#P#"; +const char* sub_sep_nix = "#SP#"; + +extern char* innobase_file_format_max; + +Ha_innopart_share::Ha_innopart_share( + TABLE_SHARE* table_share) + : + Partition_share(), + m_table_parts(), + m_index_mapping(), + m_tot_parts(), + m_index_count(), + m_ref_count(), + m_table_share(table_share) +{} + +Ha_innopart_share::~Ha_innopart_share() +{ + ut_ad(m_ref_count == 0); + if (m_table_parts != NULL) { + ut_free(m_table_parts); + m_table_parts = NULL; + } + if (m_index_mapping != NULL) { + ut_free(m_index_mapping); + m_index_mapping = NULL; + } +} + +/** Fold to lower case if windows or lower_case_table_names == 1. +@param[in,out] s String to fold.*/ +void +Ha_innopart_share::partition_name_casedn_str( + char* s) +{ +#ifdef _WIN32 + innobase_casedn_str(s); +#endif +} + +/** Translate and append partition name. +@param[out] to String to write in filesystem charset +@param[in] from Name in system charset +@param[in] sep Separator +@param[in] len Max length of to buffer +@return length of written string. */ +size_t +Ha_innopart_share::append_sep_and_name( + char* to, + const char* from, + const char* sep, + size_t len) +{ + size_t ret; + size_t sep_len = strlen(sep); + + ut_ad(len > sep_len + strlen(from)); + ut_ad(to != NULL); + ut_ad(from != NULL); + ut_ad(from[0] != '\0'); + memcpy(to, sep, sep_len); + + ret = tablename_to_filename(from, to + sep_len, + len - sep_len); + + /* Don't convert to lower case for nix style name. */ + if (strcmp(sep, part_sep_nix) != 0 + && strcmp(sep, sub_sep_nix) != 0) { + + partition_name_casedn_str(to); + } + + return(ret + sep_len); +} + +/** Copy a cached MySQL row. +If requested, also avoids overwriting non-read columns. +@param[out] buf Row in MySQL format. 
+@param[in] cached_row Which row to copy. */ +inline +void +ha_innopart::copy_cached_row( + uchar* buf, + const uchar* cached_row) +{ + if (m_prebuilt->keep_other_fields_on_keyread) { + row_sel_copy_cached_fields_for_mysql(buf, cached_row, + m_prebuilt); + } else { + memcpy(buf, cached_row, m_rec_length); + } +} + +/** Open one partition. +@param[in] part_id Partition id to open. +@param[in] partition_name Name of internal innodb table to open. +@return false on success else true. */ +bool +Ha_innopart_share::open_one_table_part( + uint part_id, + const char* partition_name) +{ + char norm_name[FN_REFLEN]; + + normalize_table_name(norm_name, partition_name); + m_table_parts[part_id] = + ha_innobase::open_dict_table(partition_name, norm_name, + TRUE, DICT_ERR_IGNORE_NONE); + + if (m_table_parts[part_id] == NULL) { + return(true); + } + + dict_table_t *ib_table = m_table_parts[part_id]; + if ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) + && m_table_share->fields + != (dict_table_get_n_user_cols(ib_table) + + dict_table_get_n_v_cols(ib_table))) + || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) + && (m_table_share->fields + != dict_table_get_n_user_cols(ib_table) + + dict_table_get_n_v_cols(ib_table) - 1))) { + ib::warn() << "Partition `" << get_partition_name(part_id) + << "` contains " << dict_table_get_n_user_cols(ib_table) + << " user defined columns in InnoDB, but " + << m_table_share->fields + << " columns in MySQL. Please check" + " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN + "innodb-troubleshooting.html for how to resolve the" + " issue."; + + /* Mark this partition as corrupted, so the drop table + or force recovery can still use it, but not others. + TODO: persist table->corrupted so it will be retained on + restart and out-of-bounds operations will see it. 
*/ + + ib_table->corrupted = true; + dict_table_close(ib_table, FALSE, FALSE); + } + + /* TODO: To save memory, compare with first partition and reuse + the column names etc. in the internal InnoDB meta-data cache. */ + + return(false); +} + +/** Set up the virtual column template for partition table, and points +all m_table_parts[]->vc_templ to it. +@param[in] table MySQL TABLE object +@param[in] ib_table InnoDB dict_table_t +@param[in] table_name Table name (db/table_name) */ +void +Ha_innopart_share::set_v_templ( + TABLE* table, + dict_table_t* ib_table, + const char* name) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (ib_table->n_v_cols > 0) { + for (ulint i = 0; i < m_tot_parts; i++) { + if (m_table_parts[i]->vc_templ == NULL) { + m_table_parts[i]->vc_templ + = UT_NEW_NOKEY(dict_vcol_templ_t()); + m_table_parts[i]->vc_templ->vtempl = NULL; + } else if (m_table_parts[i]->get_ref_count() == 1) { + /* Clean and refresh the template */ + dict_free_vc_templ(m_table_parts[i]->vc_templ); + m_table_parts[i]->vc_templ->vtempl = NULL; + } + + if (m_table_parts[i]->vc_templ->vtempl == NULL) { + innobase_build_v_templ( + table, ib_table, + m_table_parts[i]->vc_templ, + NULL, true, name); + } + } + } +} + +/** Initialize the share with table and indexes per partition. +@param[in] part_info Partition info (partition names to use). +@param[in] table_name Table name (db/table_name). +@return false on success else true. 
*/ +bool +Ha_innopart_share::open_table_parts( + partition_info* part_info, + const char* table_name) +{ + size_t table_name_len; + size_t len; + uint ib_num_index; + uint mysql_num_index; + char partition_name[FN_REFLEN]; + bool index_loaded = true; + +#ifndef DBUG_OFF + if (m_table_share->tmp_table == NO_TMP_TABLE) { + mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data); + } +#endif /* DBUG_OFF */ + m_ref_count++; + if (m_table_parts != NULL) { + ut_ad(m_ref_count > 1); + ut_ad(m_tot_parts > 0); + + /* Increment dict_table_t reference count for all partitions */ + mutex_enter(&dict_sys->mutex); + for (uint i = 0; i < m_tot_parts; i++) { + dict_table_t* table = m_table_parts[i]; + table->acquire(); + ut_ad(table->get_ref_count() >= m_ref_count); + } + mutex_exit(&dict_sys->mutex); + + return(false); + } + ut_ad(m_ref_count == 1); + m_tot_parts = part_info->get_tot_partitions(); + size_t table_parts_size = sizeof(dict_table_t*) * m_tot_parts; + m_table_parts = static_cast( + ut_zalloc(table_parts_size, mem_key_partitioning)); + if (m_table_parts == NULL) { + m_ref_count--; + return(true); + } + + /* Set up the array over all table partitions. 
*/ + table_name_len = strlen(table_name); + memcpy(partition_name, table_name, table_name_len); + List_iterator + part_it(part_info->partitions); + partition_element* part_elem; + uint i = 0; + + while ((part_elem = part_it++)) { + len = append_sep_and_name( + partition_name + table_name_len, + part_elem->partition_name, + part_sep_nix, + FN_REFLEN - table_name_len); + if (part_info->is_sub_partitioned()) { + List_iterator + sub_it(part_elem->subpartitions); + partition_element* sub_elem; + while ((sub_elem = sub_it++)) { + append_sep_and_name( + partition_name + + table_name_len + len, + sub_elem->partition_name, + sub_sep_nix, + FN_REFLEN - table_name_len - len); + if (open_one_table_part(i, partition_name)) { + goto err; + } + i++; + } + } else { + if (open_one_table_part(i, partition_name)) { + goto err; + } + i++; + } + } + ut_ad(i == m_tot_parts); + + /* Create the mapping of mysql index number to innodb indexes. */ + + ib_num_index = (uint) UT_LIST_GET_LEN(m_table_parts[0]->indexes); + mysql_num_index = part_info->table->s->keys; + + /* If there exists inconsistency between MySQL and InnoDB dictionary + (metadata) information, the number of index defined in MySQL + could exceed that in InnoDB, do not build index translation + table in such case. */ + + if (ib_num_index < mysql_num_index) { + ut_ad(0); + goto err; + } + + if (mysql_num_index != 0) { + size_t alloc_size = mysql_num_index * m_tot_parts + * sizeof(*m_index_mapping); + m_index_mapping = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_index_mapping == NULL) { + + /* Report an error if index_mapping continues to be + NULL and mysql_num_index is a non-zero value. */ + + ib::error() << "Failed to allocate memory for" + " index translation table. Number of" + " Index:" << mysql_num_index; + goto err; + } + } + + /* For each index in the mysql key_info array, fetch its + corresponding InnoDB index pointer into index_mapping + array. 
*/ + + for (ulint idx = 0; idx < mysql_num_index; idx++) { + for (ulint part = 0; part < m_tot_parts; part++) { + ulint count = part * mysql_num_index + idx; + + /* Fetch index pointers into index_mapping according + to mysql index sequence. */ + + m_index_mapping[count] = dict_table_get_index_on_name( + m_table_parts[part], + part_info->table->key_info[idx].name); + + if (m_index_mapping[count] == NULL) { + ib::error() << "Cannot find index `" + << part_info->table->key_info[idx].name + << "` in InnoDB index dictionary" + " partition `" + << get_partition_name(part) << "`."; + index_loaded = false; + break; + } + + /* Double check fetched index has the same + column info as those in mysql key_info. */ + + if (!innobase_match_index_columns( + &part_info->table->key_info[idx], + m_index_mapping[count])) { + ib::error() << "Found index `" + << part_info->table->key_info[idx].name + << "` whose column info does not match" + " that of MySQL."; + index_loaded = false; + break; + } + } + } + if (!index_loaded && m_index_mapping != NULL) { + ut_free(m_index_mapping); + m_index_mapping = NULL; + } + + /* Successfully built the translation table. */ + m_index_count = mysql_num_index; + + return(false); +err: + close_table_parts(); + + return(true); +} + +/** Close all partitions. */ +void +Ha_innopart_share::close_table_parts() +{ +#ifndef DBUG_OFF + if (m_table_share->tmp_table == NO_TMP_TABLE) { + mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data); + } +#endif /* DBUG_OFF */ + m_ref_count--; + if (m_ref_count != 0) { + + /* Decrement dict_table_t reference count for all partitions */ + mutex_enter(&dict_sys->mutex); + for (uint i = 0; i < m_tot_parts; i++) { + dict_table_t* table = m_table_parts[i]; + table->release(); + ut_ad(table->get_ref_count() >= m_ref_count); + } + mutex_exit(&dict_sys->mutex); + + return; + } + + /* Last instance closed, close all table partitions and + free the memory. 
*/ + + mutex_enter(&dict_sys->mutex); + if (m_table_parts != NULL) { + for (uint i = 0; i < m_tot_parts; i++) { + if (m_table_parts[i] != NULL) { + dict_table_close(m_table_parts[i], TRUE, TRUE); + } + } + ut_free(m_table_parts); + m_table_parts = NULL; + } + mutex_exit(&dict_sys->mutex); + if (m_index_mapping != NULL) { + ut_free(m_index_mapping); + m_index_mapping = NULL; + } + + m_tot_parts = 0; + m_index_count = 0; +} + +/** Get index. +Find the index of the specified partition and key number. +@param[in] part_id Partition number. +@param[in] keynr Key number. +@return Index pointer or NULL. */ +inline +dict_index_t* +Ha_innopart_share::get_index( + uint part_id, + uint keynr) +{ + ut_a(part_id < m_tot_parts); + ut_ad(keynr < m_index_count || keynr == MAX_KEY); + if (m_index_mapping == NULL + || keynr >= m_index_count) { + + if (keynr == MAX_KEY) { + return(dict_table_get_first_index( + get_table_part(part_id))); + } + return(NULL); + } + return(m_index_mapping[m_index_count * part_id + keynr]); +} + +/** Get MySQL key number corresponding to InnoDB index. +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@param[in] part_id Partition the index belongs to. +@param[in] index Index to return MySQL key number for. +@return the key number used inside MySQL or UINT_MAX if key is not found. 
*/ +inline +uint +Ha_innopart_share::get_mysql_key( + uint part_id, + const dict_index_t* index) +{ + ut_ad(index != NULL); + ut_ad(m_index_mapping != NULL); + ut_ad(m_tot_parts); + + if (index != NULL && m_index_mapping != NULL) { + uint start; + uint end; + + if (part_id < m_tot_parts) { + start = part_id * m_index_count; + end = start + m_index_count; + } else { + start = 0; + end = m_tot_parts * m_index_count; + } + for (uint i = start; i < end; i++) { + if (m_index_mapping[i] == index) { + return(i % m_index_count); + } + } + + /* Print an error message if we cannot find the index + in the "index translation table". */ + + if (index->is_committed()) { + ib::error() << "Cannot find index " + << index->name + << " in InnoDB index translation table."; + } + } + + return(UINT_MAX); +} + +/** Helper function for set bit in bitmap. +@param[in,out] buf Bitmap buffer to update bit in. +@param[in] bit_pos Bit number (index starts at 0). */ +static +inline +void +set_bit( + byte* buf, + size_t pos) +{ + buf[pos/8] |= (0x1 << (pos & 0x7)); +} + +/** Helper function for clear bit in bitmap. +@param[in,out] buf Bitmap buffer to update bit in. +@param[in] bit_pos Bit number (index starts at 0). */ +static +inline +void +clear_bit( + byte* buf, + size_t pos) +{ + buf[pos/8] &= ~(0x1 << (pos & 0x7)); +} + +/** Helper function for get bit in bitmap. +@param[in,out] buf Bitmap buffer. +@param[in] bit_pos Bit number (index starts at 0). +@return byte set to 0x0 or 0x1. +@retval 0x0 bit not set. +@retval 0x1 bet set. */ +static +inline +byte +get_bit( + byte* buf, + size_t pos) +{ + return((buf[pos/8] >> (pos & 0x7)) & 0x1); +} + +/** Helper class for encapsulating new/altered partitions during +ADD/REORG/... PARTITION. */ +class Altered_partitions +{ +private: + /** New partitions during ADD/REORG/... PARTITION. */ + dict_table_t** m_new_table_parts; + + /** Insert nodes per partition. */ + ins_node_t** m_ins_nodes; + + /** sql_stat_start per partition. 
*/ + byte* m_sql_stat_start; + + /** Trx id per partition. */ + trx_id_t* m_trx_ids; + + /** Number of new partitions. */ + size_t m_num_new_parts; + + /** Only need to create the partitions (no open/lock). */ + bool m_only_create; + +public: + Altered_partitions( + uint n_partitions, + bool only_create); + + ~Altered_partitions(); + + bool + initialize(); + + bool + only_create() const + { + return(m_only_create); + } + + /** Set currently used partition. + @param[in] new_part_id Partition id to set. + @param[in] part InnoDB table to use. */ + inline + void + set_part( + ulint new_part_id, + dict_table_t* part) + { + ut_ad(m_new_table_parts[new_part_id] == NULL); + m_new_table_parts[new_part_id] = part; + set_bit(m_sql_stat_start, new_part_id); + } + + /** Get lower level InnoDB table for partition. + @param[in] part_id Partition id. + @return Lower level InnoDB table for the partition id. */ + inline + dict_table_t* + part( + uint part_id) const + { + ut_ad(part_id < m_num_new_parts); + return(m_new_table_parts[part_id]); + } + + /** Set up prebuilt for using a specified partition. + @param[in] prebuilt Prebuilt to update. + @param[in] new_part_id Partition to use. */ + inline + void + get_prebuilt( + row_prebuilt_t* prebuilt, + uint new_part_id) const + { + ut_ad(m_new_table_parts[new_part_id]); + prebuilt->table = m_new_table_parts[new_part_id]; + prebuilt->ins_node = m_ins_nodes[new_part_id]; + prebuilt->trx_id = m_trx_ids[new_part_id]; + prebuilt->sql_stat_start = get_bit(m_sql_stat_start, + new_part_id); + } + + /** Update cached values for a partition from prebuilt. + @param[in] prebuilt Prebuilt to copy from. + @param[in] new_part_id Partition id to copy. 
*/ + inline + void + set_from_prebuilt( + row_prebuilt_t* prebuilt, + uint new_part_id) + { + ut_ad(m_new_table_parts[new_part_id] == prebuilt->table); + m_ins_nodes[new_part_id] = prebuilt->ins_node; + m_trx_ids[new_part_id] = prebuilt->trx_id; + if (prebuilt->sql_stat_start == 0) { + clear_bit(m_sql_stat_start, new_part_id); + } + } +}; + +Altered_partitions::Altered_partitions( + uint n_partitions, + bool only_create) + : + m_new_table_parts(), + m_ins_nodes(), + m_sql_stat_start(), + m_trx_ids(), + m_num_new_parts(n_partitions), + m_only_create(only_create) + {} + +Altered_partitions::~Altered_partitions() +{ + if (m_new_table_parts != NULL) { + for (ulint i = 0; i < m_num_new_parts; i++) { + if (m_new_table_parts[i] != NULL) { + dict_table_close(m_new_table_parts[i], + false, true); + } + } + ut_free(m_new_table_parts); + m_new_table_parts = NULL; + } + if (m_ins_nodes != NULL) { + for (ulint i = 0; i < m_num_new_parts; i++) { + if (m_ins_nodes[i] != NULL) { + ins_node_t* ins = m_ins_nodes[i]; + ut_ad(ins->select == NULL); + que_graph_free_recursive(ins->select); + ins->select = NULL; + if (ins->entry_sys_heap != NULL) { + mem_heap_free(ins->entry_sys_heap); + ins->entry_sys_heap = NULL; + } + } + } + ut_free(m_ins_nodes); + m_ins_nodes = NULL; + } + if (m_sql_stat_start != NULL) { + ut_free(m_sql_stat_start); + m_sql_stat_start = NULL; + } + if (m_trx_ids != NULL) { + ut_free(m_trx_ids); + m_trx_ids = NULL; + } +} + +/** Initialize the object. +@return false on success else true. 
*/ +bool +Altered_partitions::initialize() +{ + size_t alloc_size = sizeof(*m_new_table_parts) * m_num_new_parts; + m_new_table_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_new_table_parts == NULL) { + return(true); + } + + alloc_size = sizeof(*m_ins_nodes) * m_num_new_parts; + m_ins_nodes = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_ins_nodes == NULL) { + ut_free(m_new_table_parts); + m_new_table_parts = NULL; + return(true); + } + + alloc_size = sizeof(*m_sql_stat_start) + * UT_BITS_IN_BYTES(m_num_new_parts); + m_sql_stat_start = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_sql_stat_start == NULL) { + ut_free(m_new_table_parts); + m_new_table_parts = NULL; + ut_free(m_ins_nodes); + m_ins_nodes = NULL; + return(true); + } + + alloc_size = sizeof(*m_trx_ids) * m_num_new_parts; + m_trx_ids = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_trx_ids == NULL) { + ut_free(m_new_table_parts); + m_new_table_parts = NULL; + ut_free(m_ins_nodes); + m_ins_nodes = NULL; + ut_free(m_sql_stat_start); + m_sql_stat_start = NULL; + return(true); + } + + return(false); +} + +/** Construct ha_innopart handler. +@param[in] hton Handlerton. +@param[in] table_arg MySQL Table. +@return a new ha_innopart handler. */ +ha_innopart::ha_innopart( + handlerton* hton, + TABLE_SHARE* table_arg) + : + ha_innobase(hton, table_arg), + Partition_helper(this), + m_ins_node_parts(), + m_upd_node_parts(), + m_blob_heap_parts(), + m_trx_id_parts(), + m_row_read_type_parts(), + m_sql_stat_start_parts(), + m_pcur(), + m_clust_pcur(), + m_new_partitions() +{ + m_int_table_flags &= ~(HA_INNOPART_DISABLED_TABLE_FLAGS); + + /* INNOBASE_SHARE is not used in ha_innopart. + This also flags for ha_innobase that it is a partitioned table. + And make it impossible to use legacy share functionality. */ + + m_share = NULL; +} + +/** Destruct ha_innopart handler. 
*/ +ha_innopart::~ha_innopart() +{} + +/** Returned supported alter table flags. +@param[in] flags Flags to support. +@return Supported flags. */ +uint +ha_innopart::alter_table_flags( + uint flags) +{ + return(HA_PARTITION_FUNCTION_SUPPORTED | HA_FAST_CHANGE_PARTITION); +} + +/** Internally called for initializing auto increment value. +Only called from ha_innobase::discard_or_import_table_space() +and should not do anything, since it is ha_innopart will initialize +it on first usage. */ +int +ha_innopart::innobase_initialize_autoinc() +{ + ut_ad(0); + return(0); +} + +/** Set the autoinc column max value. +This should only be called once from ha_innobase::open(). +Therefore there's no need for a covering lock. +@param[in] no_lock Ignored! +@return 0 for success or error code. */ +inline +int +ha_innopart::initialize_auto_increment( + bool /* no_lock */) +{ + int error = 0; + ulonglong auto_inc = 0; + const Field* field = table->found_next_number_field; + +#ifndef DBUG_OFF + if (table_share->tmp_table == NO_TMP_TABLE) + { + mysql_mutex_assert_owner(m_part_share->auto_inc_mutex); + } +#endif + + /* Since a table can already be "open" in InnoDB's internal + data dictionary, we only init the autoinc counter once, the + first time the table is loaded. We can safely reuse the + autoinc value from a previous MySQL open. */ + + if (m_part_share->auto_inc_initialized) { + /* Already initialized, nothing to do. */ + return(0); + } + + if (field == NULL) { + ib::info() << "Unable to determine the AUTOINC column name"; + } + + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + /* If the recovery level is set so high that writes + are disabled we force the AUTOINC counter to 0 + value effectively disabling writes to the table. + Secondly, we avoid reading the table in case the read + results in failure due to a corrupted table/index. + + We will not return an error to the client, so that the + tables can be dumped with minimal hassle. 
If an error + were returned in this case, the first attempt to read + the table would fail and subsequent SELECTs would succeed. */ + + } else if (field == NULL) { + /* This is a far more serious error, best to avoid + opening the table and return failure. */ + + my_error(ER_AUTOINC_READ_FAILED, MYF(0)); + error = HA_ERR_AUTOINC_READ_FAILED; + } else { + dict_index_t* index; + const char* col_name; + ib_uint64_t read_auto_inc; + ib_uint64_t max_auto_inc = 0; + ulint err; + dict_table_t* ib_table; + ulonglong col_max_value; + + col_max_value = field->get_max_int_value(); + + update_thd(ha_thd()); + + col_name = field->field_name; + for (uint part = 0; part < m_tot_parts; part++) { + ib_table = m_part_share->get_table_part(part); + dict_table_autoinc_lock(ib_table); + read_auto_inc = dict_table_autoinc_read(ib_table); + if (read_auto_inc != 0) { + set_if_bigger(max_auto_inc, read_auto_inc); + dict_table_autoinc_unlock(ib_table); + continue; + } + /* Execute SELECT MAX(col_name) FROM TABLE; */ + index = m_part_share->get_index( + part, table->s->next_number_index); + err = row_search_max_autoinc( + index, col_name, &read_auto_inc); + + switch (err) { + case DB_SUCCESS: { + /* At the this stage we do not know the + increment nor the offset, + so use a default increment of 1. */ + + auto_inc = innobase_next_autoinc( + read_auto_inc, 1, 1, 0, col_max_value); + set_if_bigger(max_auto_inc, auto_inc); + dict_table_autoinc_initialize(ib_table, + auto_inc); + break; + } + case DB_RECORD_NOT_FOUND: + ib::error() << "MySQL and InnoDB data" + " dictionaries are out of sync. Unable" + " to find the AUTOINC column " + << col_name << " in the InnoDB table " + << index->table->name << ". 
We set the" + " next AUTOINC column value to 0, in" + " effect disabling the AUTOINC next" + " value generation."; + + ib::info() << "You can either set the next" + " AUTOINC value explicitly using ALTER" + " TABLE or fix the data dictionary by" + " recreating the table."; + + /* We want the open to succeed, so that the + user can take corrective action. ie. reads + should succeed but updates should fail. */ + + /* This will disable the AUTOINC generation. */ + auto_inc = 0; + goto done; + default: + /* row_search_max_autoinc() should only return + one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */ + + ut_error; + } + dict_table_autoinc_unlock(ib_table); + } + auto_inc = max_auto_inc; + } + +done: + m_part_share->next_auto_inc_val = auto_inc; + m_part_share->auto_inc_initialized = true; + return(error); +} + +/** Opens a partitioned InnoDB table. +Initializes needed data and opens the table which already exists +in an InnoDB database. +@param[in] name Table name (db/tablename) +@param[in] mode Not used +@param[in] test_if_locked Not used +@return 0 or error number. */ +int +ha_innopart::open( + const char* name, + int /*mode*/, + uint /*test_if_locked*/) +{ + dict_table_t* ib_table; + char norm_name[FN_REFLEN]; + THD* thd; + + DBUG_ENTER("ha_innopart::open"); + + ut_ad(table); + if (m_part_info == NULL) { + /* Must be during ::clone()! */ + ut_ad(table->part_info != NULL); + m_part_info = table->part_info; + } + thd = ha_thd(); + + /* Under some cases MySQL seems to call this function while + holding search latch(es). This breaks the latching order as + we acquire dict_sys->mutex below and leads to a deadlock. */ + + if (thd != NULL) { + innobase_release_temporary_latches(ht, thd); + } + + normalize_table_name(norm_name, name); + + m_user_thd = NULL; + + /* Get the Ha_innopart_share from the TABLE_SHARE. 
*/ + lock_shared_ha_data(); + m_part_share = static_cast(get_ha_share_ptr()); + if (m_part_share == NULL) { + m_part_share = new (std::nothrow) + Ha_innopart_share(table_share); + if (m_part_share == NULL) { +share_error: + unlock_shared_ha_data(); + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + set_ha_share_ptr(static_cast(m_part_share)); + } + if (m_part_share->open_table_parts(m_part_info, name) + || m_part_share->populate_partition_name_hash(m_part_info)) { + goto share_error; + } + if (m_part_share->auto_inc_mutex == NULL + && table->found_next_number_field != NULL) { + if (m_part_share->init_auto_inc_mutex(table_share)) { + goto share_error; + } + } + unlock_shared_ha_data(); + + /* Will be allocated if it is needed in ::update_row(). */ + m_upd_buf = NULL; + m_upd_buf_size = 0; + + /* Get pointer to a table object in InnoDB dictionary cache. */ + ib_table = m_part_share->get_table_part(0); + + m_pcur_parts = NULL; + m_clust_pcur_parts = NULL; + m_pcur_map = NULL; + + /* TODO: Handle mismatching #P# vs #p# in upgrading to new DD instead! + See bug#58406, The problem exists when moving partitioned tables + between Windows and Unix-like platforms. InnoDB always folds the name + on windows, partitioning never folds partition (and #P# separator). + I.e. non of it follows lower_case_table_names correctly :( */ + + if (open_partitioning(m_part_share)) + { + close(); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + /* Currently we track statistics for all partitions, but for + the secondary indexes we only use the biggest partition. */ + + for (uint part_id = 0; part_id < m_tot_parts; part_id++) { + innobase_copy_frm_flags_from_table_share( + m_part_share->get_table_part(part_id), + table->s); + dict_stats_init(m_part_share->get_table_part(part_id)); + } + + MONITOR_INC(MONITOR_TABLE_OPEN); + + bool no_tablespace; + + /* TODO: Should we do this check for every partition during ::open()? */ + /* TODO: refactor this in ha_innobase so it can increase code reuse. 
*/ + if (dict_table_is_discarded(ib_table)) { + + ib_senderrf(thd, + IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED, + table->s->table_name.str); + + /* Allow an open because a proper DISCARD should have set + all the flags and index root page numbers to FIL_NULL that + should prevent any DML from running but it should allow DDL + operations. */ + + no_tablespace = false; + + } else if (ib_table->ibd_file_missing) { + + ib_senderrf( + thd, IB_LOG_LEVEL_WARN, + ER_TABLESPACE_MISSING, norm_name); + + /* This means we have no idea what happened to the tablespace + file, best to play it safe. */ + + no_tablespace = true; + } else { + no_tablespace = false; + } + + if (!thd_tablespace_op(thd) && no_tablespace) { + set_my_errno(ENOENT); + + lock_shared_ha_data(); + m_part_share->close_table_parts(); + unlock_shared_ha_data(); + m_part_share = NULL; + + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength); + + m_prebuilt->default_rec = table->s->default_values; + ut_ad(m_prebuilt->default_rec); + + DBUG_ASSERT(table != NULL); + m_prebuilt->m_mysql_table = table; + + if (ib_table->n_v_cols > 0) { + mutex_enter(&dict_sys->mutex); + m_part_share->set_v_templ(table, ib_table, name); + mutex_exit(&dict_sys->mutex); + } + + /* Looks like MySQL-3.23 sometimes has primary key number != 0. */ + m_primary_key = table->s->primary_key; + key_used_on_scan = m_primary_key; + + /* Allocate a buffer for a 'row reference'. A row reference is + a string of bytes of length ref_length which uniquely specifies + a row in our table. Note that MySQL may also compare two row + references for equality by doing a simple memcmp on the strings + of length ref_length! 
*/ + + if (!row_table_got_default_clust_index(ib_table)) { + + m_prebuilt->clust_index_was_generated = FALSE; + + if (UNIV_UNLIKELY(m_primary_key >= MAX_KEY)) { + table_name_t table_name; + table_name.m_name = const_cast(name); + ib::error() << "Table " << table_name + << " has a primary key in InnoDB data" + " dictionary, but not in MySQL!"; + + /* This mismatch could cause further problems + if not attended, bring this to the user's attention + by printing a warning in addition to log a message + in the errorlog. */ + + push_warning_printf(thd, Sql_condition::SL_WARNING, + ER_NO_SUCH_INDEX, + "Table %s has a" + " primary key in InnoDB data" + " dictionary, but not in" + " MySQL!", name); + + /* If m_primary_key >= MAX_KEY, its (m_primary_key) + value could be out of bound if continue to index + into key_info[] array. Find InnoDB primary index, + and assign its key_length to ref_length. + In addition, since MySQL indexes are sorted starting + with primary index, unique index etc., initialize + ref_length to the first index key length in + case we fail to find InnoDB cluster index. + + Please note, this will not resolve the primary + index mismatch problem, other side effects are + possible if users continue to use the table. + However, we allow this table to be opened so + that user can adopt necessary measures for the + mismatch while still being accessible to the table + date. */ + + if (table->key_info == NULL) { + ut_ad(table->s->keys == 0); + ref_length = 0; + } else { + ref_length = table->key_info[0].key_length; + } + + /* Find corresponding cluster index + key length in MySQL's key_info[] array. */ + + for (uint i = 0; i < table->s->keys; i++) { + dict_index_t* index; + index = innopart_get_index(0, i); + if (dict_index_is_clust(index)) { + ref_length = + table->key_info[i].key_length; + } + } + ut_a(ref_length); + ref_length += PARTITION_BYTES_IN_POS; + } else { + /* MySQL allocates the buffer for ref. 
+ key_info->key_length includes space for all key + columns + one byte for each column that may be + NULL. ref_length must be as exact as possible to + save space, because all row reference buffers are + allocated based on ref_length. */ + + ref_length = table->key_info[m_primary_key].key_length; + ref_length += PARTITION_BYTES_IN_POS; + } + } else { + if (m_primary_key != MAX_KEY) { + table_name_t table_name; + table_name.m_name = const_cast(name); + ib::error() << "Table " << table_name + << " has no primary key in InnoDB data" + " dictionary, but has one in MySQL! If you" + " created the table with a MySQL version <" + " 3.23.54 and did not define a primary key," + " but defined a unique key with all non-NULL" + " columns, then MySQL internally treats that" + " key as the primary key. You can fix this" + " error by dump + DROP + CREATE + reimport" + " of the table."; + + /* This mismatch could cause further problems + if not attended, bring this to the user attention + by printing a warning in addition to log a message + in the errorlog. */ + + push_warning_printf(thd, Sql_condition::SL_WARNING, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has no" + " primary key in InnoDB data" + " dictionary, but has one in" + " MySQL!", name); + } + + m_prebuilt->clust_index_was_generated = TRUE; + + ref_length = DATA_ROW_ID_LEN; + ref_length += PARTITION_BYTES_IN_POS; + + /* If we automatically created the clustered index, then + MySQL does not know about it, and MySQL must NOT be aware + of the index used on scan, to make it avoid checking if we + update the column of the index. That is why we assert below + that key_used_on_scan is the undefined value MAX_KEY. + The column is the row id in the automatical generation case, + and it will never be updated anyway. 
*/ + + if (key_used_on_scan != MAX_KEY) { + table_name_t table_name; + table_name.m_name = const_cast(name); + ib::warn() << "Table " << table_name + << " key_used_on_scan is " + << key_used_on_scan << " even though there is" + " no primary key inside InnoDB."; + } + } + + /* Index block size in InnoDB: used by MySQL in query optimization. */ + stats.block_size = UNIV_PAGE_SIZE; + + if (m_prebuilt->table != NULL) { + /* We update the highest file format in the system table + space, if this table has higher file format setting. */ + + trx_sys_file_format_max_upgrade( + (const char**) &innobase_file_format_max, + dict_table_get_format(m_prebuilt->table)); + } + + /* Only if the table has an AUTOINC column. */ + if (m_prebuilt->table != NULL + && !m_prebuilt->table->ibd_file_missing + && table->found_next_number_field != NULL) { + int error; + + /* Since a table can already be "open" in InnoDB's internal + data dictionary, we only init the autoinc counter once, the + first time the table is loaded, + see ha_innopart::initialize_auto_increment. + We can safely reuse the autoinc value from a previous MySQL + open. */ + + lock_auto_increment(); + error = initialize_auto_increment(false); + unlock_auto_increment(); + if (error != 0) { + close(); + DBUG_RETURN(error); + } + } + +#ifdef HA_INNOPART_SUPPORTS_FULLTEXT + /* Set plugin parser for fulltext index. 
*/ + for (uint i = 0; i < table->s->keys; i++) { + if (table->key_info[i].flags & HA_USES_PARSER) { + dict_index_t* index = innobase_get_index(i); + plugin_ref parser = table->key_info[i].parser; + + ut_ad(index->type & DICT_FTS); + index->parser = + static_cast( + plugin_decl(parser)->info); + + DBUG_EXECUTE_IF("fts_instrument_use_default_parser", + index->parser = &fts_default_parser;); + } + } +#endif /* HA_INNOPART_SUPPORTS_FULLTEXT */ + + size_t alloc_size = sizeof(*m_ins_node_parts) * m_tot_parts; + m_ins_node_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + + alloc_size = sizeof(*m_upd_node_parts) * m_tot_parts; + m_upd_node_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + + alloc_blob_heap_array(); + + alloc_size = sizeof(*m_trx_id_parts) * m_tot_parts; + m_trx_id_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + + alloc_size = sizeof(*m_row_read_type_parts) * m_tot_parts; + m_row_read_type_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + + alloc_size = UT_BITS_IN_BYTES(m_tot_parts); + m_sql_stat_start_parts = static_cast( + ut_zalloc(alloc_size, mem_key_partitioning)); + if (m_ins_node_parts == NULL + || m_upd_node_parts == NULL + || m_blob_heap_parts == NULL + || m_trx_id_parts == NULL + || m_row_read_type_parts == NULL + || m_sql_stat_start_parts == NULL) { + close(); // Frees all the above. + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + + DBUG_RETURN(0); +} + +/** Get a cloned ha_innopart handler. +@param[in] name Table name. +@param[in] mem_root MySQL mem_root to use. +@return new ha_innopart handler. 
*/ +handler* +ha_innopart::clone( + const char* name, + MEM_ROOT* mem_root) +{ + ha_innopart* new_handler; + + DBUG_ENTER("ha_innopart::clone"); + + new_handler = dynamic_cast(handler::clone(name, + mem_root)); + if (new_handler != NULL) { + ut_ad(new_handler->m_prebuilt != NULL); + + new_handler->m_prebuilt->select_lock_type = + m_prebuilt->select_lock_type; + } + + DBUG_RETURN(new_handler); +} + +/** Clear used ins_nodes and upd_nodes. */ +void ha_innopart::clear_ins_upd_nodes() +{ + /* Free memory from insert nodes. */ + if (m_ins_node_parts != NULL) { + for (uint i = 0; i < m_tot_parts; i++) { + if (m_ins_node_parts[i] != NULL) { + ins_node_t* ins = m_ins_node_parts[i]; + if (ins->select != NULL) { + que_graph_free_recursive(ins->select); + ins->select = NULL; + } + + if (ins->entry_sys_heap != NULL) { + mem_heap_free(ins->entry_sys_heap); + ins->entry_sys_heap = NULL; + } + m_ins_node_parts[i] = NULL; + } + } + } + + /* Free memory from update nodes. */ + if (m_upd_node_parts != NULL) { + for (uint i = 0; i < m_tot_parts; i++) { + if (m_upd_node_parts[i] != NULL) { + upd_node_t* upd = m_upd_node_parts[i]; + if (upd->cascade_top) { + mem_heap_free(upd->cascade_heap); + upd->cascade_top = false; + upd->cascade_heap = NULL; + } + if (upd->in_mysql_interface) { + btr_pcur_free_for_mysql(upd->pcur); + upd->in_mysql_interface = FALSE; + } + + if (upd->select != NULL) { + que_graph_free_recursive(upd->select); + upd->select = NULL; + } + if (upd->heap != NULL) { + mem_heap_free(upd->heap); + upd->heap = NULL; + } + m_upd_node_parts[i] = NULL; + } + } + } +} + +/** Closes a handle to an InnoDB table. 
+@return 0 */ +int +ha_innopart::close() +{ + THD* thd; + + DBUG_ENTER("ha_innopart::close"); + + thd = ha_thd(); + if (thd != NULL) { + innobase_release_temporary_latches(ht, thd); + } + + ut_ad(m_pcur_parts == NULL); + ut_ad(m_clust_pcur_parts == NULL); + close_partitioning(); + + ut_ad(m_part_share != NULL); + if (m_part_share != NULL) { + lock_shared_ha_data(); + m_part_share->close_table_parts(); + unlock_shared_ha_data(); + m_part_share = NULL; + } + clear_ins_upd_nodes(); + free_blob_heap_array(); + + /* Prevent double close of m_prebuilt->table. The real one was done + done in m_part_share->close_table_parts(). */ + m_prebuilt->table = NULL; + row_prebuilt_free(m_prebuilt, FALSE); + + if (m_upd_buf != NULL) { + ut_ad(m_upd_buf_size != 0); + /* Allocated with my_malloc! */ + my_free(m_upd_buf); + m_upd_buf = NULL; + m_upd_buf_size = 0; + } + + if (m_ins_node_parts != NULL) { + ut_free(m_ins_node_parts); + m_ins_node_parts = NULL; + } + if (m_upd_node_parts != NULL) { + ut_free(m_upd_node_parts); + m_upd_node_parts = NULL; + } + if (m_trx_id_parts != NULL) { + ut_free(m_trx_id_parts); + m_trx_id_parts = NULL; + } + if (m_row_read_type_parts != NULL) { + ut_free(m_row_read_type_parts); + m_row_read_type_parts = NULL; + } + if (m_sql_stat_start_parts != NULL) { + ut_free(m_sql_stat_start_parts); + m_sql_stat_start_parts = NULL; + } + + MONITOR_INC(MONITOR_TABLE_CLOSE); + + /* Tell InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + DBUG_RETURN(0); +} + +/** Change active partition. +Copies needed info into m_prebuilt from the partition specific memory. +@param[in] part_id Partition to set as active. 
*/ +void +ha_innopart::set_partition( + uint part_id) +{ + DBUG_ENTER("ha_innopart::set_partition"); + + DBUG_PRINT("ha_innopart", ("partition id: %u", part_id)); + + if (part_id >= m_tot_parts) { + ut_ad(0); + DBUG_VOID_RETURN; + } + if (m_pcur_parts != NULL) { + m_prebuilt->pcur = &m_pcur_parts[m_pcur_map[part_id]]; + } + if (m_clust_pcur_parts != NULL) { + m_prebuilt->clust_pcur = + &m_clust_pcur_parts[m_pcur_map[part_id]]; + } + m_prebuilt->ins_node = m_ins_node_parts[part_id]; + m_prebuilt->upd_node = m_upd_node_parts[part_id]; + + /* For unordered scan and table scan, use blob_heap from first + partition as we need exactly one blob. */ + m_prebuilt->blob_heap = m_blob_heap_parts[m_ordered ? part_id : 0]; + +#ifdef UNIV_DEBUG + if (m_prebuilt->blob_heap != NULL) { + DBUG_PRINT("ha_innopart", ("validating blob_heap: %p", + m_prebuilt->blob_heap)); + mem_heap_validate(m_prebuilt->blob_heap); + } +#endif + + m_prebuilt->trx_id = m_trx_id_parts[part_id]; + m_prebuilt->row_read_type = m_row_read_type_parts[part_id]; + m_prebuilt->sql_stat_start = get_bit(m_sql_stat_start_parts, part_id); + m_prebuilt->table = m_part_share->get_table_part(part_id); + m_prebuilt->index = innopart_get_index(part_id, active_index); + + DBUG_VOID_RETURN; +} + +/** Update active partition. +Copies needed info from m_prebuilt into the partition specific memory. +@param[in] part_id Partition to set as active. 
*/ +void +ha_innopart::update_partition( + uint part_id) +{ + DBUG_ENTER("ha_innopart::update_partition"); + DBUG_PRINT("ha_innopart", ("partition id: %u", part_id)); + + if (part_id >= m_tot_parts) { + ut_ad(0); + DBUG_VOID_RETURN; + } + m_ins_node_parts[part_id] = m_prebuilt->ins_node; + m_upd_node_parts[part_id] = m_prebuilt->upd_node; + +#ifdef UNIV_DEBUG + if (m_prebuilt->blob_heap != NULL) { + DBUG_PRINT("ha_innopart", ("validating blob_heap: %p", + m_prebuilt->blob_heap)); + mem_heap_validate(m_prebuilt->blob_heap); + } +#endif + + /* For unordered scan and table scan, use blob_heap from first + partition as we need exactly one blob anytime. */ + m_blob_heap_parts[m_ordered ? part_id : 0] = m_prebuilt->blob_heap; + + m_trx_id_parts[part_id] = m_prebuilt->trx_id; + m_row_read_type_parts[part_id] = m_prebuilt->row_read_type; + if (m_prebuilt->sql_stat_start == 0) { + clear_bit(m_sql_stat_start_parts, part_id); + } + m_last_part = part_id; + DBUG_VOID_RETURN; +} + +/** Save currently highest auto increment value. +@param[in] nr Auto increment value to save. */ +void +ha_innopart::save_auto_increment( + ulonglong nr) +{ + + /* Store it in the shared dictionary of the partition. + TODO: When the new DD is done, store it in the table and make it + persistent! */ + + dict_table_autoinc_lock(m_prebuilt->table); + dict_table_autoinc_update_if_greater(m_prebuilt->table, nr + 1); + dict_table_autoinc_unlock(m_prebuilt->table); +} + +/** Was the last returned row semi consistent read. +In an UPDATE or DELETE, if the row under the cursor was locked by +another transaction, and the engine used an optimistic read of the last +committed row value under the cursor, then the engine returns 1 from +this function. MySQL must NOT try to update this optimistic value. If +the optimistic value does not match the WHERE condition, MySQL can +decide to skip over this row. This can be used to avoid unnecessary +lock waits. 
+ +If this method returns true, it will also signal the storage +engine that the next read will be a locking re-read of the row. +@see handler.h and row0mysql.h +@return true if last read was semi consistent else false. */ +bool +ha_innopart::was_semi_consistent_read() +{ + return(m_row_read_type_parts[m_last_part] + == ROW_READ_DID_SEMI_CONSISTENT); +} + +/** Try semi consistent read. +Tell the engine whether it should avoid unnecessary lock waits. +If yes, in an UPDATE or DELETE, if the row under the cursor was locked +by another transaction, the engine may try an optimistic read of +the last committed row value under the cursor. +@see handler.h and row0mysql.h +@param[in] yes Should semi-consistent read be used. */ +void +ha_innopart::try_semi_consistent_read( + bool yes) +{ + ha_innobase::try_semi_consistent_read(yes); + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + m_row_read_type_parts[i] = m_prebuilt->row_read_type; + } +} + +/** Removes a lock on a row. +Removes a new lock set on a row, if it was not read optimistically. +This can be called after a row has been read in the processing of +an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */ +void +ha_innopart::unlock_row() +{ + ut_ad(m_last_part < m_tot_parts); + set_partition(m_last_part); + ha_innobase::unlock_row(); + update_partition(m_last_part); +} + +/** Write a row in partition. +Stores a row in an InnoDB database, to the table specified in this +handle. +@param[in] part_id Partition to write to. +@param[in] record A row in MySQL format. +@return 0 or error code. */ +int +ha_innopart::write_row_in_part( + uint part_id, + uchar* record) +{ + int error; + Field* saved_next_number_field = table->next_number_field; + DBUG_ENTER("ha_innopart::write_row_in_part"); + set_partition(part_id); + + /* Prevent update_auto_increment to be called + again in ha_innobase::write_row(). 
*/ + + table->next_number_field = NULL; + + /* TODO: try to avoid creating a new dtuple + (in row_get_prebuilt_insert_row()) for each partition). + Might be needed due to ins_node implementation. */ + + error = ha_innobase::write_row(record); + update_partition(part_id); + table->next_number_field = saved_next_number_field; + DBUG_RETURN(error); +} + +/** Update a row in partition. +Updates a row given as a parameter to a new value. +@param[in] part_id Partition to update row in. +@param[in] old_row Old row in MySQL format. +@param[in] new_row New row in MySQL format. +@return 0 or error number. */ +int +ha_innopart::update_row_in_part( + uint part_id, + const uchar* old_row, + uchar* new_row) +{ + int error; + DBUG_ENTER("ha_innopart::update_row_in_part"); + + set_partition(part_id); + error = ha_innobase::update_row(old_row, new_row); + update_partition(part_id); + DBUG_RETURN(error); +} + +/** Deletes a row in partition. +@param[in] part_id Partition to delete from. +@param[in] record Row to delete in MySQL format. +@return 0 or error number. */ +int +ha_innopart::delete_row_in_part( + uint part_id, + const uchar* record) +{ + int error; + DBUG_ENTER("ha_innopart::delete_row_in_part"); + m_err_rec = NULL; + + m_last_part = part_id; + set_partition(part_id); + error = ha_innobase::delete_row(record); + update_partition(part_id); + DBUG_RETURN(error); +} + +/** Initializes a handle to use an index. +@param[in] keynr Key (index) number. +@param[in] sorted True if result MUST be sorted according to index. +@return 0 or error number. 
*/ +int +ha_innopart::index_init( + uint keynr, + bool sorted) +{ + int error; + uint part_id = m_part_info->get_first_used_partition(); + DBUG_ENTER("ha_innopart::index_init"); + + active_index = keynr; + if (part_id == MY_BIT_NONE) { + DBUG_RETURN(0); + } + + error = ph_index_init_setup(keynr, sorted); + if (error != 0) { + DBUG_RETURN(error); + } + + if (sorted) { + error = init_record_priority_queue(); + if (error != 0) { + /* Needs cleanup in case it returns error. */ + destroy_record_priority_queue(); + DBUG_RETURN(error); + } + /* Disable prefetch. + The prefetch buffer is not partitioning aware, so it may return + rows from a different partition if either the prefetch buffer is + full, or it is non-empty and the partition is exhausted. */ + m_prebuilt->m_no_prefetch = true; + } + + /* For scan across partitions, the keys needs to be materialized */ + m_prebuilt->m_read_virtual_key = true; + + error = change_active_index(part_id, keynr); + if (error != 0) { + destroy_record_priority_queue(); + DBUG_RETURN(error); + } + + DBUG_EXECUTE_IF("partition_fail_index_init", { + destroy_record_priority_queue(); + DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND); + }); + + DBUG_RETURN(0); +} + +/** End index cursor. +@return 0 or error code. */ +int +ha_innopart::index_end() +{ + uint part_id = m_part_info->get_first_used_partition(); + DBUG_ENTER("ha_innopart::index_end"); + + if (part_id == MY_BIT_NONE) { + /* Never initialized any index. */ + active_index = MAX_KEY; + DBUG_RETURN(0); + } + if (m_ordered) { + destroy_record_priority_queue(); + m_prebuilt->m_no_prefetch = false; + } + m_prebuilt->m_read_virtual_key = false; + + DBUG_RETURN(ha_innobase::index_end()); +} + +/* Partitioning support functions. */ + +/** Setup the ordered record buffer and the priority queue. +@param[in] used_parts Number of used partitions in query. +@return false for success else true. 
*/ +int +ha_innopart::init_record_priority_queue_for_parts( + uint used_parts) +{ + size_t alloc_size; + void* buf; + + DBUG_ENTER("ha_innopart::init_record_priority_queue_for_parts"); + ut_ad(used_parts >= 1); + /* TODO: Don't use this if only one partition is used! */ + //ut_ad(used_parts > 1); + + /* We could reuse current m_prebuilt->pcur/clust_pcur for the first + used partition, but it would complicate and affect performance, + so we trade some extra memory instead. */ + + m_pcur = m_prebuilt->pcur; + m_clust_pcur = m_prebuilt->clust_pcur; + + /* If we searching for secondary key or doing a write/update + we will need two pcur, one for the active (secondary) index and + one for the clustered index. */ + + bool need_clust_index = + m_curr_key_info[1] != NULL + || get_lock_type() != F_RDLCK; + + /* pcur and clust_pcur per partition. + By using zalloc, we do not need to initialize the pcur's! */ + + alloc_size = used_parts * sizeof(btr_pcur_t); + if (need_clust_index) { + alloc_size *= 2; + } + buf = ut_zalloc(alloc_size, mem_key_partitioning); + if (buf == NULL) { + DBUG_RETURN(true); + } + m_pcur_parts = static_cast(buf); + if (need_clust_index) { + m_clust_pcur_parts = &m_pcur_parts[used_parts]; + } + /* mapping from part_id to pcur. */ + alloc_size = m_tot_parts * sizeof(*m_pcur_map); + buf = ut_zalloc(alloc_size, mem_key_partitioning); + if (buf == NULL) { + DBUG_RETURN(true); + } + m_pcur_map = static_cast(buf); + { + uint16_t pcur_count = 0; + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + m_pcur_map[i] = pcur_count++; + } + } + + DBUG_RETURN(false); +} + +/** Destroy the ordered record buffer and the priority queue. 
*/ +inline +void +ha_innopart::destroy_record_priority_queue_for_parts() +{ + DBUG_ENTER("ha_innopart::destroy_record_priority_queue"); + if (m_pcur_parts != NULL) { + uint used_parts; + used_parts = bitmap_bits_set(&m_part_info->read_partitions); + for (uint i = 0; i < used_parts; i++) { + btr_pcur_free(&m_pcur_parts[i]); + if (m_clust_pcur_parts != NULL) { + btr_pcur_free(&m_clust_pcur_parts[i]); + } + } + ut_free(m_pcur_parts); + m_clust_pcur_parts = NULL; + m_pcur_parts = NULL; + /* Reset the original m_prebuilt->pcur. */ + m_prebuilt->pcur = m_pcur; + m_prebuilt->clust_pcur = m_clust_pcur; + } + if (m_pcur_map != NULL) { + ut_free(m_pcur_map); + m_pcur_map = NULL; + } + DBUG_VOID_RETURN; +} + +/** Print error information. +@param[in] error Error code (MySQL). +@param[in] errflag Flags. */ +void +ha_innopart::print_error( + int error, + myf errflag) +{ + DBUG_ENTER("ha_innopart::print_error"); + if (print_partition_error(error, errflag)) { + ha_innobase::print_error(error, errflag); + } + + DBUG_VOID_RETURN; +} + +/** Can error be ignored. +@param[in] error Error code to check. +@return true if ignorable else false. */ +bool +ha_innopart::is_ignorable_error( + int error) +{ + if (ha_innobase::is_ignorable_error(error) + || error == HA_ERR_NO_PARTITION_FOUND + || error == HA_ERR_NOT_IN_LOCK_PARTITIONS) { + + return(true); + } + return(false); +} + +/** Get the index for the current partition +@param[in] keynr MySQL index number. +@return InnoDB index or NULL. */ +inline +dict_index_t* +ha_innopart::innobase_get_index( + uint keynr) +{ + uint part_id = m_last_part; + if (part_id >= m_tot_parts) { + ut_ad(0); + part_id = 0; + } + return(innopart_get_index(part_id, keynr)); +} + +/** Get the index for a handle. +Does not change active index. +@param[in] keynr Use this index; MAX_KEY means always clustered index, +even if it was internally generated by InnoDB. +@param[in] part_id From this partition. +@return NULL or index instance. 
*/
inline
dict_index_t*
ha_innopart::innopart_get_index(
	uint	part_id,
	uint	keynr)
{
	KEY*		key = NULL;
	dict_index_t*	index = NULL;

	DBUG_ENTER("innopart_get_index");

	if (keynr != MAX_KEY && table->s->keys > 0) {
		key = table->key_info + keynr;

		index = m_part_share->get_index(part_id, keynr);

		if (index != NULL) {
			ut_a(ut_strcmp(index->name, key->name) == 0);
		} else {
			/* Can't find index with keynr in the translation
			table. Only print message if the index translation
			table exists. */

			ib::warn() << "InnoDB could not find index "
				<< (key ? key->name : "NULL")
				<< " key no " << keynr << " for table "
				<< m_prebuilt->table->name
				<< " through its index translation table";

			index = dict_table_get_index_on_name(m_prebuilt->table,
							     key->name);
		}
	} else {
		/* Get the generated index. */
		ut_ad(keynr == MAX_KEY);
		index = dict_table_get_first_index(
			m_part_share->get_table_part(part_id));
	}

	if (index == NULL) {
		ib::error() << "InnoDB could not find key n:o "
			<< keynr << " with name " << (key ? key->name : "NULL")
			<< " from dict cache for table "
			<< m_prebuilt->table->name << " partition n:o "
			<< part_id;
	}

	DBUG_RETURN(index);
}

/** Changes the active index of a handle.
@param[in]	part_id	Use this partition.
@param[in]	keynr	Use this index; MAX_KEY means always clustered index,
even if it was internally generated by InnoDB.
@return	0 or error number.
*/ +int +ha_innopart::change_active_index( + uint part_id, + uint keynr) +{ + DBUG_ENTER("ha_innopart::change_active_index"); + + ut_ad(m_user_thd == ha_thd()); + ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd)); + + active_index = keynr; + set_partition(part_id); + + if (UNIV_UNLIKELY(m_prebuilt->index == NULL)) { + ib::warn() << "change_active_index(" << part_id + << "," << keynr << ") failed"; + m_prebuilt->index_usable = FALSE; + DBUG_RETURN(1); + } + + m_prebuilt->index_usable = row_merge_is_index_usable(m_prebuilt->trx, + m_prebuilt->index); + + if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) { + if (dict_index_is_corrupted(m_prebuilt->index)) { + char table_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name( + table_name, sizeof table_name, + m_prebuilt->index->table->name.m_name); + + push_warning_printf( + m_user_thd, Sql_condition::SL_WARNING, + HA_ERR_INDEX_CORRUPT, + "InnoDB: Index %s for table %s is" + " marked as corrupted" + " (partition %u)", + m_prebuilt->index->name(), table_name, part_id); + DBUG_RETURN(HA_ERR_INDEX_CORRUPT); + } else { + push_warning_printf( + m_user_thd, Sql_condition::SL_WARNING, + HA_ERR_TABLE_DEF_CHANGED, + "InnoDB: insufficient history for index %u", + keynr); + } + + /* The caller seems to ignore this. Thus, we must check + this again in row_search_for_mysql(). */ + + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } + + ut_a(m_prebuilt->search_tuple != NULL); + + /* If too expensive, cache the keynr and only update search_tuple when + keynr changes. Remember that the clustered index is also used for + MAX_KEY. */ + dtuple_set_n_fields(m_prebuilt->search_tuple, + m_prebuilt->index->n_fields); + + dict_index_copy_types(m_prebuilt->search_tuple, m_prebuilt->index, + m_prebuilt->index->n_fields); + + /* MySQL changes the active index for a handle also during some + queries, for example SELECT MAX(a), SUM(a) first retrieves the + MAX() and then calculates the sum. 
Previously we played safe + and used the flag ROW_MYSQL_WHOLE_ROW below, but that caused + unnecessary copying. Starting from MySQL-4.1 we use a more + efficient flag here. */ + + /* TODO: Is this really needed? + Will it not be built in index_read? */ + + build_template(false); + + DBUG_RETURN(0); +} + +/** Return first record in index from a partition. +@param[in] part Partition to read from. +@param[out] record First record in index in the partition. +@return error number or 0. */ +int +ha_innopart::index_first_in_part( + uint part, + uchar* record) +{ + int error; + DBUG_ENTER("ha_innopart::index_first_in_part"); + + set_partition(part); + error = ha_innobase::index_first(record); + update_partition(part); + + DBUG_RETURN(error); +} + +/** Return next record in index from a partition. +@param[in] part Partition to read from. +@param[out] record Last record in index in the partition. +@return error number or 0. */ +int +ha_innopart::index_next_in_part( + uint part, + uchar* record) +{ + DBUG_ENTER("ha_innopart::index_next_in_part"); + + int error; + + set_partition(part); + error = ha_innobase::index_next(record); + update_partition(part); + + ut_ad(m_ordered_scan_ongoing + || m_ordered_rec_buffer == NULL + || m_prebuilt->used_in_HANDLER + || m_part_spec.start_part >= m_part_spec.end_part); + + DBUG_RETURN(error); +} + +/** Return next same record in index from a partition. +This routine is used to read the next record, but only if the key is +the same as supplied in the call. +@param[in] part Partition to read from. +@param[out] record Last record in index in the partition. +@param[in] key Key to match. +@param[in] length Length of key. +@return error number or 0. 
*/ +int +ha_innopart::index_next_same_in_part( + uint part, + uchar* record, + const uchar* key, + uint length) +{ + int error; + + set_partition(part); + error = ha_innobase::index_next_same(record, key, length); + update_partition(part); + return(error); +} + +/** Return last record in index from a partition. +@param[in] part Partition to read from. +@param[out] record Last record in index in the partition. +@return error number or 0. */ +int +ha_innopart::index_last_in_part( + uint part, + uchar* record) +{ + int error; + + set_partition(part); + error = ha_innobase::index_last(record); + update_partition(part); + return(error); +} + +/** Return previous record in index from a partition. +@param[in] part Partition to read from. +@param[out] record Last record in index in the partition. +@return error number or 0. */ +int +ha_innopart::index_prev_in_part( + uint part, + uchar* record) +{ + int error; + + set_partition(part); + error = ha_innobase::index_prev(record); + update_partition(part); + + ut_ad(m_ordered_scan_ongoing + || m_ordered_rec_buffer == NULL + || m_prebuilt->used_in_HANDLER + || m_part_spec.start_part >= m_part_spec.end_part); + + return(error); +} + +/** Start index scan and return first record from a partition. +This routine starts an index scan using a start key. The calling +function will check the end key on its own. +@param[in] part Partition to read from. +@param[out] record First matching record in index in the partition. +@param[in] key Key to match. +@param[in] keypart_map Which part of the key to use. +@param[in] find_flag Key condition/direction to use. +@return error number or 0. 
*/ +int +ha_innopart::index_read_map_in_part( + uint part, + uchar* record, + const uchar* key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) +{ + int error; + + set_partition(part); + error = ha_innobase::index_read_map( + record, + key, + keypart_map, + find_flag); + update_partition(part); + return(error); +} + +/** Start index scan and return first record from a partition. +This routine starts an index scan using a start key. The calling +function will check the end key on its own. +@param[in] part Partition to read from. +@param[out] record First matching record in index in the partition. +@param[in] index Index to read from. +@param[in] key Key to match. +@param[in] keypart_map Which part of the key to use. +@param[in] find_flag Key condition/direction to use. +@return error number or 0. */ +int +ha_innopart::index_read_idx_map_in_part( + uint part, + uchar* record, + uint index, + const uchar* key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) +{ + int error; + + set_partition(part); + error = ha_innobase::index_read_idx_map( + record, + index, + key, + keypart_map, + find_flag); + update_partition(part); + return(error); +} + +/** Return last matching record in index from a partition. +@param[in] part Partition to read from. +@param[out] record Last matching record in index in the partition. +@param[in] key Key to match. +@param[in] keypart_map Which part of the key to use. +@return error number or 0. */ +int +ha_innopart::index_read_last_map_in_part( + uint part, + uchar* record, + const uchar* key, + key_part_map keypart_map) +{ + int error; + set_partition(part); + error = ha_innobase::index_read_last_map(record, key, keypart_map); + update_partition(part); + return(error); +} + +/** Start index scan and return first record from a partition. +This routine starts an index scan using a start and end key. +@param[in] part Partition to read from. 
+@param[in,out] record First matching record in index in the partition, +if NULL use table->record[0] as return buffer. +@param[in] start_key Start key to match. +@param[in] end_key End key to match. +@param[in] eq_range Is equal range, start_key == end_key. +@param[in] sorted Return rows in sorted order. +@return error number or 0. */ +int +ha_innopart::read_range_first_in_part( + uint part, + uchar* record, + const key_range* start_key, + const key_range* end_key, + bool eq_range, + bool sorted) +{ + int error; + uchar* read_record = record; + set_partition(part); + if (read_record == NULL) { + read_record = table->record[0]; + } + if (m_start_key.key != NULL) { + error = ha_innobase::index_read( + read_record, + m_start_key.key, + m_start_key.length, + m_start_key.flag); + } else { + error = ha_innobase::index_first(read_record); + } + if (error == HA_ERR_KEY_NOT_FOUND) { + error = HA_ERR_END_OF_FILE; + } else if (error == 0 && !in_range_check_pushed_down) { + /* compare_key uses table->record[0], so we + need to copy the data if not already there. */ + + if (record != NULL) { + copy_cached_row(table->record[0], read_record); + } + if (compare_key(end_range) > 0) { + /* must use ha_innobase:: due to set/update_partition + could overwrite states if ha_innopart::unlock_row() + was used. */ + ha_innobase::unlock_row(); + error = HA_ERR_END_OF_FILE; + } + } + update_partition(part); + return(error); +} + +/** Return next record in index range scan from a partition. +@param[in] part Partition to read from. +@param[in,out] record First matching record in index in the partition, +if NULL use table->record[0] as return buffer. +@return error number or 0. */ +int +ha_innopart::read_range_next_in_part( + uint part, + uchar* record) +{ + int error; + uchar* read_record = record; + + set_partition(part); + if (read_record == NULL) { + read_record = table->record[0]; + } + + /* TODO: Implement ha_innobase::read_range*? 
+ So it will return HA_ERR_END_OF_FILE or + HA_ERR_KEY_NOT_FOUND when passing end_range. */ + + error = ha_innobase::index_next(read_record); + if (error == 0 && !in_range_check_pushed_down) { + /* compare_key uses table->record[0], so we + need to copy the data if not already there. */ + + if (record != NULL) { + copy_cached_row(table->record[0], read_record); + } + if (compare_key(end_range) > 0) { + /* must use ha_innobase:: due to set/update_partition + could overwrite states if ha_innopart::unlock_row() + was used. */ + ha_innobase::unlock_row(); + error = HA_ERR_END_OF_FILE; + } + } + update_partition(part); + + return(error); +} + +/** Initialize a table scan in a specific partition. +@param[in] part_id Partition to initialize. +@param[in] scan True if table/index scan false otherwise (for rnd_pos) +@return 0 or error number. */ +int +ha_innopart::rnd_init_in_part( + uint part_id, + bool scan) +{ + int err; + + if (m_prebuilt->clust_index_was_generated) { + err = change_active_index(part_id, MAX_KEY); + } else { + err = change_active_index(part_id, m_primary_key); + } + + m_start_of_scan = 1; + + /* Don't use semi-consistent read in random row reads (by position). + This means we must disable semi_consistent_read if scan is false. */ + + if (!scan) { + try_semi_consistent_read(false); + } + + return(err); +} + +/** Ends a table scan. +@param[in] part_id Partition to end table scan in. +@param[in] scan True for scan else random access. +@return 0 or error number. */ +int +ha_innopart::rnd_end_in_part( + uint part_id, + bool scan) +{ + return(index_end()); +} + +/** Read next row in partition. +Reads the next row in a table scan (also used to read the FIRST row +in a table scan). +@param[in] part_id Partition to end table scan in. +@param[out] buf Returns the row in this buffer, in MySQL format. +@return 0, HA_ERR_END_OF_FILE or error number. 
*/ +int +ha_innopart::rnd_next_in_part( + uint part_id, + uchar* buf) +{ + int error; + + DBUG_ENTER("ha_innopart::rnd_next_in_part"); + + set_partition(part_id); + if (m_start_of_scan) { + error = ha_innobase::index_first(buf); + + if (error == HA_ERR_KEY_NOT_FOUND) { + error = HA_ERR_END_OF_FILE; + } + m_start_of_scan = 0; + } else { + ha_statistic_increment(&SSV::ha_read_rnd_next_count); + error = ha_innobase::general_fetch(buf, ROW_SEL_NEXT, 0); + } + + update_partition(part_id); + DBUG_RETURN(error); +} + +/** Get a row from a position. +Fetches a row from the table based on a row reference. +@param[out] buf Returns the row in this buffer, in MySQL format. +@param[in] pos Position, given as primary key value or DB_ROW_ID +(if no primary key) of the row in MySQL format. The length of data in pos has +to be ref_length. +@return 0, HA_ERR_KEY_NOT_FOUND or error code. */ +int +ha_innopart::rnd_pos( + uchar* buf, + uchar* pos) +{ + int error; + uint part_id; + DBUG_ENTER("ha_innopart::rnd_pos"); + ut_ad(PARTITION_BYTES_IN_POS == 2); + DBUG_DUMP("pos", pos, ref_length); + + ha_statistic_increment(&SSV::ha_read_rnd_count); + + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); + + /* Restore used partition. */ + part_id = uint2korr(pos); + + set_partition(part_id); + + /* Note that we assume the length of the row reference is fixed + for the table, and it is == ref_length. */ + + error = ha_innobase::index_read(buf, pos + PARTITION_BYTES_IN_POS, + ref_length - PARTITION_BYTES_IN_POS, + HA_READ_KEY_EXACT); + DBUG_PRINT("info", ("part %u index_read returned %d", part_id, error)); + DBUG_DUMP("buf", buf, table_share->reclength); + + update_partition(part_id); + + DBUG_RETURN(error); +} + +/** Return position for cursor in last used partition. +Stores a reference to the current row to 'ref' field of the handle. 
Note +that in the case where we have generated the clustered index for the +table, the function parameter is illogical: we MUST ASSUME that 'record' +is the current 'position' of the handle, because if row ref is actually +the row id internally generated in InnoDB, then 'record' does not contain +it. We just guess that the row id must be for the record where the handle +was positioned the last time. +@param[out] ref_arg Pointer to buffer where to write the position. +@param[in] record Record to position for. */ +void +ha_innopart::position_in_last_part( + uchar* ref_arg, + const uchar* record) +{ + if (m_prebuilt->clust_index_was_generated) { + /* No primary key was defined for the table and we + generated the clustered index from row id: the + row reference will be the row id, not any key value + that MySQL knows of. */ + + memcpy(ref_arg, m_prebuilt->row_id, DATA_ROW_ID_LEN); + } else { + + /* Copy primary key as the row reference */ + KEY* key_info = table->key_info + m_primary_key; + key_copy(ref_arg, (uchar*)record, key_info, + key_info->key_length); + } +} + +/** Fill in data_dir_path and tablespace name from internal data +dictionary. +@param part_elem Partition element to fill. +@param ib_table InnoDB table to copy from. */ +void +ha_innopart::update_part_elem( + partition_element* part_elem, + dict_table_t* ib_table) +{ + dict_get_and_save_data_dir_path(ib_table, false); + if (ib_table->data_dir_path != NULL) { + if (part_elem->data_file_name == NULL + || strcmp(ib_table->data_dir_path, + part_elem->data_file_name) != 0) { + + /* Play safe and allocate memory from TABLE and copy + instead of expose the internal data dictionary. 
*/ + part_elem->data_file_name = + strdup_root(&table->mem_root, + ib_table->data_dir_path); + } + } else { + part_elem->data_file_name = NULL; + } + + part_elem->index_file_name = NULL; + dict_get_and_save_space_name(ib_table, false); + if (ib_table->tablespace != NULL) { + ut_ad(part_elem->tablespace_name == NULL + || 0 == strcmp(part_elem->tablespace_name, + ib_table->tablespace)); + if (part_elem->tablespace_name == NULL + || strcmp(ib_table->tablespace, + part_elem->tablespace_name) != 0) { + + /* Play safe and allocate memory from TABLE and copy + instead of expose the internal data dictionary. */ + part_elem->tablespace_name = + strdup_root(&table->mem_root, + ib_table->tablespace); + } + } + else { + ut_ad(part_elem->tablespace_name == NULL + || 0 == strcmp(part_elem->tablespace_name, + "innodb_file_per_table")); + if (part_elem->tablespace_name != NULL + && 0 != strcmp(part_elem->tablespace_name, + "innodb_file_per_table")) { + + /* Update part_elem tablespace to NULL same as in + innodb data dictionary ib_table. */ + part_elem->tablespace_name = NULL; + } + } +} + +/** Update create_info. +Used in SHOW CREATE TABLE et al. +@param[in,out] create_info Create info to update. */ +void +ha_innopart::update_create_info( + HA_CREATE_INFO* create_info) +{ + uint num_subparts = m_part_info->num_subparts; + uint num_parts; + uint part; + dict_table_t* table; + List_iterator + part_it(m_part_info->partitions); + partition_element* part_elem; + partition_element* sub_elem; + DBUG_ENTER("ha_innopart::update_create_info"); + if ((create_info->used_fields & HA_CREATE_USED_AUTO) == 0) { + info(HA_STATUS_AUTO); + create_info->auto_increment_value = stats.auto_increment_value; + } + + num_parts = (num_subparts != 0) ? m_tot_parts / num_subparts : m_tot_parts; + + /* DATA/INDEX DIRECTORY are never applied to the whole partitioned + table, only to its parts. 
*/ + + create_info->data_file_name = NULL; + create_info->index_file_name = NULL; + + /* Since update_create_info() can be called from + mysql_prepare_alter_table() when not all partitions are set up, + we look for that condition first. + If all partitions are not available then simply return, + since it does not need any updated partitioning info. */ + + if (!m_part_info->temp_partitions.is_empty()) { + DBUG_VOID_RETURN; + } + part = 0; + while ((part_elem = part_it++)) { + if (part >= num_parts) { + DBUG_VOID_RETURN; + } + if (m_part_info->is_sub_partitioned()) { + List_iterator + subpart_it(part_elem->subpartitions); + uint subpart = 0; + while ((sub_elem = subpart_it++)) { + if (subpart >= num_subparts) { + DBUG_VOID_RETURN; + } + subpart++; + } + if (subpart != num_subparts) { + DBUG_VOID_RETURN; + } + } + part++; + } + if (part != num_parts) { + DBUG_VOID_RETURN; + } + + /* part_elem->data_file_name and tablespace_name should be correct from + the .frm, but may have been changed, so update from SYS_DATAFILES. + index_file_name is ignored, so remove it. */ + + part = 0; + part_it.rewind(); + while ((part_elem = part_it++)) { + if (m_part_info->is_sub_partitioned()) { + List_iterator + subpart_it(part_elem->subpartitions); + while ((sub_elem = subpart_it++)) { + table = m_part_share->get_table_part(part++); + update_part_elem(sub_elem, table); + } + } else { + table = m_part_share->get_table_part(part++); + update_part_elem(part_elem, table); + } + } + DBUG_VOID_RETURN; +} + +/** Set create_info->data_file_name. +@param[in] part_elem Partition to copy from. +@param[in,out] info Create info to set. */ +static +void +set_create_info_dir( + partition_element* part_elem, + HA_CREATE_INFO* info) +{ + if (part_elem->data_file_name != NULL + && part_elem->data_file_name[0] != '\0') { + info->data_file_name = part_elem->data_file_name; + /* Also implies non-default tablespace. 
*/ + info->tablespace = NULL; + } + if (part_elem->index_file_name != NULL + && part_elem->index_file_name[0] != '\0') { + info->index_file_name = part_elem->index_file_name; + } + if (part_elem->tablespace_name != NULL + && part_elem->tablespace_name[0] != '\0') { + info->tablespace = part_elem->tablespace_name; + } +} + +/** Set flags and append '/' to remote path if necessary. */ +void +create_table_info_t::set_remote_path_flags() +{ + if (m_remote_path[0] != '\0') { + ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) != 0); + + /* os_file_make_remote_pathname will truncate + everything after the last '/', so append '/' + if it is not the last character. */ + + size_t len = strlen(m_remote_path); + if (m_remote_path[len - 1] != OS_PATH_SEPARATOR) { + m_remote_path[len] = OS_PATH_SEPARATOR; + m_remote_path[len + 1] = '\0'; + } + } else { + ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) == 0); + } +} + +/** Creates a new table to an InnoDB database. +@param[in] name Table name (in filesystem charset). +@param[in] form MySQL Table containing information of +partitions, columns and indexes etc. +@param[in] create_info Additional create information, like +create statement string. +@return 0 or error number. 
*/
int
ha_innopart::create(
	const char*	name,
	TABLE*		form,
	HA_CREATE_INFO*	create_info)
{
	int		error;
	/** {database}/{tablename} */
	char		table_name[FN_REFLEN];
	/** absolute path of temp frm */
	char		temp_path[FN_REFLEN];
	/** absolute path of table */
	char		remote_path[FN_REFLEN];
	char		partition_name[FN_REFLEN];
	char		tablespace_name[NAME_LEN + 1];
	char*		table_name_end;
	size_t		table_name_len;
	char*		partition_name_start;
	char		table_data_file_name[FN_REFLEN];
	char		table_level_tablespace_name[NAME_LEN + 1];
	const char*	index_file_name;
	size_t		len;

	create_table_info_t	info(ha_thd(),
				     form,
				     create_info,
				     table_name,
				     temp_path,
				     remote_path,
				     tablespace_name);

	DBUG_ENTER("ha_innopart::create");
	ut_ad(create_info != NULL);
	ut_ad(m_part_info == form->part_info);
	ut_ad(table_share != NULL);

	/* Not allowed to create temporary partitioned tables. */
	if (create_info != NULL
	    && (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0) {
		my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
		ut_ad(0); // Can we support partitioned temporary tables?
		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
	}

	error = info.initialize();
	if (error != 0) {
		DBUG_RETURN(error);
	}

	/* Setup and check table level options. */
	error = info.prepare_create_table(name);
	if (error != 0) {
		DBUG_RETURN(error);
	}
	ut_ad(temp_path[0] == '\0');
	strcpy(partition_name, table_name);
	partition_name_start = partition_name + strlen(partition_name);
	table_name_len = strlen(table_name);
	table_name_end = table_name + table_name_len;
	if (create_info->data_file_name != NULL) {
		/* Strip the tablename from the path. */
		strncpy(table_data_file_name, create_info->data_file_name,
			FN_REFLEN - 1);
		table_data_file_name[FN_REFLEN - 1] = '\0';
		char*	ptr = strrchr(table_data_file_name,
				      OS_PATH_SEPARATOR);
		ut_ad(ptr != NULL);
		if (ptr != NULL) {
			ptr++;
			*ptr = '\0';
			create_info->data_file_name = table_data_file_name;
		}
	} else {
		table_data_file_name[0] = '\0';
	}
	index_file_name = create_info->index_file_name;
	if (create_info->tablespace != NULL) {
		strcpy(table_level_tablespace_name, create_info->tablespace);
	} else {
		table_level_tablespace_name[0] = '\0';
	}

	info.allocate_trx();

	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
	or lock waits can happen in it during a table create operation.
	Drop table etc. do this latching in row0mysql.cc. */

	row_mysql_lock_data_dictionary(info.trx());

	/* TODO: use the new DD tables instead to decrease duplicate info. */
	/* NOTE(review): template argument restored after mangling. */
	List_iterator_fast<partition_element>
		part_it(form->part_info->partitions);
	partition_element*	part_elem;
	while ((part_elem = part_it++)) {
		/* Append the partition name to the table name. */
		len = Ha_innopart_share::append_sep_and_name(
			partition_name_start,
			part_elem->partition_name,
			part_sep,
			FN_REFLEN - table_name_len);
		if ((table_name_len + len) >= FN_REFLEN) {
			ut_ad(0);
			goto cleanup;
		}

		/* Override table level DATA/INDEX DIRECTORY. */
		set_create_info_dir(part_elem, create_info);

		if (!form->part_info->is_sub_partitioned()) {
			error = info.prepare_create_table(partition_name);
			if (error != 0) {
				goto cleanup;
			}
			info.set_remote_path_flags();
			error = info.create_table();
			if (error != 0) {
				goto cleanup;
			}
		} else {
			size_t	part_name_len = strlen(partition_name_start)
						+ table_name_len;
			char*	part_name_end = partition_name
						+ part_name_len;
			List_iterator_fast<partition_element>
				sub_it(part_elem->subpartitions);
			partition_element*	sub_elem;

			while ((sub_elem = sub_it++)) {
				ut_ad(sub_elem->partition_name != NULL);

				/* 'table' will be
				<name>#P#<part_name>#SP#<subpart_name>.
				Append the sub-partition name to
				the partition name.
				(NOTE(review): the bracketed placeholders
				were stripped by mangling; reconstructed —
				confirm against upstream.) */

				len = Ha_innopart_share::append_sep_and_name(
					part_name_end,
					sub_elem->partition_name,
					sub_sep,
					FN_REFLEN - part_name_len);
				if ((len + part_name_len) >= FN_REFLEN) {
					ut_ad(0);
					goto cleanup;
				}
				/* Override part level
				DATA/INDEX DIRECTORY. */
				set_create_info_dir(sub_elem, create_info);

				Ha_innopart_share::partition_name_casedn_str(
					part_name_end + 4);
				error = info.prepare_create_table(
					partition_name);
				if (error != 0) {
					goto cleanup;
				}
				info.set_remote_path_flags();
				error = info.create_table();
				if (error != 0) {
					goto cleanup;
				}

				/* Reset partition level
				DATA/INDEX DIRECTORY. */

				create_info->data_file_name =
					table_data_file_name;
				create_info->index_file_name =
					index_file_name;
				create_info->tablespace =
					table_level_tablespace_name;
				set_create_info_dir(part_elem, create_info);
			}
		}
		/* Reset table level DATA/INDEX DIRECTORY. */
		create_info->data_file_name = table_data_file_name;
		create_info->index_file_name = index_file_name;
		create_info->tablespace = table_level_tablespace_name;
	}

	innobase_commit_low(info.trx());

	row_mysql_unlock_data_dictionary(info.trx());

	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0. */

	log_buffer_flush_to_disk();

	part_it.rewind();
	/* No need to use these now, only table_name will be used. */
	create_info->data_file_name = NULL;
	create_info->index_file_name = NULL;
	while ((part_elem = part_it++)) {
		Ha_innopart_share::append_sep_and_name(
			table_name_end,
			part_elem->partition_name,
			part_sep,
			FN_REFLEN - table_name_len);
		if (!form->part_info->is_sub_partitioned()) {
			error = info.create_table_update_dict();
			if (error != 0) {
				ut_ad(0);
				goto end;
			}
		} else {
			size_t	part_name_len = strlen(table_name_end);
			char*	part_name_end = table_name_end
						+ part_name_len;
			List_iterator_fast<partition_element>
				sub_it(part_elem->subpartitions);
			partition_element*	sub_elem;
			while ((sub_elem = sub_it++)) {
				Ha_innopart_share::append_sep_and_name(
					part_name_end,
					sub_elem->partition_name,
					sub_sep,
					FN_REFLEN - table_name_len
					- part_name_len);
				error = info.create_table_update_dict();
				if (error != 0) {
					ut_ad(0);
					goto end;
				}
			}
		}
	}

end:
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

	trx_free_for_mysql(info.trx());

	DBUG_RETURN(error);

cleanup:
	trx_rollback_for_mysql(info.trx());

	row_mysql_unlock_data_dictionary(info.trx());

	trx_free_for_mysql(info.trx());

	DBUG_RETURN(error);
}

/** Discards or imports an InnoDB tablespace.
@param[in]	discard	True if discard, else import.
@return	0 or error number.
*/ +int +ha_innopart::discard_or_import_tablespace( + my_bool discard) +{ + int error = 0; + uint i; + DBUG_ENTER("ha_innopart::discard_or_import_tablespace"); + + for (i= m_part_info->get_first_used_partition(); + i < m_tot_parts; + i= m_part_info->get_next_used_partition(i)) { + + m_prebuilt->table = m_part_share->get_table_part(i); + error= ha_innobase::discard_or_import_tablespace(discard); + if (error != 0) { + break; + } + } + m_prebuilt->table = m_part_share->get_table_part(0); + + /* IMPORT/DISCARD also means resetting auto_increment. Make sure + that auto_increment initialization is done after all partitions + are imported. */ + if (table->found_next_number_field != NULL) { + lock_auto_increment(); + m_part_share->next_auto_inc_val = 0; + m_part_share->auto_inc_initialized = false; + unlock_auto_increment(); + } + + DBUG_RETURN(error); +} + +/** Compare key and rowid. +Helper function for sorting records in the priority queue. +a/b points to table->record[0] rows which must have the +key fields set. The bytes before a and b store the rowid. +This is used for comparing/sorting rows first according to +KEY and if same KEY, by rowid (ref). +@param[in] key_info Null terminated array of index information. +@param[in] a Pointer to record+ref in first record. +@param[in] b Pointer to record+ref in second record. +@return Return value is SIGN(first_rec - second_rec) +@retval 0 Keys are equal. +@retval -1 second_rec is greater than first_rec. +@retval +1 first_rec is greater than second_rec. */ +int +ha_innopart::key_and_rowid_cmp( + KEY** key_info, + uchar *a, + uchar *b) +{ + int cmp = key_rec_cmp(key_info, a, b); + if (cmp != 0) { + return(cmp); + } + + /* We must compare by rowid, which is added before the record, + in the priority queue. */ + + return(memcmp(a - DATA_ROW_ID_LEN, b - DATA_ROW_ID_LEN, + DATA_ROW_ID_LEN)); +} + +/** Extra hints from MySQL. +@param[in] operation Operation hint. +@return 0 or error number. 
*/ +int +ha_innopart::extra( + enum ha_extra_function operation) +{ + if (operation == HA_EXTRA_SECONDARY_SORT_ROWID) { + /* index_init(sorted=true) must have been called! */ + ut_ad(m_ordered); + ut_ad(m_ordered_rec_buffer != NULL); + /* No index_read call must have been done! */ + ut_ad(m_queue->empty()); + + /* If not PK is set as secondary sort, do secondary sort by + rowid/ref. */ + + ut_ad(m_curr_key_info[1] != NULL + || m_prebuilt->clust_index_was_generated != 0 + || m_curr_key_info[0] + == table->key_info + table->s->primary_key); + + if (m_curr_key_info[1] == NULL + && m_prebuilt->clust_index_was_generated) { + m_ref_usage = Partition_helper::REF_USED_FOR_SORT; + m_queue->m_fun = key_and_rowid_cmp; + } + return(0); + } + return(ha_innobase::extra(operation)); +} + +/** Delete all rows in a partition. +@return 0 or error number. */ +int +ha_innopart::truncate_partition_low() +{ + return(truncate()); +} + +/** Deletes all rows of a partitioned InnoDB table. +@return 0 or error number. */ +int +ha_innopart::truncate() +{ + dberr_t err = DB_SUCCESS; + int error; + + DBUG_ENTER("ha_innopart::truncate"); + + if (high_level_read_only) { + DBUG_RETURN(HA_ERR_TABLE_READONLY); + } + + /* TRUNCATE also means resetting auto_increment. Hence, reset + it so that it will be initialized again at the next use. */ + + if (table->found_next_number_field != NULL) { + lock_auto_increment(); + m_part_share->next_auto_inc_val= 0; + m_part_share->auto_inc_initialized= false; + unlock_auto_increment(); + } + + /* Get the transaction associated with the current thd, or create one + if not yet created, and update m_prebuilt->trx. */ + + update_thd(ha_thd()); + + if (!trx_is_started(m_prebuilt->trx)) { + ++m_prebuilt->trx->will_lock; + } + /* Truncate the table in InnoDB. 
*/ + + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + set_partition(i); + err = row_truncate_table_for_mysql(m_prebuilt->table, + m_prebuilt->trx); + update_partition(i); + if (err != DB_SUCCESS) { + break; + } + } + + switch (err) { + + case DB_TABLESPACE_DELETED: + case DB_TABLESPACE_NOT_FOUND: + ib_senderrf( + m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, + (err == DB_TABLESPACE_DELETED ? + ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING), + table->s->table_name.str); + table->status = STATUS_NOT_FOUND; + error = HA_ERR_NO_SUCH_TABLE; + break; + + default: + error = convert_error_code_to_mysql( + err, m_prebuilt->table->flags, + m_prebuilt->trx->mysql_thd); + table->status = STATUS_NOT_FOUND; + break; + } + DBUG_RETURN(error); +} + +/** Total number of rows in all used partitions. +Returns the exact number of records that this client can see using this +handler object. +@param[out] num_rows Number of rows. +@return 0 or error number. */ +int +ha_innopart::records( + ha_rows* num_rows) +{ + ha_rows n_rows; + int err; + DBUG_ENTER("ha_innopart::records()"); + + *num_rows = 0; + + /* The index scan is probably so expensive, so the overhead + of the rest of the function is neglectable for each partition. + So no current reason for optimizing this further. */ + + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + set_partition(i); + err = ha_innobase::records(&n_rows); + update_partition(i); + if (err != 0) { + *num_rows = HA_POS_ERROR; + DBUG_RETURN(err); + } + *num_rows += n_rows; + } + DBUG_RETURN(0); +} + +/** Estimates the number of index records in a range. +@param[in] keynr Index number. +@param[in] min_key Start key value (or NULL). +@param[in] max_key End key value (or NULL). +@return estimated number of rows. 
*/ +ha_rows +ha_innopart::records_in_range( + uint keynr, + key_range* min_key, + key_range* max_key) +{ + KEY* key; + dict_index_t* index; + dtuple_t* range_start; + dtuple_t* range_end; + int64_t n_rows = 0; + page_cur_mode_t mode1; + page_cur_mode_t mode2; + mem_heap_t* heap; + uint part_id; + + DBUG_ENTER("ha_innopart::records_in_range"); + DBUG_PRINT("info", ("keynr %u min %p max %p", keynr, min_key, max_key)); + + ut_a(m_prebuilt->trx == thd_to_trx(ha_thd())); + + m_prebuilt->trx->op_info = (char*)"estimating records in index range"; + + /* In case MySQL calls this in the middle of a SELECT query, release + possible adaptive hash latch to avoid deadlocks of threads. */ + + trx_search_latch_release_if_reserved(m_prebuilt->trx); + + active_index = keynr; + + key = table->key_info + active_index; + + part_id = m_part_info->get_first_used_partition(); + if (part_id == MY_BIT_NONE) { + DBUG_RETURN(0); + } + /* This also sets m_prebuilt->index! */ + set_partition(part_id); + index = m_prebuilt->index; + + /* Only validate the first partition, to avoid too much overhead. */ + + /* There exists possibility of not being able to find requested + index due to inconsistency between MySQL and InnoDB dictionary info. + Necessary message should have been printed in innopart_get_index(). 
*/ + if (index == NULL + || dict_table_is_discarded(m_prebuilt->table) + || dict_index_is_corrupted(index) + || !row_merge_is_index_usable(m_prebuilt->trx, index)) { + + n_rows = HA_POS_ERROR; + goto func_exit; + } + + heap = mem_heap_create(2 * (key->actual_key_parts * sizeof(dfield_t) + + sizeof(dtuple_t))); + + range_start = dtuple_create(heap, key->actual_key_parts); + dict_index_copy_types(range_start, index, key->actual_key_parts); + + range_end = dtuple_create(heap, key->actual_key_parts); + dict_index_copy_types(range_end, index, key->actual_key_parts); + + row_sel_convert_mysql_key_to_innobase( + range_start, + m_prebuilt->srch_key_val1, + m_prebuilt->srch_key_val_len, + index, + (byte*) (min_key ? min_key->key : (const uchar*) 0), + (ulint) (min_key ? min_key->length : 0), + m_prebuilt->trx); + + ut_ad(min_key != NULL + ? range_start->n_fields > 0 + : range_start->n_fields == 0); + + row_sel_convert_mysql_key_to_innobase( + range_end, + m_prebuilt->srch_key_val2, + m_prebuilt->srch_key_val_len, + index, + (byte*) (max_key != NULL ? max_key->key : (const uchar*) 0), + (ulint) (max_key != NULL ? max_key->length : 0), + m_prebuilt->trx); + + ut_ad(max_key != NULL + ? range_end->n_fields > 0 + : range_end->n_fields == 0); + + mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag : + HA_READ_KEY_EXACT); + mode2 = convert_search_mode_to_innobase(max_key ? 
max_key->flag : + HA_READ_KEY_EXACT); + + if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { + + n_rows = btr_estimate_n_rows_in_range(index, range_start, + mode1, range_end, + mode2); + DBUG_PRINT("info", ("part_id %u rows %ld", part_id, + (long int) n_rows)); + for (part_id = m_part_info->get_next_used_partition(part_id); + part_id < m_tot_parts; + part_id = m_part_info->get_next_used_partition(part_id)) { + + index = m_part_share->get_index(part_id, keynr); + int64_t n = btr_estimate_n_rows_in_range(index, + range_start, + mode1, + range_end, + mode2); + n_rows += n; + DBUG_PRINT("info", ("part_id %u rows %ld (%ld)", + part_id, + (long int) n, + (long int) n_rows)); + } + } else { + + n_rows = HA_POS_ERROR; + } + + mem_heap_free(heap); + +func_exit: + + m_prebuilt->trx->op_info = (char*)""; + + /* The MySQL optimizer seems to believe an estimate of 0 rows is + always accurate and may return the result 'Empty set' based on that. + The accuracy is not guaranteed, and even if it were, for a locking + read we should anyway perform the search to set the next-key lock. + Add 1 to the value to make sure MySQL does not make the assumption! */ + + if (n_rows == 0) { + n_rows = 1; + } + + DBUG_RETURN((ha_rows) n_rows); +} + +/** Gives an UPPER BOUND to the number of rows in a table. +This is used in filesort.cc. +@return upper bound of rows. */ +ha_rows +ha_innopart::estimate_rows_upper_bound() +{ + const dict_index_t* index; + ulonglong estimate = 0; + ulonglong local_data_file_length; + ulint stat_n_leaf_pages; + + DBUG_ENTER("ha_innopart::estimate_rows_upper_bound"); + + /* We do not know if MySQL can call this function before calling + external_lock(). To be safe, update the thd of the current table + handle. */ + + update_thd(ha_thd()); + + m_prebuilt->trx->op_info = "calculating upper bound for table rows"; + + /* In case MySQL calls this in the middle of a SELECT query, release + possible adaptive hash latch to avoid deadlocks of threads. 
*/ + + trx_search_latch_release_if_reserved(m_prebuilt->trx); + + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + m_prebuilt->table = m_part_share->get_table_part(i); + index = dict_table_get_first_index(m_prebuilt->table); + + stat_n_leaf_pages = index->stat_n_leaf_pages; + + ut_a(stat_n_leaf_pages > 0); + + local_data_file_length = + ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; + + /* Calculate a minimum length for a clustered index record + and from that an upper bound for the number of rows. + Since we only calculate new statistics in row0mysql.cc when a + table has grown by a threshold factor, + we must add a safety factor 2 in front of the formula below. */ + + estimate += 2 * local_data_file_length + / dict_index_calc_min_rec_len(index); + } + + m_prebuilt->trx->op_info = ""; + + DBUG_RETURN((ha_rows) estimate); +} + +/** Time estimate for full table scan. +How many seeks it will take to read through the table. This is to be +comparable to the number returned by records_in_range so that we can +decide if we should scan the table or use keys. +@return estimated time measured in disk seeks. */ +double +ha_innopart::scan_time() +{ + double scan_time = 0.0; + DBUG_ENTER("ha_innopart::scan_time"); + + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + m_prebuilt->table = m_part_share->get_table_part(i); + scan_time += ha_innobase::scan_time(); + } + DBUG_RETURN(scan_time); +} + +/** Updates the statistics for one partition (table). +@param[in] table Table to update the statistics for. +@param[in] is_analyze True if called from ::analyze(). +@return error code. 
*/ +static +int +update_table_stats( + dict_table_t* table, + bool is_analyze) +{ + dict_stats_upd_option_t opt; + dberr_t ret; + + if (dict_stats_is_persistent_enabled(table)) { + if (is_analyze) { + opt = DICT_STATS_RECALC_PERSISTENT; + } else { + /* This is e.g. 'SHOW INDEXES', + fetch the persistent stats from disk. */ + opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; + } + } else { + opt = DICT_STATS_RECALC_TRANSIENT; + } + + ut_ad(!mutex_own(&dict_sys->mutex)); + ret = dict_stats_update(table, opt); + + if (ret != DB_SUCCESS) { + return(HA_ERR_GENERIC); + } + return(0); +} + +/** Updates and return statistics. +Returns statistics information of the table to the MySQL interpreter, +in various fields of the handle object. +@param[in] flag Flags for what to update and return. +@param[in] is_analyze True if called from ::analyze(). +@return HA_ERR_* error code or 0. */ +int +ha_innopart::info_low( + uint flag, + bool is_analyze) +{ + dict_table_t* ib_table; + ib_uint64_t max_rows = 0; + uint biggest_partition = 0; + int error = 0; + + DBUG_ENTER("ha_innopart::info_low"); + + /* If we are forcing recovery at a high level, we will suppress + statistics calculation on tables, because that may crash the + server if an index is badly corrupted. */ + + /* We do not know if MySQL can call this function before calling + external_lock(). To be safe, update the thd of the current table + handle. */ + + update_thd(ha_thd()); + + /* In case MySQL calls this in the middle of a SELECT query, release + possible adaptive hash latch to avoid deadlocks of threads. */ + + m_prebuilt->trx->op_info = (char*)"returning various info to MySQL"; + + trx_search_latch_release_if_reserved(m_prebuilt->trx); + + ut_ad(m_part_share->get_table_part(0)->n_ref_count > 0); + + if ((flag & HA_STATUS_TIME) != 0) { + stats.update_time = 0; + + if (is_analyze) { + /* Only analyze the given partitions. 
*/ + int error = set_altered_partitions(); + if (error != 0) { + /* Already checked in mysql_admin_table! */ + ut_ad(0); + DBUG_RETURN(error); + } + } + if (is_analyze || innobase_stats_on_metadata) { + m_prebuilt->trx->op_info = "updating table statistics"; + } + + /* TODO: Only analyze the PK for all partitions, + then the secondary indexes only for the largest partition! */ + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + ib_table = m_part_share->get_table_part(i); + if (is_analyze || innobase_stats_on_metadata) { + error = update_table_stats(ib_table, is_analyze); + if (error != 0) { + m_prebuilt->trx->op_info = ""; + DBUG_RETURN(error); + } + } + set_if_bigger(stats.update_time, + (ulong) ib_table->update_time); + } + + if (is_analyze || innobase_stats_on_metadata) { + m_prebuilt->trx->op_info = + "returning various info to MySQL"; + } + } + + if ((flag & HA_STATUS_VARIABLE) != 0) { + + /* TODO: If this is called after pruning, then we could + also update the statistics according to the non-pruned + partitions, by allocating new rec_per_key on the TABLE, + instead of using the info from the TABLE_SHARE. 
*/ + ulint stat_clustered_index_size = 0; + ulint stat_sum_of_other_index_sizes = 0; + ib_uint64_t n_rows = 0; + ulint avail_space = 0; + bool checked_sys_tablespace = false; + + if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0) { + stats.delete_length = 0; + } + + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + ib_table = m_part_share->get_table_part(i); + if ((flag & HA_STATUS_NO_LOCK) == 0) { + dict_table_stats_lock(ib_table, RW_S_LATCH); + } + + ut_a(ib_table->stat_initialized); + + n_rows += ib_table->stat_n_rows; + if (ib_table->stat_n_rows > max_rows) { + max_rows = ib_table->stat_n_rows; + biggest_partition = i; + } + + stat_clustered_index_size += + ib_table->stat_clustered_index_size; + + stat_sum_of_other_index_sizes += + ib_table->stat_sum_of_other_index_sizes; + + if ((flag & HA_STATUS_NO_LOCK) == 0) { + dict_table_stats_unlock(ib_table, RW_S_LATCH); + } + + if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0 + && (flag & HA_STATUS_NO_LOCK) == 0 + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE + && avail_space != ULINT_UNDEFINED) { + + /* Only count system tablespace once! */ + if (is_system_tablespace(ib_table->space)) { + if (checked_sys_tablespace) { + continue; + } + checked_sys_tablespace = true; + } + + uintmax_t space = + fsp_get_available_space_in_free_extents( + ib_table->space); + if (space == UINTMAX_MAX) { + THD* thd = ha_thd(); + const char* table_name + = ib_table->name.m_name; + + push_warning_printf( + thd, + Sql_condition::SL_WARNING, + ER_CANT_GET_STAT, + "InnoDB: Trying to get the" + " free space for partition %s" + " but its tablespace has been" + " discarded or the .ibd file" + " is missing. 
Setting the free" + " space of the partition to" + " zero.", + ut_get_name( + m_prebuilt->trx, + table_name).c_str()); + } else { + avail_space += + static_cast(space); + } + } + } + + /* + The MySQL optimizer seems to assume in a left join that n_rows + is an accurate estimate if it is zero. Of course, it is not, + since we do not have any locks on the rows yet at this phase. + Since SHOW TABLE STATUS seems to call this function with the + HA_STATUS_TIME flag set, while the left join optimizer does not + set that flag, we add one to a zero value if the flag is not + set. That way SHOW TABLE STATUS will show the best estimate, + while the optimizer never sees the table empty. */ + + if (n_rows == 0 && (flag & HA_STATUS_TIME) == 0) { + n_rows++; + } + + /* Fix bug#40386: Not flushing query cache after truncate. + n_rows can not be 0 unless the table is empty, set to 1 + instead. The original problem of bug#29507 is actually + fixed in the server code. */ + if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) { + + n_rows = 1; + + /* We need to reset the m_prebuilt value too, otherwise + checks for values greater than the last value written + to the table will fail and the autoinc counter will + not be updated. This will force write_row() into + attempting an update of the table's AUTOINC counter. */ + + m_prebuilt->autoinc_last_value = 0; + } + + /* Take page_size from first partition. */ + ib_table = m_part_share->get_table_part(0); + const page_size_t& page_size = + dict_table_page_size(ib_table); + + stats.records = (ha_rows) n_rows; + stats.deleted = 0; + stats.data_file_length = + ((ulonglong) stat_clustered_index_size) + * page_size.physical(); + stats.index_file_length = + ((ulonglong) stat_sum_of_other_index_sizes) + * page_size.physical(); + + /* See ha_innobase::info_low() for comments! 
*/ + if ((flag & HA_STATUS_NO_LOCK) == 0 + && (flag & HA_STATUS_VARIABLE_EXTRA) != 0 + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { + stats.delete_length = avail_space * 1024; + } + + stats.check_time = 0; + stats.mrr_length_per_rec = ref_length + sizeof(void*) + - PARTITION_BYTES_IN_POS; + + if (stats.records == 0) { + stats.mean_rec_length = 0; + } else { + stats.mean_rec_length = (ulong) + (stats.data_file_length / stats.records); + } + } + + if ((flag & HA_STATUS_CONST) != 0) { + /* Find max rows and biggest partition. */ + for (uint i = 0; i < m_tot_parts; i++) { + /* Skip partitions from above. */ + if ((flag & HA_STATUS_VARIABLE) == 0 + || !bitmap_is_set(&(m_part_info->read_partitions), + i)) { + + ib_table = m_part_share->get_table_part(i); + if (ib_table->stat_n_rows > max_rows) { + max_rows = ib_table->stat_n_rows; + biggest_partition = i; + } + } + } + ib_table = m_part_share->get_table_part(biggest_partition); + /* Verify the number of index in InnoDB and MySQL + matches up. If m_prebuilt->clust_index_was_generated + holds, InnoDB defines GEN_CLUST_INDEX internally. */ + ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) + - m_prebuilt->clust_index_was_generated; + if (table->s->keys < num_innodb_index) { + /* If there are too many indexes defined + inside InnoDB, ignore those that are being + created, because MySQL will only consider + the fully built indexes here. */ + + for (const dict_index_t* index = + UT_LIST_GET_FIRST(ib_table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + /* First, online index creation is + completed inside InnoDB, and then + MySQL attempts to upgrade the + meta-data lock so that it can rebuild + the .frm file. If we get here in that + time frame, dict_index_is_online_ddl() + would not hold and the index would + still not be included in TABLE_SHARE. 
*/ + if (!index->is_committed()) { + num_innodb_index--; + } + } + + if (table->s->keys < num_innodb_index + && (innobase_fts_check_doc_id_index(ib_table, + NULL, NULL) + == FTS_EXIST_DOC_ID_INDEX)) { + num_innodb_index--; + } + } + + if (table->s->keys != num_innodb_index) { + ib::error() << "Table " + << ib_table->name << " contains " + << num_innodb_index + << " indexes inside InnoDB, which" + " is different from the number of" + " indexes " << table->s->keys + << " defined in the MySQL"; + } + + if ((flag & HA_STATUS_NO_LOCK) == 0) { + dict_table_stats_lock(ib_table, RW_S_LATCH); + } + + ut_a(ib_table->stat_initialized); + + for (ulong i = 0; i < table->s->keys; i++) { + ulong j; + /* We could get index quickly through internal + index mapping with the index translation table. + The identity of index (match up index name with + that of table->key_info[i]) is already verified in + innopart_get_index(). */ + dict_index_t* index = innopart_get_index( + biggest_partition, i); + + if (index == NULL) { + ib::error() << "Table " + << ib_table->name << " contains fewer" + " indexes inside InnoDB than" + " are defined in the MySQL" + " .frm file. Have you mixed up" + " .frm files from different" + " installations? " + << TROUBLESHOOTING_MSG; + break; + } + + KEY* key = &table->key_info[i]; + for (j = 0; + j < key->actual_key_parts; + j++) { + + if ((key->flags & HA_FULLTEXT) != 0) { + /* The whole concept has no validity + for FTS indexes. */ + key->rec_per_key[j] = 1; + continue; + } + + if ((j + 1) > index->n_uniq) { + ib::error() << "Index " << index->name + << " of " << ib_table->name + << " has " << index->n_uniq + << " columns unique inside" + " InnoDB, but MySQL is" + " asking statistics for " + << j + 1 << " columns. Have" + " you mixed up .frm files" + " from different" + " installations? " + << TROUBLESHOOTING_MSG; + break; + } + + /* innodb_rec_per_key() will use + index->stat_n_diff_key_vals[] and the value we + pass index->table->stat_n_rows. 
Both are + calculated by ANALYZE and by the background + stats gathering thread (which kicks in when too + much of the table has been changed). In + addition table->stat_n_rows is adjusted with + each DML (e.g. ++ on row insert). Those + adjustments are not MVCC'ed and not even + reversed on rollback. So, + index->stat_n_diff_key_vals[] and + index->table->stat_n_rows could have been + calculated at different time. This is + acceptable. */ + const rec_per_key_t rec_per_key = + innodb_rec_per_key( + index, j, + max_rows); + + key->set_records_per_key(j, rec_per_key); + + /* The code below is legacy and should be + removed together with this comment once we + are sure the new floating point rec_per_key, + set via set_records_per_key(), works fine. */ + + ulong rec_per_key_int = static_cast( + innodb_rec_per_key(index, j, + max_rows)); + + /* Since MySQL seems to favor table scans + too much over index searches, we pretend + index selectivity is 2 times better than + our estimate: */ + + rec_per_key_int = rec_per_key_int / 2; + + if (rec_per_key_int == 0) { + rec_per_key_int = 1; + } + + key->rec_per_key[j] = rec_per_key_int; + } + } + + if ((flag & HA_STATUS_NO_LOCK) == 0) { + dict_table_stats_unlock(ib_table, RW_S_LATCH); + } + + char path[FN_REFLEN]; + os_file_stat_t stat_info; + /* Use the first partition for create time until new DD. 
*/ + ib_table = m_part_share->get_table_part(0); + my_snprintf(path, sizeof(path), "%s/%s%s", + mysql_data_home, + table->s->normalized_path.str, + reg_ext); + + unpack_filename(path,path); + + if (os_file_get_status(path, &stat_info, false, true) == DB_SUCCESS) { + stats.create_time = (ulong) stat_info.ctime; + } + } + + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + + goto func_exit; + } + + if ((flag & HA_STATUS_ERRKEY) != 0) { + const dict_index_t* err_index; + + ut_a(m_prebuilt->trx); + ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N); + + err_index = trx_get_error_info(m_prebuilt->trx); + + if (err_index != NULL) { + errkey = m_part_share->get_mysql_key(m_last_part, + err_index); + } else { + errkey = (unsigned int) ( + (m_prebuilt->trx->error_key_num + == ULINT_UNDEFINED) + ? UINT_MAX + : m_prebuilt->trx->error_key_num); + } + } + + if ((flag & HA_STATUS_AUTO) != 0) { + /* auto_inc is only supported in first key for InnoDB! */ + ut_ad(table_share->next_number_keypart == 0); + DBUG_PRINT("info", ("HA_STATUS_AUTO")); + if (table->found_next_number_field == NULL) { + stats.auto_increment_value = 0; + } else { + /* Lock to avoid two concurrent initializations. */ + lock_auto_increment(); + if (m_part_share->auto_inc_initialized) { + stats.auto_increment_value = + m_part_share->next_auto_inc_val; + } else { + /* The auto-inc mutex in the table_share is + locked, so we do not need to have the handlers + locked. */ + + error = initialize_auto_increment( + (flag & HA_STATUS_NO_LOCK) != 0); + stats.auto_increment_value = + m_part_share->next_auto_inc_val; + } + unlock_auto_increment(); + } + } + +func_exit: + m_prebuilt->trx->op_info = (char*)""; + + DBUG_RETURN(error); +} + +/** Optimize table. +This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds +the table in MySQL. +@param[in] thd Connection thread handle. +@param[in] check_opt Currently ignored. +@return 0 for success else error code. 
*/ +int +ha_innopart::optimize( + THD* thd, + HA_CHECK_OPT* check_opt) +{ + return(HA_ADMIN_TRY_ALTER); +} + +/** Checks a partitioned table. +Tries to check that an InnoDB table is not corrupted. If corruption is +noticed, prints to stderr information about it. In case of corruption +may also assert a failure and crash the server. Also checks for records +in wrong partition. +@param[in] thd MySQL THD object/thread handle. +@param[in] check_opt Check options. +@return HA_ADMIN_CORRUPT or HA_ADMIN_OK. */ +int +ha_innopart::check( + THD* thd, + HA_CHECK_OPT* check_opt) +{ + uint error = HA_ADMIN_OK; + uint i; + + DBUG_ENTER("ha_innopart::check"); + /* TODO: Enhance this to: + - Every partition has the same structure. + - The names are correct (partition names checked in ::open()?) + Currently it only does normal InnoDB check of each partition. */ + + if (set_altered_partitions()) { + ut_ad(0); // Already checked by set_part_state()! + DBUG_RETURN(HA_ADMIN_INVALID); + } + for (i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + m_prebuilt->table = m_part_share->get_table_part(i); + error = ha_innobase::check(thd, check_opt); + if (error != 0) { + break; + } + if ((check_opt->flags & (T_MEDIUM | T_EXTEND)) != 0) { + error = Partition_helper::check_misplaced_rows(i, false); + if (error != 0) { + break; + } + } + } + if (error != 0) { + print_admin_msg( + thd, + 256, + "error", + table_share->db.str, + table->alias, + "check", + m_is_sub_partitioned ? + "Subpartition %s returned error" + : "Partition %s returned error", + m_part_share->get_partition_name(i)); + } + + DBUG_RETURN(error); +} + +/** Repair a partitioned table. +Only repairs records in wrong partitions (moves them to the correct +partition or deletes them if not in any partition). +@param[in] thd MySQL THD object/thread handle. +@param[in] repair_opt Repair options. +@return 0 or error code. 
*/ +int +ha_innopart::repair( + THD* thd, + HA_CHECK_OPT* repair_opt) +{ + uint error = HA_ADMIN_OK; + + DBUG_ENTER("ha_innopart::repair"); + + /* TODO: enable this warning to be clear about what is repaired. + Currently disabled to generate smaller test diffs. */ +#ifdef ADD_WARNING_FOR_REPAIR_ONLY_PARTITION + push_warning_printf(thd, Sql_condition::SL_WARNING, + ER_ILLEGAL_HA, + "Only moving rows from wrong partition to correct" + " partition is supported," + " repairing InnoDB indexes is not yet supported!"); +#endif + + /* Only repair partitions for MEDIUM or EXTENDED options. */ + if ((repair_opt->flags & (T_MEDIUM | T_EXTEND)) == 0) { + DBUG_RETURN(HA_ADMIN_OK); + } + if (set_altered_partitions()) { + ut_ad(0); // Already checked by set_part_state()! + DBUG_RETURN(HA_ADMIN_INVALID); + } + for (uint i = m_part_info->get_first_used_partition(); + i < m_tot_parts; + i = m_part_info->get_next_used_partition(i)) { + + /* TODO: Implement and use ha_innobase::repair()! */ + error = Partition_helper::check_misplaced_rows(i, true); + if (error != 0) { + print_admin_msg( + thd, + 256, + "error", + table_share->db.str, + table->alias, + "repair", + m_is_sub_partitioned ? + "Subpartition %s returned error" + : "Partition %s returned error", + m_part_share->get_partition_name(i)); + break; + } + } + + DBUG_RETURN(error); +} + +/** Check if possible to switch engine (no foreign keys). +Checks if ALTER TABLE may change the storage engine of the table. +Changing storage engines is not allowed for tables for which there +are foreign key constraints (parent or child tables). +@return true if can switch engines. */ +bool +ha_innopart::can_switch_engines() +{ + bool can_switch; + + DBUG_ENTER("ha_innopart::can_switch_engines"); + can_switch = ha_innobase::can_switch_engines(); + ut_ad(can_switch); + + DBUG_RETURN(can_switch); +} + +/** Checks if a table is referenced by a foreign key. 
+The MySQL manual states that a REPLACE is either equivalent to an INSERT, +or DELETE(s) + INSERT. Only a delete is then allowed internally to resolve +a duplicate key conflict in REPLACE, not an update. +@return > 0 if referenced by a FOREIGN KEY. */ +uint +ha_innopart::referenced_by_foreign_key() +{ + if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) { + +#ifndef HA_INNOPART_SUPPORTS_FOREIGN_KEYS + ut_ad(0); +#endif /* HA_INNOPART_SUPPORTS_FOREIGN_KEYS */ + return(1); + } + + return(0); +} + +/** Start statement. +MySQL calls this function at the start of each SQL statement inside LOCK +TABLES. Inside LOCK TABLES the ::external_lock method does not work to +mark SQL statement borders. Note also a special case: if a temporary table +is created inside LOCK TABLES, MySQL has not called external_lock() at all +on that table. +MySQL-5.0 also calls this before each statement in an execution of a stored +procedure. To make the execution more deterministic for binlogging, MySQL-5.0 +locks all tables involved in a stored procedure with full explicit table +locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the +procedure. +@param[in] thd Handle to the user thread. +@param[in] lock_type Lock type. +@return 0 or error code. */ +int +ha_innopart::start_stmt( + THD* thd, + thr_lock_type lock_type) +{ + int error = 0; + + if (m_part_info->get_first_used_partition() == MY_BIT_NONE) { + /* All partitions pruned away, do nothing! */ + return(error); + } + + error = ha_innobase::start_stmt(thd, lock_type); + if (m_prebuilt->sql_stat_start) { + memset(m_sql_stat_start_parts, 0xff, + UT_BITS_IN_BYTES(m_tot_parts)); + } else { + memset(m_sql_stat_start_parts, 0, + UT_BITS_IN_BYTES(m_tot_parts)); + } + return(error); +} + +/** Function to store lock for all partitions in native partitioned table. Also +look at ha_innobase::store_lock for more details. 
+@param[in] thd user thread handle +@param[in] to pointer to the current element in an array of +pointers to lock structs +@param[in] lock_type lock type to store in 'lock'; this may also be +TL_IGNORE +@retval to pointer to the current element in the 'to' array */ +THR_LOCK_DATA** +ha_innopart::store_lock( + THD* thd, + THR_LOCK_DATA** to, + thr_lock_type lock_type) +{ + trx_t* trx = m_prebuilt->trx; + const uint sql_command = thd_sql_command(thd); + + ha_innobase::store_lock(thd, to, lock_type); + + if (sql_command == SQLCOM_FLUSH + && lock_type == TL_READ_NO_INSERT) { + for (uint i = 1; i < m_tot_parts; i++) { + dict_table_t* table = m_part_share->get_table_part(i); + + dberr_t err = row_quiesce_set_state( + table, QUIESCE_START, trx); + ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED); + } + } + + return to; +} + +/** Lock/prepare to lock table. +As MySQL will execute an external lock for every new table it uses when it +starts to process an SQL statement (an exception is when MySQL calls +start_stmt for the handle) we can use this function to store the pointer to +the THD in the handle. We will also use this function to communicate +to InnoDB that a new SQL statement has started and that we must store a +savepoint to our transaction handle, so that we are able to roll back +the SQL statement in case of an error. +@param[in] thd Handle to the user thread. +@param[in] lock_type Lock type. +@return 0 or error number. */ +int +ha_innopart::external_lock( + THD* thd, + int lock_type) +{ + int error = 0; + + if (m_part_info->get_first_used_partition() == MY_BIT_NONE + && !(m_mysql_has_locked + && lock_type == F_UNLCK)) { + + /* All partitions pruned away, do nothing! 
*/ + ut_ad(!m_mysql_has_locked); + return(error); + } + ut_ad(m_mysql_has_locked || lock_type != F_UNLCK); + + m_prebuilt->table = m_part_share->get_table_part(0); + error = ha_innobase::external_lock(thd, lock_type); + + for (uint i = 0; i < m_tot_parts; i++) { + dict_table_t* table = m_part_share->get_table_part(i); + + switch (table->quiesce) { + case QUIESCE_START: + /* Check for FLUSH TABLE t WITH READ LOCK */ + if (!srv_read_only_mode + && thd_sql_command(thd) == SQLCOM_FLUSH + && lock_type == F_RDLCK) { + + ut_ad(table->quiesce == QUIESCE_START); + + row_quiesce_table_start(table, + m_prebuilt->trx); + + /* Use the transaction instance to track + UNLOCK TABLES. It can be done via START + TRANSACTION; too implicitly. */ + + ++m_prebuilt->trx->flush_tables; + } + break; + + case QUIESCE_COMPLETE: + /* Check for UNLOCK TABLES; implicit or explicit + or trx interruption. */ + if (m_prebuilt->trx->flush_tables > 0 + && (lock_type == F_UNLCK + || trx_is_interrupted(m_prebuilt->trx))) { + + ut_ad(table->quiesce == QUIESCE_COMPLETE); + row_quiesce_table_complete(table, + m_prebuilt->trx); + + ut_a(m_prebuilt->trx->flush_tables > 0); + --m_prebuilt->trx->flush_tables; + } + break; + + case QUIESCE_NONE: + break; + + default: + ut_ad(0); + } + } + + ut_ad(!m_auto_increment_lock); + ut_ad(!m_auto_increment_safe_stmt_log_lock); + + if (m_prebuilt->sql_stat_start) { + memset(m_sql_stat_start_parts, 0xff, + UT_BITS_IN_BYTES(m_tot_parts)); + } else { + memset(m_sql_stat_start_parts, 0, + UT_BITS_IN_BYTES(m_tot_parts)); + } + return(error); +} + +/** Get the current auto_increment value. +@param[in] offset Table auto-inc offset. +@param[in] increment Table auto-inc increment. +@param[in] nb_desired_values Number of required values. +@param[out] first_value The auto increment value. +@param[out] nb_reserved_values Number of reserved values. +@return Auto increment value, or ~0 on failure. 
*/
+void
+ha_innopart::get_auto_increment(
+ ulonglong offset,
+ ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong* first_value,
+ ulonglong* nb_reserved_values)
+{
+ DBUG_ENTER("ha_innopart::get_auto_increment");
+ if (table_share->next_number_keypart != 0) {
+ /* Only first key part allowed as autoinc for InnoDB tables! */
+ ut_ad(0);
+ *first_value = ULLONG_MAX;
+ DBUG_VOID_RETURN;
+ }
+ get_auto_increment_first_field(
+ increment,
+ nb_desired_values,
+ first_value,
+ nb_reserved_values);
+ DBUG_VOID_RETURN;
+}
+
+/** Compares two 'refs'.
+A 'ref' is the (internal) primary key value of the row.
+If there is no explicitly declared non-null unique key or a primary key, then
+InnoDB internally uses the row id as the primary key.
+It will use the partition id as secondary compare.
+@param[in] ref1 An (internal) primary key value in the MySQL key value
+format.
+@param[in] ref2 Reference to compare with (same type as ref1).
+@return < 0 if ref1 < ref2, 0 if equal, else > 0. */
+int
+ha_innopart::cmp_ref(
+ const uchar* ref1,
+ const uchar* ref2)
+{
+ int cmp;
+
+ cmp = ha_innobase::cmp_ref(ref1 + PARTITION_BYTES_IN_POS,
+ ref2 + PARTITION_BYTES_IN_POS);
+
+ if (cmp != 0) {
+ return(cmp);
+ }
+
+ cmp = static_cast<int>(uint2korr(ref1))
+ - static_cast<int>(uint2korr(ref2));
+
+ return(cmp);
+}
+
+/** Prepare for creating new partitions during ALTER TABLE ... PARTITION.
+@param[in] num_partitions Number of new partitions to be created.
+@param[in] only_create True if only creating the partition
+(no open/lock is needed).
+@return 0 for success else error code.
*/ +int +ha_innopart::prepare_for_new_partitions( + uint num_partitions, + bool only_create) +{ + m_new_partitions = UT_NEW(Altered_partitions(num_partitions, + only_create), + mem_key_partitioning); + if (m_new_partitions == NULL) { + return(HA_ERR_OUT_OF_MEM); + } + if (m_new_partitions->initialize()) { + UT_DELETE(m_new_partitions); + m_new_partitions = NULL; + return(HA_ERR_OUT_OF_MEM); + } + return(0); +} + +/** Create a new partition to be filled during ALTER TABLE ... PARTITION. +@param[in] table Table to create the partition in. +@param[in] create_info Table/partition specific create info. +@param[in] part_name Partition name. +@param[in] new_part_id Partition id in new table. +@param[in] part_elem Partition element. +@return 0 for success else error code. */ +int +ha_innopart::create_new_partition( + TABLE* table, + HA_CREATE_INFO* create_info, + const char* part_name, + uint new_part_id, + partition_element* part_elem) +{ + int error; + char norm_name[FN_REFLEN]; + const char* tablespace_name_backup = create_info->tablespace; + const char* data_file_name_backup = create_info->data_file_name; + DBUG_ENTER("ha_innopart::create_new_partition"); + /* Delete by ddl_log on failure. */ + normalize_table_name(norm_name, part_name); + set_create_info_dir(part_elem, create_info); + + /* The below check is the same as for CREATE TABLE, but since we are + doing an alter here it will not trigger the check in + create_option_tablespace_is_valid(). 
*/ + if (tablespace_is_shared_space(create_info) + && create_info->data_file_name != NULL + && create_info->data_file_name[0] != '\0') { + my_printf_error(ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: DATA DIRECTORY cannot be used" + " with a TABLESPACE assignment.", MYF(0)); + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + + error = ha_innobase::create(norm_name, table, create_info); + create_info->tablespace = tablespace_name_backup; + create_info->data_file_name = data_file_name_backup; + if (error == HA_ERR_FOUND_DUPP_KEY) { + DBUG_RETURN(HA_ERR_TABLE_EXIST); + } + if (error != 0) { + DBUG_RETURN(error); + } + if (!m_new_partitions->only_create()) + { + dict_table_t* part; + part = dict_table_open_on_name(norm_name, + false, + true, + DICT_ERR_IGNORE_NONE); + if (part == NULL) { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + m_new_partitions->set_part(new_part_id, part); + } + DBUG_RETURN(0); +} + +/** Close and finalize new partitions. */ +void +ha_innopart::close_new_partitions() +{ + if (m_new_partitions != NULL) { + UT_DELETE(m_new_partitions); + m_new_partitions = NULL; + } +} + +/** write row to new partition. +@param[in] new_part New partition to write to. +@return 0 for success else error code. */ +int +ha_innopart::write_row_in_new_part( + uint new_part) +{ + int result; + DBUG_ENTER("ha_innopart::write_row_in_new_part"); + + m_last_part = new_part; + if (m_new_partitions->part(new_part) == NULL) { + /* Altered partition contains misplaced row. 
*/
+ m_err_rec = table->record[0];
+ DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
+ }
+ m_new_partitions->get_prebuilt(m_prebuilt, new_part);
+ result = ha_innobase::write_row(table->record[0]);
+ m_new_partitions->set_from_prebuilt(m_prebuilt, new_part);
+ DBUG_RETURN(result);
+}
+
+/** Allocate the array to hold blob heaps for all partitions */
+mem_heap_t**
+ha_innopart::alloc_blob_heap_array()
+{
+ DBUG_ENTER("ha_innopart::alloc_blob_heap_array");
+
+ const ulint len = sizeof(mem_heap_t*) * m_tot_parts;
+ m_blob_heap_parts = static_cast<mem_heap_t**>(
+ ut_zalloc(len, mem_key_partitioning));
+ if (m_blob_heap_parts == NULL) {
+ DBUG_RETURN(NULL);
+ }
+
+ DBUG_RETURN(m_blob_heap_parts);
+}
+
+/** Free the array that holds blob heaps for all partitions */
+void
+ha_innopart::free_blob_heap_array()
+{
+ DBUG_ENTER("ha_innopart::free_blob_heap_array");
+
+ if (m_blob_heap_parts != NULL) {
+ clear_blob_heaps();
+ ut_free(m_blob_heap_parts);
+ m_blob_heap_parts = NULL;
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+void
+ha_innopart::clear_blob_heaps()
+{
+ DBUG_ENTER("ha_innopart::clear_blob_heaps");
+
+ if (m_blob_heap_parts == NULL) {
+ DBUG_VOID_RETURN;
+ }
+
+ for (uint i = 0; i < m_tot_parts; i++) {
+ if (m_blob_heap_parts[i] != NULL) {
+ DBUG_PRINT("ha_innopart", ("freeing blob_heap: %p",
+ m_blob_heap_parts[i]));
+ mem_heap_free(m_blob_heap_parts[i]);
+ m_blob_heap_parts[i] = NULL;
+ }
+ }
+
+ /* Reset blob_heap in m_prebuilt after freeing all heaps. It is set in
+ ha_innopart::set_partition to the blob heap of current partition. */
+ m_prebuilt->blob_heap = NULL;
+
+ DBUG_VOID_RETURN;
+}
+
+/** Reset state of file to after 'open'. This function is called
+after every statement for all tables used by that statement.
*/ +int +ha_innopart::reset() +{ + DBUG_ENTER("ha_innopart::reset"); + + clear_blob_heaps(); + + DBUG_RETURN(ha_innobase::reset()); +} + +/**************************************************************************** + * DS-MRR implementation + ***************************************************************************/ + +/* TODO: move the default implementations into the base handler class! */ +/* TODO: See if it could be optimized for partitioned tables? */ +/* Use default ha_innobase implementation for now... */ diff --git a/storage/innobase/handler/ha_innopart.h b/storage/innobase/handler/ha_innopart.h new file mode 100644 index 00000000000..8caa9cdd8d2 --- /dev/null +++ b/storage/innobase/handler/ha_innopart.h @@ -0,0 +1,1330 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/* The InnoDB Partition handler: the interface between MySQL and InnoDB. 
*/ + +#ifndef ha_innopart_h +#define ha_innopart_h + +#include "partitioning/partition_handler.h" + +/* Forward declarations */ +class Altered_partitions; +class partition_info; + +/** HA_DUPLICATE_POS and HA_READ_BEFORE_WRITE_REMOVAL is not +set from ha_innobase, but cannot yet be supported in ha_innopart. +Full text and geometry is not yet supported. */ +const handler::Table_flags HA_INNOPART_DISABLED_TABLE_FLAGS = + ( HA_CAN_FULLTEXT + | HA_CAN_FULLTEXT_EXT + | HA_CAN_GEOMETRY + | HA_DUPLICATE_POS + | HA_READ_BEFORE_WRITE_REMOVAL); + +/** InnoDB partition specific Handler_share. */ +class Ha_innopart_share : public Partition_share +{ +private: + /** Array of all included table definitions (one per partition). */ + dict_table_t** m_table_parts; + + /** Instead of INNOBASE_SHARE::idx_trans_tbl. Maps MySQL index number + to InnoDB index per partition. */ + dict_index_t** m_index_mapping; + + /** Total number of partitions. */ + uint m_tot_parts; + + /** Number of indexes. */ + uint m_index_count; + + /** Reference count. */ + uint m_ref_count; + + /** Pointer back to owning TABLE_SHARE. */ + TABLE_SHARE* m_table_share; + +public: + Ha_innopart_share( + TABLE_SHARE* table_share); + + ~Ha_innopart_share(); + + /** Set innodb table for given partition. + @param[in] part_id Partition number. + @param[in] table Table. */ + inline + void + set_table_part( + uint part_id, + dict_table_t* table) + { + ut_ad(m_table_parts != NULL); + ut_ad(part_id < m_tot_parts); + m_table_parts[part_id] = table; + } + + /** Return innodb table for given partition. + @param[in] part_id Partition number. + @return InnoDB table. */ + inline + dict_table_t* + get_table_part( + uint part_id) const + { + ut_ad(m_table_parts != NULL); + ut_ad(part_id < m_tot_parts); + return(m_table_parts[part_id]); + } + + /** Return innodb index for given partition and key number. + @param[in] part_id Partition number. + @param[in] keynr Key number. + @return InnoDB index. 
*/
+ dict_index_t*
+ get_index(
+ uint part_id,
+ uint keynr);
+
+ /** Get MySQL key number corresponding to InnoDB index.
+ @param[in] part_id Partition number.
+ @param[in] index InnoDB index.
+ @return MySQL key number or MAX_KEY if non-existent. */
+ uint
+ get_mysql_key(
+ uint part_id,
+ const dict_index_t* index);
+
+ /** Initialize the share with table and indexes per partition.
+ @param[in] part_info Partition info (partition names to use)
+ @param[in] table_name Table name (db/table_name)
+ @return false on success else true. */
+ bool
+ open_table_parts(
+ partition_info* part_info,
+ const char* table_name);
+
+ /** Close the table partitions.
+ If all instances are closed, also release the resources. */
+ void
+ close_table_parts();
+
+ /* Static helper functions. */
+ /** Fold to lower case if windows or lower_case_table_names == 1.
+ @param[in,out] s String to fold.*/
+ static
+ void
+ partition_name_casedn_str(
+ char* s);
+
+ /** Translate and append partition name.
+ @param[out] to String to write in filesystem charset
+ @param[in] from Name in system charset
+ @param[in] sep Separator
+ @param[in] len Max length of to buffer
+ @return length of written string. */
+ static
+ size_t
+ append_sep_and_name(
+ char* to,
+ const char* from,
+ const char* sep,
+ size_t len);
+
+ /** Set up the virtual column template for partition table, and points
+ all m_table_parts[]->vc_templ to it.
+ @param[in] table MySQL TABLE object
+ @param[in] ib_table InnoDB dict_table_t
+ @param[in] table_name Table name (db/table_name) */
+ void
+ set_v_templ(
+ TABLE* table,
+ dict_table_t* ib_table,
+ const char* name);
+
+private:
+ /** Disable default constructor. */
+ Ha_innopart_share() {};
+
+ /** Open one partition (lower level innodb table).
+ @param[in] part_id Partition to open.
+ @param[in] partition_name Name of partition.
+ @return false on success else true.
*/
+ bool
+ open_one_table_part(
+ uint part_id,
+ const char* partition_name);
+};
+
+/** The class defining a partitioning aware handle to an InnoDB table.
+Based on ha_innobase and extended with
+- Partition_helper for re-using common partitioning functionality
+- Partition_handler for providing partitioning specific api calls.
+Generic partitioning functions are implemented in Partition_helper.
+Lower level storage functions are implemented in ha_innobase.
+Partition_handler is inherited for implementing the handler level interface
+for partitioning specific functions, like change_partitions and
+truncate_partition.
+InnoDB specific functions related to partitioning is implemented here. */
+class ha_innopart:
+ public ha_innobase,
+ public Partition_helper,
+ public Partition_handler
+{
+public:
+ ha_innopart(
+ handlerton* hton,
+ TABLE_SHARE* table_arg);
+
+ ~ha_innopart();
+
+ /** Clone this handler, used when needing more than one cursor
+ to the same table.
+ @param[in] name Table name.
+ @param[in] mem_root mem_root to allocate from.
+ @retval Pointer to clone or NULL if error. */
+ handler*
+ clone(
+ const char* name,
+ MEM_ROOT* mem_root);
+
+ /** Check and register a table in the query cache.
+ Ask InnoDB if a query to a table can be cached.
+ @param[in] thd User thread handle.
+ @param[in] table_key Normalized path to the table.
+ @param[in] key_length Length of table_key.
+ @param[out] call_back Function pointer for checking if data
+ has changed.
+ @param[in,out] engine_data Data for call_back (not used).
+ @return TRUE if query caching of the table is permitted. */
+ my_bool
+ register_query_cache_table(
+ THD* thd,
+ char* table_key,
+ size_t key_length,
+ qc_engine_callback* call_back,
+ ulonglong* engine_data)
+ {
+ /* Currently this would need to go through every
+ [sub] partition in the table to see if any of them has changed.
+ See row_search_check_if_query_cache_permitted().
+ So disabled until we can avoid check all partitions.
*/ + return(FALSE); + } + + /** On-line ALTER TABLE interface @see handler0alter.cc @{ */ + + /** Check if InnoDB supports a particular alter table in-place. + @param[in] altered_table TABLE object for new version of table. + @param[in,out] ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used during in-place alter. + @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported + @retval HA_ALTER_INPLACE_NO_LOCK Supported + @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but + requires lock during main phase and exclusive lock during prepare + phase. + @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare + phase requires exclusive lock. */ + enum_alter_inplace_result + check_if_supported_inplace_alter( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info); + + /** Prepare in-place ALTER for table. + Allows InnoDB to update internal structures with concurrent + writes blocked (provided that check_if_supported_inplace_alter() + did not return HA_ALTER_INPLACE_NO_LOCK). + This will be invoked before inplace_alter_table(). + @param[in] altered_table TABLE object for new version of table. + @param[in,out] ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used during in-place alter. + @retval true Failure. + @retval false Success. */ + bool + prepare_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info); + + /** Alter the table structure in-place. + Alter the table structure in-place with operations + specified using HA_ALTER_FLAGS and Alter_inplace_information. + The level of concurrency allowed during this operation depends + on the return value from check_if_supported_inplace_alter(). + @param[in] altered_table TABLE object for new version of table. + @param[in,out] ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used during in-place alter. + @retval true Failure. + @retval false Success. 
*/ + bool + inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info); + + /** Commit or rollback. + Commit or rollback the changes made during + prepare_inplace_alter_table() and inplace_alter_table() inside + the storage engine. Note that the allowed level of concurrency + during this operation will be the same as for + inplace_alter_table() and thus might be higher than during + prepare_inplace_alter_table(). (E.g concurrent writes were + blocked during prepare, but might not be during commit). + @param[in] altered_table TABLE object for new version of table. + @param[in] ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used during in-place alter. + @param[in,out] commit true => Commit, false => Rollback. + @retval true Failure. + @retval false Success. */ + bool + commit_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info, + bool commit); + + /** Notify the storage engine that the table structure (.frm) has + been updated. + + ha_partition allows inplace operations that also upgrades the engine + if it supports partitioning natively. So if this is the case then + we will remove the .par file since it is not used with ha_innopart + (we use the internal data dictionary instead). */ + void + notify_table_changed(); + /** @} */ + + // TODO: should we implement init_table_handle_for_HANDLER() ? + // (or is sql_stat_start handled correctly anyway?) + int + optimize( + THD* thd, + HA_CHECK_OPT* check_opt); + + int + discard_or_import_tablespace( + my_bool discard); + + /** Compare key and rowid. + Helper function for sorting records in the priority queue. + a/b points to table->record[0] rows which must have the + key fields set. The bytes before a and b store the rowid. + This is used for comparing/sorting rows first according to + KEY and if same KEY, by rowid (ref). + + @param[in] key_info Null terminated array of index + information. 
+ @param[in] a Pointer to record+ref in first record. + @param[in] b Pointer to record+ref in second record. + @return Return value is SIGN(first_rec - second_rec) + @retval 0 Keys are equal. + @retval -1 second_rec is greater than first_rec. + @retval +1 first_rec is greater than second_rec. */ + static + int + key_and_rowid_cmp( + KEY** key_info, + uchar *a, + uchar *b); + + int + extra( + enum ha_extra_function operation); + + void + print_error( + int error, + myf errflag); + + bool + is_ignorable_error( + int error); + + int + start_stmt( + THD* thd, + thr_lock_type lock_type); + + ha_rows + records_in_range( + uint inx, + key_range* min_key, + key_range* max_key); + + ha_rows + estimate_rows_upper_bound(); + + uint + alter_table_flags( + uint flags); + + void + update_create_info( + HA_CREATE_INFO* create_info); + + int + create( + const char* name, + TABLE* form, + HA_CREATE_INFO* create_info); + + int + truncate(); + + int + check( + THD* thd, + HA_CHECK_OPT* check_opt); + + /** Repair table. + Will only handle records in wrong partition, not repairing + corrupt innodb indexes. + @param[in] thd Thread context. + @param[in] repair_opt Repair options. + @return 0 or error code. 
*/ + int + repair( + THD* thd, + HA_CHECK_OPT* repair_opt); + + bool + can_switch_engines(); + + uint + referenced_by_foreign_key(); + + void + get_auto_increment( + ulonglong offset, + ulonglong increment, + ulonglong nb_desired_values, + ulonglong* first_value, + ulonglong* nb_reserved_values); + + int + cmp_ref( + const uchar* ref1, + const uchar* ref2); + + int + read_range_first( + const key_range* start_key, + const key_range* end_key, + bool eq_range_arg, + bool sorted) + { + return(Partition_helper::ph_read_range_first( + start_key, + end_key, + eq_range_arg, + sorted)); + } + + void + position( + const uchar* record) + { + Partition_helper::ph_position(record); + } + + int + rnd_pos_by_record( + uchar* record) + { + return(Partition_helper::ph_rnd_pos_by_record(record)); + } + + /* TODO: Implement these! */ + bool + check_if_incompatible_data( + HA_CREATE_INFO* info, + uint table_changes) + { + ut_ad(0); + return(COMPATIBLE_DATA_NO); + } + + int + delete_all_rows() + { + return(handler::delete_all_rows()); + } + + int + disable_indexes( + uint mode) + { + return(HA_ERR_WRONG_COMMAND); + } + + int + enable_indexes( + uint mode) + { + return(HA_ERR_WRONG_COMMAND); + } + + void + free_foreign_key_create_info( + char* str) + { + ut_ad(0); + } + + int + ft_init() + { + ut_ad(0); + return(HA_ERR_WRONG_COMMAND); + } + + FT_INFO* + ft_init_ext( + uint flags, + uint inx, + String* key) + { + ut_ad(0); + return(NULL); + } + + FT_INFO* + ft_init_ext_with_hints( + uint inx, + String* key, + Ft_hints* hints) + { + ut_ad(0); + return(NULL); + } + + int + ft_read( + uchar* buf) + { + ut_ad(0); + return(HA_ERR_WRONG_COMMAND); + } + + bool + get_foreign_dup_key( + char* child_table_name, + uint child_table_name_len, + char* child_key_name, + uint child_key_name_len) + { + ut_ad(0); + return(false); + } + + // TODO: not yet supporting FK. + char* + get_foreign_key_create_info() + { + return(NULL); + } + + // TODO: not yet supporting FK. 
+ int
+ get_foreign_key_list(
+ THD* thd,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+ {
+ return(0);
+ }
+
+ // TODO: not yet supporting FK.
+ int
+ get_parent_foreign_key_list(
+ THD* thd,
+ List<FOREIGN_KEY_INFO>* f_key_list)
+ {
+ return(0);
+ }
+
+ // TODO: not yet supporting FK.
+ int
+ get_cascade_foreign_key_table_list(
+ THD* thd,
+ List<st_handler_tablename>* fk_table_list)
+ {
+ return(0);
+ }
+
+ int
+ read_range_next()
+ {
+ return(Partition_helper::ph_read_range_next());
+ }
+
+ uint32
+ calculate_key_hash_value(
+ Field** field_array)
+ {
+ return(Partition_helper::ph_calculate_key_hash_value(field_array));
+ }
+
+ Table_flags
+ table_flags() const
+ {
+ return(ha_innobase::table_flags() | HA_CAN_REPAIR);
+ }
+
+ void
+ release_auto_increment()
+ {
+ Partition_helper::ph_release_auto_increment();
+ }
+
+ /** Implementing Partition_handler interface @see partition_handler.h
+ @{ */
+
+ /** See Partition_handler. */
+ void
+ get_dynamic_partition_info(
+ ha_statistics* stat_info,
+ ha_checksum* check_sum,
+ uint part_id)
+ {
+ Partition_helper::get_dynamic_partition_info_low(
+ stat_info,
+ check_sum,
+ part_id);
+ }
+
+ uint
+ alter_flags(
+ uint flags MY_ATTRIBUTE((unused))) const
+ {
+ return(HA_PARTITION_FUNCTION_SUPPORTED
+ | HA_FAST_CHANGE_PARTITION);
+ }
+
+ Partition_handler*
+ get_partition_handler()
+ {
+ return(static_cast<Partition_handler*>(this));
+ }
+
+ void
+ set_part_info(
+ partition_info* part_info,
+ bool early)
+ {
+ Partition_helper::set_part_info_low(part_info, early);
+ }
+
+ void
+ initialize_partitioning(
+ partition_info* part_info,
+ bool early)
+ {
+ Partition_helper::set_part_info_low(part_info, early);
+ }
+
+ handler*
+ get_handler()
+ {
+ return(static_cast<handler*>(this));
+ }
+ /** @} */
+
+private:
+ /** Pointer to Ha_innopart_share on the TABLE_SHARE. */
+ Ha_innopart_share* m_part_share;
+
+ /** ins_node per partition. Synchronized with prebuilt->ins_node
+ when changing partitions. */
+ ins_node_t** m_ins_node_parts;
+
+ /** upd_node per partition.
Synchronized with prebuilt->upd_node + when changing partitions. */ + upd_node_t** m_upd_node_parts; + + /** blob_heap per partition. Synchronized with prebuilt->blob_heap + when changing partitions. */ + mem_heap_t** m_blob_heap_parts; + + /** trx_id from the partitions table->def_trx_id. Keep in sync + with prebuilt->trx_id when changing partitions. + prebuilt only reflects the current partition! */ + trx_id_t* m_trx_id_parts; + + /** row_read_type per partition. */ + ulint* m_row_read_type_parts; + + /** sql_stat_start per partition. */ + uchar* m_sql_stat_start_parts; + + /** persistent cursors per partition. */ + btr_pcur_t* m_pcur_parts; + + /** persistent cluster cursors per partition. */ + btr_pcur_t* m_clust_pcur_parts; + + /** map from part_id to offset in above two arrays. */ + uint16_t* m_pcur_map; + + /** Original m_prebuilt->pcur. */ + btr_pcur_t* m_pcur; + + /** Original m_prebuilt->clust_pcur. */ + btr_pcur_t* m_clust_pcur; + + /** New partitions during ADD/REORG/... PARTITION. */ + Altered_partitions* m_new_partitions; + + /** Clear used ins_nodes and upd_nodes. */ + void + clear_ins_upd_nodes(); + + /** Clear the blob heaps for all partitions */ + void + clear_blob_heaps(); + + /** Reset state of file to after 'open'. This function is called + after every statement for all tables used by that statement. */ + int + reset(); + + /** Allocate the array to hold blob heaps for all partitions */ + mem_heap_t** + alloc_blob_heap_array(); + + /** Free the array that holds blob heaps for all partitions */ + void + free_blob_heap_array(); + + /** Changes the active index of a handle. + @param[in] part_id Use this partition. + @param[in] keynr Use this index; MAX_KEY means always + clustered index, even if it was internally generated by InnoDB. + @return 0 or error code. */ + int + change_active_index( + uint part_id, + uint keynr); + + /** Move to next partition and set its index. + @return 0 for success else error number. 
*/ + int + next_partition_index(); + + /** Internally called for initializing auto increment value. + Should never be called, but defined to catch such errors. + @return 0 on success else error code. */ + int + innobase_initialize_autoinc(); + + /** Get the index for the current partition + @param[in] keynr MySQL index number. + @return InnoDB index or NULL. */ + dict_index_t* + innobase_get_index( + uint keynr); + + /** Get the index for a handle. + Does not change active index. + @param[in] keynr use this index; MAX_KEY means always clustered + index, even if it was internally generated by InnoDB. + @param[in] part_id From this partition. + @return NULL or index instance. */ + dict_index_t* + innopart_get_index( + uint part_id, + uint keynr); + + /** Change active partition. + Copies needed info into m_prebuilt from the partition specific memory. + @param[in] part_id Partition to set as active. */ + void + set_partition( + uint part_id); + + /** Update active partition. + Copies needed info from m_prebuilt into the partition specific memory. + @param[in] part_id Partition to set as active. */ + void + update_partition( + uint part_id); + + /** Helpers needed by Partition_helper, @see partition_handler.h @{ */ + + /** Set the autoinc column max value. + This should only be called once from ha_innobase::open(). + Therefore there's no need for a covering lock. + @param[in] no_lock If locking should be skipped. Not used! + @return 0 on success else error code. */ + int + initialize_auto_increment( + bool /* no_lock */); + + /** Save currently highest auto increment value. + @param[in] nr Auto increment value to save. */ + void + save_auto_increment( + ulonglong nr); + + /** Setup the ordered record buffer and the priority queue. + @param[in] used_parts Number of used partitions in query. + @return false for success, else true. */ + int + init_record_priority_queue_for_parts( + uint used_parts); + + /** Destroy the ordered record buffer and the priority queue. 
*/ + void + destroy_record_priority_queue_for_parts(); + + /** Prepare for creating new partitions during ALTER TABLE ... + PARTITION. + @param[in] num_partitions Number of new partitions to be created. + @param[in] only_create True if only creating the partition + (no open/lock is needed). + @return 0 for success else error code. */ + int + prepare_for_new_partitions( + uint num_partitions, + bool only_create); + + /** Create a new partition to be filled during ALTER TABLE ... + PARTITION. + @param[in] table Table to create the partition in. + @param[in] create_info Table/partition specific create info. + @param[in] part_name Partition name. + @param[in] new_part_id Partition id in new table. + @param[in] part_elem Partition element. + @return 0 for success else error code. */ + int + create_new_partition( + TABLE* table, + HA_CREATE_INFO* create_info, + const char* part_name, + uint new_part_id, + partition_element* part_elem); + + /** Close and finalize new partitions. */ + void + close_new_partitions(); + + /** write row to new partition. + @param[in] new_part New partition to write to. + @return 0 for success else error code. */ + int + write_row_in_new_part( + uint new_part); + + /** Write a row in specific partition. + Stores a row in an InnoDB database, to the table specified in this + handle. + @param[in] part_id Partition to write to. + @param[in] row A row in MySQL format. + @return error code. */ + int + write_row_in_part( + uint part_id, + uchar* row); + + /** Update a row in partition. + Updates a row given as a parameter to a new value. + @param[in] part_id Partition to update row in. + @param[in] old_row Old row in MySQL format. + @param[in] new_row New row in MySQL format. + @return error number or 0. */ + int + update_row_in_part( + uint part_id, + const uchar* old_row, + uchar* new_row); + + /** Deletes a row in partition. + @param[in] part_id Partition to delete from. + @param[in] row Row to delete in MySQL format. + @return error number or 0. 
*/ + int + delete_row_in_part( + uint part_id, + const uchar* row); + + /** Return first record in index from a partition. + @param[in] part Partition to read from. + @param[out] record First record in index in the partition. + @return error number or 0. */ + int + index_first_in_part( + uint part, + uchar* record); + + /** Return last record in index from a partition. + @param[in] part Partition to read from. + @param[out] record Last record in index in the partition. + @return error number or 0. */ + int + index_last_in_part( + uint part, + uchar* record); + + /** Return previous record in index from a partition. + @param[in] part Partition to read from. + @param[out] record Last record in index in the partition. + @return error number or 0. */ + int + index_prev_in_part( + uint part, + uchar* record); + + /** Return next record in index from a partition. + @param[in] part Partition to read from. + @param[out] record Last record in index in the partition. + @return error number or 0. */ + int + index_next_in_part( + uint part, + uchar* record); + + /** Return next same record in index from a partition. + This routine is used to read the next record, but only if the key is + the same as supplied in the call. + @param[in] part Partition to read from. + @param[out] record Last record in index in the partition. + @param[in] key Key to match. + @param[in] length Length of key. + @return error number or 0. */ + int + index_next_same_in_part( + uint part, + uchar* record, + const uchar* key, + uint length); + + /** Start index scan and return first record from a partition. + This routine starts an index scan using a start key. The calling + function will check the end key on its own. + @param[in] part Partition to read from. + @param[out] record First matching record in index in the partition. + @param[in] key Key to match. + @param[in] keypart_map Which part of the key to use. + @param[in] find_flag Key condition/direction to use. + @return error number or 0. 
*/ + int + index_read_map_in_part( + uint part, + uchar* record, + const uchar* key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + + /** Return last matching record in index from a partition. + @param[in] part Partition to read from. + @param[out] record Last matching record in index in the partition. + @param[in] key Key to match. + @param[in] keypart_map Which part of the key to use. + @return error number or 0. */ + int + index_read_last_map_in_part( + uint part, + uchar* record, + const uchar* key, + key_part_map keypart_map); + + /** Start index scan and return first record from a partition. + This routine starts an index scan using a start and end key. + @param[in] part Partition to read from. + @param[out] record First matching record in index in the partition. + if NULL use table->record[0] as return buffer. + @param[in] start_key Start key to match. + @param[in] end_key End key to match. + @param[in] eq_range Is equal range, start_key == end_key. + @param[in] sorted Return rows in sorted order. + @return error number or 0. */ + int + read_range_first_in_part( + uint part, + uchar* record, + const key_range* start_key, + const key_range* end_key, + bool eq_range, + bool sorted); + + /** Return next record in index range scan from a partition. + @param[in] part Partition to read from. + @param[out] record First matching record in index in the partition. + if NULL use table->record[0] as return buffer. + @return error number or 0. */ + int + read_range_next_in_part( + uint part, + uchar* record); + + /** Start index scan and return first record from a partition. + This routine starts an index scan using a start key. The calling + function will check the end key on its own. + @param[in] part Partition to read from. + @param[out] record First matching record in index in the partition. + @param[in] index Index to read from. + @param[in] key Key to match. + @param[in] keypart_map Which part of the key to use. 
+ @param[in] find_flag Key condition/direction to use. + @return error number or 0. */ + int + index_read_idx_map_in_part( + uint part, + uchar* record, + uint index, + const uchar* key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + + /** Initialize random read/scan of a specific partition. + @param[in] part_id Partition to initialize. + @param[in] table_scan True for scan else random access. + @return error number or 0. */ + int + rnd_init_in_part( + uint part_id, + bool table_scan); + + /** Get next row during scan of a specific partition. + @param[in] part_id Partition to read from. + @param[out] record Next row. + @return error number or 0. */ + int + rnd_next_in_part( + uint part_id, + uchar* record); + + /** End random read/scan of a specific partition. + @param[in] part_id Partition to end random read/scan. + @param[in] table_scan True for scan else random access. + @return error number or 0. */ + int + rnd_end_in_part( + uint part_id, + bool table_scan); + + /** Get a reference to the current cursor position in the last used + partition. + @param[out] ref Reference (PK if exists else row_id). + @param[in] record Record to position. */ + void + position_in_last_part( + uchar* ref, + const uchar* record); + + /** Read record by given record (by its PK) from the last used partition. + see handler::rnd_pos_by_record(). + @param[in,out] record Record to position. + @return 0 or error number. */ + int + rnd_pos_by_record_in_last_part( + uchar* record) + { + /* Not much overhead to use default function. + This avoids out-of-sync code. */ + return(handler::rnd_pos_by_record(record)); + } + + /** Copy a cached MySQL record. + @param[out] to_record Where to copy the MySQL record. + @param[in] from_record Which record to copy. */ + void + copy_cached_row( + uchar* to_record, + const uchar* from_record); + /** @} */ + + /* Private handler:: functions specific for native InnoDB partitioning. 
+ @see handler.h @{ */ + + int + open( + const char* name, + int mode, + uint test_if_locked); + + int + close(); + + double + scan_time(); + + /** Was the last returned row semi consistent read. + In an UPDATE or DELETE, if the row under the cursor was locked by + another transaction, and the engine used an optimistic read of the last + committed row value under the cursor, then the engine returns 1 from + this function. MySQL must NOT try to update this optimistic value. If + the optimistic value does not match the WHERE condition, MySQL can + decide to skip over this row. This can be used to avoid unnecessary + lock waits. + + If this method returns true, it will also signal the storage + engine that the next read will be a locking re-read of the row. + @see handler.h and row0mysql.h + @return true if last read was semi consistent else false. */ + bool was_semi_consistent_read(); + + /** Try semi consistent read. + Tell the engine whether it should avoid unnecessary lock waits. + If yes, in an UPDATE or DELETE, if the row under the cursor was locked + by another transaction, the engine may try an optimistic read of + the last committed row value under the cursor. + @see handler.h and row0mysql.h + @param[in] yes Should semi-consistent read be used. */ + void try_semi_consistent_read( + bool yes); + + /** Removes a lock on a row. + Removes a new lock set on a row, if it was not read optimistically. + This can be called after a row has been read in the processing of + an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). 
*/ + void unlock_row(); + + int + index_init( + uint index, + bool sorted); + + int + index_end(); + + int + rnd_init( + bool scan) + { + return(Partition_helper::ph_rnd_init(scan)); + } + + int + rnd_end() + { + return(Partition_helper::ph_rnd_end()); + } + + int + external_lock( + THD* thd, + int lock_type); + + THR_LOCK_DATA** + store_lock( + THD* thd, + THR_LOCK_DATA** to, + thr_lock_type lock_type); + + int + write_row( + uchar* record) + { + return(Partition_helper::ph_write_row(record)); + } + + int + update_row( + const uchar* old_record, + uchar* new_record) + { + return(Partition_helper::ph_update_row(old_record, new_record)); + } + + int + delete_row( + const uchar* record) + { + return(Partition_helper::ph_delete_row(record)); + } + /** @} */ + + /** Truncate partition. + Called from Partition_handler::trunctate_partition(). */ + int + truncate_partition_low(); + + /** Change partitions according to ALTER TABLE ... PARTITION ... + Called from Partition_handler::change_partitions(). + @param[in] create_info Table create info. + @param[in] path Path including db/table_name. + @param[out] copied Number of copied rows. + @param[out] deleted Number of deleted rows. + @return 0 for success or error code. */ + int + change_partitions_low( + HA_CREATE_INFO* create_info, + const char* path, + ulonglong* const copied, + ulonglong* const deleted) + { + return(Partition_helper::change_partitions( + create_info, + path, + copied, + deleted)); + } + + /** Access methods to protected areas in handler to avoid adding + friend class Partition_helper in class handler. 
+ @see partition_handler.h @{ */ + + THD* + get_thd() const + { + return ha_thd(); + } + + TABLE* + get_table() const + { + return table; + } + + bool + get_eq_range() const + { + return eq_range; + } + + void + set_eq_range(bool eq_range_arg) + { + eq_range= eq_range_arg; + } + + void + set_range_key_part(KEY_PART_INFO *key_part) + { + range_key_part= key_part; + } + /** @} */ + + /** Fill in data_dir_path and tablespace name from internal data + dictionary. + @param part_elem Partition element to fill. + @param ib_table InnoDB table to copy from. */ + void + update_part_elem( + partition_element* part_elem, + dict_table_t* ib_table); +protected: + /* Protected handler:: functions specific for native InnoDB partitioning. + @see handler.h @{ */ + + int + rnd_next( + uchar* record) + { + return(Partition_helper::ph_rnd_next(record)); + } + + int + rnd_pos( + uchar* record, + uchar* pos); + + int + records( + ha_rows* num_rows); + + int + index_next( + uchar* record) + { + return(Partition_helper::ph_index_next(record)); + } + + int + index_next_same( + uchar* record, + const uchar* key, + uint keylen) + { + return(Partition_helper::ph_index_next_same(record, key, keylen)); + } + + int + index_prev( + uchar* record) + { + return(Partition_helper::ph_index_prev(record)); + } + + int + index_first( + uchar* record) + { + return(Partition_helper::ph_index_first(record)); + } + + int + index_last( + uchar* record) + { + return(Partition_helper::ph_index_last(record)); + } + + int + index_read_last_map( + uchar* record, + const uchar* key, + key_part_map keypart_map) + { + return(Partition_helper::ph_index_read_last_map( + record, + key, + keypart_map)); + } + + int + index_read_map( + uchar* buf, + const uchar* key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) + { + return(Partition_helper::ph_index_read_map( + buf, + key, + keypart_map, + find_flag)); + } + + int + index_read_idx_map( + uchar* buf, + uint index, + const uchar* key, + key_part_map 
keypart_map, + enum ha_rkey_function find_flag) + { + return(Partition_helper::ph_index_read_idx_map( + buf, + index, + key, + keypart_map, + find_flag)); + } + /** @} */ + + /** Updates and return statistics. + Returns statistics information of the table to the MySQL interpreter, + in various fields of the handle object. + @param[in] flag Flags for what to update and return. + @param[in] is_analyze True if called from ::analyze(). + @return HA_ERR_* error code or 0. */ + int + info_low( + uint flag, + bool is_analyze); +}; +#endif /* ha_innopart_h */ diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index ed2fb497a3b..328eaf2be78 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2016, Oracle and/or its affiliates +Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under @@ -22,36 +22,48 @@ this program; if not, write to the Free Software Foundation, Inc., Smart ALTER TABLE *******************************************************/ -#include -#include -#include -#include +/* Include necessary SQL headers */ +#include "ha_prototypes.h" #include -#include -#include +#include +#include #include #include +#include +/* Include necessary InnoDB headers */ +#include "btr0sea.h" #include "dict0crea.h" #include "dict0dict.h" #include "dict0priv.h" #include "dict0stats.h" #include "dict0stats_bg.h" +#include "fsp0sysspace.h" #include "log0log.h" #include "rem0types.h" #include "row0log.h" #include "row0merge.h" -#include "srv0srv.h" #include "trx0trx.h" #include "trx0roll.h" -#include "ha_prototypes.h" #include "handler0alter.h" #include "srv0mon.h" #include "fts0priv.h" +#include "fts0plugin.h" #include "pars0pars.h" #include "row0sel.h" #include "ha_innodb.h" +#include "ut0new.h" +#include "ut0stage.h" +#ifdef WITH_WSREP +//#include "wsrep_api.h" +#include // PROCESS_ACL +#endif +/* For supporting Native InnoDB Partitioning. 
*/ +/* JAN: TODO: MySQL 5.7 +#include "partition_info.h" +#include "ha_innopart.h" +*/ /** Operations for creating secondary indexes (no rebuild needed) */ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE = Alter_inplace_info::ADD_INDEX @@ -68,10 +80,14 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD | Alter_inplace_info::ALTER_COLUMN_ORDER | Alter_inplace_info::DROP_COLUMN | Alter_inplace_info::ADD_COLUMN +#ifdef MYSQL_VIRTUAL_COLUMNS + | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER + | Alter_inplace_info::DROP_STORED_COLUMN + | Alter_inplace_info::ADD_STORED_BASE_COLUMN + | Alter_inplace_info::ALTER_STORED_COLUMN_TYPE +#endif | Alter_inplace_info::RECREATE_TABLE /* - | Alter_inplace_info::ALTER_COLUMN_TYPE - | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH */ ; @@ -85,6 +101,9 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE | Alter_inplace_info::ALTER_PARTITIONED | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE +#ifdef MYSQL_VIRTUAL_COLUMNS + | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR +#endif | Alter_inplace_info::ALTER_RENAME; /** Operations on foreign key definitions (changing the schema only) */ @@ -98,7 +117,166 @@ static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD | INNOBASE_FOREIGN_OPERATIONS | Alter_inplace_info::DROP_INDEX | Alter_inplace_info::DROP_UNIQUE_INDEX - | Alter_inplace_info::ALTER_COLUMN_NAME; +#ifdef MYSQL_RENAME_INDEX + | Alter_inplace_info::RENAME_INDEX +#endif + | Alter_inplace_info::ALTER_COLUMN_NAME + | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH; +#ifdef MYSQL_VIRTUAL_COLUMNS + | Alter_inplace_info::ALTER_INDEX_COMMENT + | Alter_inplace_info::ADD_VIRTUAL_COLUMN + | Alter_inplace_info::DROP_VIRTUAL_COLUMN + | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER + | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_TYPE +#endif + ; +struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx 
+{ + /** Dummy query graph */ + que_thr_t* thr; + /** The prebuilt struct of the creating instance */ + row_prebuilt_t*& prebuilt; + /** InnoDB indexes being created */ + dict_index_t** add_index; + /** MySQL key numbers for the InnoDB indexes that are being created */ + const ulint* add_key_numbers; + /** number of InnoDB indexes being created */ + ulint num_to_add_index; + /** InnoDB indexes being dropped */ + dict_index_t** drop_index; + /** number of InnoDB indexes being dropped */ + const ulint num_to_drop_index; + /** InnoDB indexes being renamed */ + dict_index_t** rename; + /** number of InnoDB indexes being renamed */ + const ulint num_to_rename; + /** InnoDB foreign key constraints being dropped */ + dict_foreign_t** drop_fk; + /** number of InnoDB foreign key constraints being dropped */ + const ulint num_to_drop_fk; + /** InnoDB foreign key constraints being added */ + dict_foreign_t** add_fk; + /** number of InnoDB foreign key constraints being dropped */ + const ulint num_to_add_fk; + /** whether to create the indexes online */ + bool online; + /** memory heap */ + mem_heap_t* heap; + /** dictionary transaction */ + trx_t* trx; + /** original table (if rebuilt, differs from indexed_table) */ + dict_table_t* old_table; + /** table where the indexes are being created or dropped */ + dict_table_t* new_table; + /** mapping of old column numbers to new ones, or NULL */ + const ulint* col_map; + /** new column names, or NULL if nothing was renamed */ + const char** col_names; + /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */ + const ulint add_autoinc; + /** default values of ADD COLUMN, or NULL */ + const dtuple_t* add_cols; + /** autoinc sequence to use */ + ib_sequence_t sequence; + /** maximum auto-increment value */ + ulonglong max_autoinc; + /** temporary table name to use for old table when renaming tables */ + const char* tmp_name; + /** whether the order of the clustered index is unchanged */ + bool skip_pk_sort; + /** number of 
virtual columns to be added */ + ulint num_to_add_vcol; + /** virtual columns to be added */ + dict_v_col_t* add_vcol; + const char** add_vcol_name; + /** number of virtual columns to be dropped */ + ulint num_to_drop_vcol; + /** virtual columns to be dropped */ + dict_v_col_t* drop_vcol; + const char** drop_vcol_name; + /** ALTER TABLE stage progress recorder */ + ut_stage_alter_t* m_stage; + + ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg, + dict_index_t** drop_arg, + ulint num_to_drop_arg, + dict_index_t** rename_arg, + ulint num_to_rename_arg, + dict_foreign_t** drop_fk_arg, + ulint num_to_drop_fk_arg, + dict_foreign_t** add_fk_arg, + ulint num_to_add_fk_arg, + bool online_arg, + mem_heap_t* heap_arg, + dict_table_t* new_table_arg, + const char** col_names_arg, + ulint add_autoinc_arg, + ulonglong autoinc_col_min_value_arg, + ulonglong autoinc_col_max_value_arg, + ulint num_to_drop_vcol_arg) : + inplace_alter_handler_ctx(), + prebuilt (prebuilt_arg), + add_index (0), add_key_numbers (0), num_to_add_index (0), + drop_index (drop_arg), num_to_drop_index (num_to_drop_arg), + rename (rename_arg), num_to_rename (num_to_rename_arg), + drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg), + add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg), + online (online_arg), heap (heap_arg), trx (0), + old_table (prebuilt_arg->table), + new_table (new_table_arg), + col_map (0), col_names (col_names_arg), + add_autoinc (add_autoinc_arg), + add_cols (0), + sequence(prebuilt->trx->mysql_thd, + autoinc_col_min_value_arg, autoinc_col_max_value_arg), + max_autoinc (0), + tmp_name (0), + skip_pk_sort(false), + num_to_add_vcol(0), + add_vcol(0), + add_vcol_name(0), + num_to_drop_vcol(0), + drop_vcol(0), + drop_vcol_name(0), + m_stage(NULL) + { +#ifdef UNIV_DEBUG + for (ulint i = 0; i < num_to_add_index; i++) { + ut_ad(!add_index[i]->to_be_dropped); + } + for (ulint i = 0; i < num_to_drop_index; i++) { + ut_ad(drop_index[i]->to_be_dropped); + } +#endif /* UNIV_DEBUG */ 
+ + thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap, + prebuilt); + } + + ~ha_innobase_inplace_ctx() + { + UT_DELETE(m_stage); + mem_heap_free(heap); + } + + /** Determine if the table will be rebuilt. + @return whether the table will be rebuilt */ + bool need_rebuild () const { return(old_table != new_table); } + +private: + // Disable copying + ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&); + ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&); +}; + +/********************************************************************//** +Get the upper limit of the MySQL integral and floating-point type. +@return maximum allowed value for the field */ +UNIV_INTERN +ulonglong +innobase_get_int_col_max_value( +/*===========================*/ + const Field* field); /*!< in: MySQL field */ /* Report an InnoDB error to the client by invoking my_error(). */ static UNIV_COLD MY_ATTRIBUTE((nonnull)) @@ -132,11 +310,8 @@ my_error_innodb( my_error(ER_RECORD_FILE_FULL, MYF(0), table); ut_error; break; - case DB_TEMP_FILE_WRITE_FAILURE: - my_error(ER_GET_ERRMSG, MYF(0), - DB_TEMP_FILE_WRITE_FAILURE, - ut_strerr(DB_TEMP_FILE_WRITE_FAILURE), - "InnoDB"); + case DB_TEMP_FILE_WRITE_FAIL: + my_error(ER_TEMP_FILE_WRITE_FAILURE, MYF(0)); break; case DB_TOO_BIG_INDEX_COL: my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), @@ -155,14 +330,20 @@ my_error_innodb( my_error(ER_NOT_KEYFILE, MYF(0), table); break; case DB_TOO_BIG_RECORD: + /* We limit max record size to 16k for 64k page size. */ my_error(ER_TOO_BIG_ROWSIZE, MYF(0), - page_get_free_space_of_empty( + srv_page_size == UNIV_PAGE_SIZE_MAX + ? 
REC_MAX_DATA_SIZE - 1 + : page_get_free_space_of_empty( flags & DICT_TF_COMPACT) / 2); break; case DB_INVALID_NULL: /* TODO: report the row, as we do for DB_DUPLICATE_KEY */ my_error(ER_INVALID_USE_OF_NULL, MYF(0)); break; + case DB_CANT_CREATE_GEOMETRY_OBJECT: + my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0)); + break; case DB_TABLESPACE_EXISTS: my_error(ER_TABLESPACE_EXISTS, MYF(0), table); break; @@ -181,8 +362,8 @@ my_error_innodb( } /** Determine if fulltext indexes exist in a given table. -@param table MySQL table -@return whether fulltext indexes exist on the table */ +@param table MySQL table +@return whether fulltext indexes exist on the table */ static bool innobase_fulltext_exist( @@ -198,9 +379,27 @@ innobase_fulltext_exist( return(false); } +/** Determine if spatial indexes exist in a given table. +@param table MySQL table +@return whether spatial indexes exist on the table */ +static +bool +innobase_spatial_exist( +/*===================*/ + const TABLE* table) +{ + for (uint i = 0; i < table->s->keys; i++) { + if (table->key_info[i].flags & HA_SPATIAL) { + return(true); + } + } + + return(false); +} + /*******************************************************************//** Determine if ALTER TABLE needs to rebuild the table. -@param ha_alter_info the DDL operation +@param ha_alter_info the DDL operation @param altered_table MySQL original table @return whether it is necessary to rebuild the table */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -214,9 +413,12 @@ innobase_need_rebuild( == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT - | HA_CREATE_USED_KEY_BLOCK_SIZE))) { + | HA_CREATE_USED_KEY_BLOCK_SIZE))) { + // JAN: TODO: MySQL 5.7 + // | HA_CREATE_USED_TABLESPACE))) { /* Any other CHANGE_CREATE_OPTION than changing - ROW_FORMAT or KEY_BLOCK_SIZE is ignored. */ + ROW_FORMAT, KEY_BLOCK_SIZE or TABLESPACE can be done + without rebuilding the table. 
*/ return(false); } @@ -251,21 +453,137 @@ innobase_need_rebuild( return(!!(ha_alter_info->handler_flags & INNOBASE_ALTER_REBUILD)); } +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Check if virtual column in old and new table are in order, excluding +those dropped column. This is needed because when we drop a virtual column, +ALTER_VIRTUAL_COLUMN_ORDER is also turned on, so we can't decide if this +is a real ORDER change or just DROP COLUMN +@param[in] table old TABLE +@param[in] altered_table new TABLE +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@return true is all columns in order, false otherwise. */ +static +bool +check_v_col_in_order( + const TABLE* table, + const TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + ulint j = 0; + + /* We don't support any adding new virtual column before + existed virtual column. */ + if (ha_alter_info->handler_flags + & Alter_inplace_info::ADD_VIRTUAL_COLUMN) { + bool has_new = false; + + List_iterator_fast cf_it( + ha_alter_info->alter_info->create_list); + + cf_it.rewind(); + + while (const Create_field* new_field = cf_it++) { + if (!new_field->is_virtual_gcol()) { + continue; + } + + /* Found a new added virtual column. */ + if (!new_field->field) { + has_new = true; + continue; + } + + /* If there's any old virtual column + after the new added virtual column, + order must be changed. 
*/ + if (has_new) { + return(false); + } + } + } + + /* directly return true if ALTER_VIRTUAL_COLUMN_ORDER is not on */ + if (!(ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) { + return(true); + } + + for (ulint i = 0; i < table->s->fields; i++) { + Field* field = table->s->field[i]; + bool dropped = false; + Alter_drop* drop; + + if (field->stored_in_db) { + continue; + } + + ut_ad(innobase_is_v_fld(field)); + + /* Check if this column is in drop list */ + List_iterator_fast cf_it( + ha_alter_info->alter_info->drop_list); + + while ((drop = (cf_it++)) != NULL) { + if (my_strcasecmp(system_charset_info, + field->field_name, drop->name) == 0) { + dropped = true; + break; + } + } + + if (dropped) { + continue; + } + + /* Now check if the next virtual column in altered table + matches this column */ + while (j < altered_table->s->fields) { + Field* new_field = altered_table->s->field[j]; + + if (new_field->stored_in_db) { + j++; + continue; + } + + if (my_strcasecmp(system_charset_info, + field->field_name, + new_field->field_name) != 0) { + /* different column */ + return(false); + } else { + j++; + break; + } + } + + if (j > altered_table->s->fields) { + /* there should not be less column in new table + without them being in drop list */ + ut_ad(0); + return(false); + } + } + + return(true); +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /** Check if InnoDB supports a particular alter table in-place -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done +@param altered_table TABLE object for new version of table. +@param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. 
-@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported -@retval HA_ALTER_INPLACE_NO_LOCK Supported +@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported +@retval HA_ALTER_INPLACE_NO_LOCK Supported @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires lock during main phase and exclusive lock during prepare phase. -@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase +@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase requires exclusive lock (any transactions that have accessed the table must commit or roll back first, and no transactions can access the table while prepare_inplace_alter_table() is executing) */ -UNIV_INTERN + enum_alter_inplace_result ha_innobase::check_if_supported_inplace_alter( /*==========================================*/ @@ -274,14 +592,13 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); - if (high_level_read_only) { - ha_alter_info->unsupported_reason = + if (high_level_read_only + || srv_sys_space.created_new_raw() + || srv_force_recovery) { + ha_alter_info->unsupported_reason = (srv_force_recovery)? + "Operation not allowed when innodb_forced_recovery > 0." : innobase_get_err_msg(ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } else if (srv_created_new_raw || srv_force_recovery) { - ha_alter_info->unsupported_reason = - innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } @@ -295,8 +612,23 @@ ha_innobase::check_if_supported_inplace_alter( DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } +#ifdef MYSQL_ENCRYPTION + /* We don't support change encryption attribute with + inplace algorithm. 
*/ + char* old_encryption = this->table->s->encrypt_type.str; + char* new_encryption = altered_table->s->encrypt_type.str; + + if (Encryption::is_none(old_encryption) + != Encryption::is_none(new_encryption)) { + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_UNSUPPORTED_ALTER_ENCRYPTION_INPLACE); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } +#endif /* MYSQL_ENCRYPTION */ + update_thd(); - trx_search_latch_release_if_reserved(prebuilt->trx); + trx_search_latch_release_if_reserved(m_prebuilt->trx); /* Change on engine specific table options require rebuild of the table */ @@ -326,24 +658,34 @@ ha_innobase::check_if_supported_inplace_alter( | INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD)) { +#ifdef MYSQL_VIRTUAL_COLUMNS if (ha_alter_info->handler_flags - & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH - | Alter_inplace_info::ALTER_COLUMN_TYPE)) + & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE); + } +#endif + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } /* Only support online add foreign key constraint when check_foreigns is turned off */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_FOREIGN_KEY) - && prebuilt->trx->check_foreigns) { + if ((ha_alter_info->handler_flags & Alter_inplace_info::ADD_FOREIGN_KEY) + && m_prebuilt->trx->check_foreigns) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } +#if 0 + if (altered_table->file->ht != ht) { + /* Non-native partitioning table engine. No longer supported, + due to implementation of native InnoDB partitioning. 
*/ + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } +#endif + if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK); } @@ -354,24 +696,12 @@ ha_innobase::check_if_supported_inplace_alter( NULL to a NOT NULL value. */ if ((ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE) - && !thd_is_strict_mode(user_thd)) { + && !thd_is_strict_mode(m_user_thd)) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } - /* InnoDB cannot IGNORE when creating unique indexes. IGNORE - should silently delete some duplicate rows. Our inplace_alter - code will not delete anything from existing indexes. */ - if (ha_alter_info->ignore - && (ha_alter_info->handler_flags - & (Alter_inplace_info::ADD_PK_INDEX - | Alter_inplace_info::ADD_UNIQUE_INDEX))) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - /* DROP PRIMARY KEY is only allowed in combination with ADD PRIMARY KEY. */ if ((ha_alter_info->handler_flags @@ -386,14 +716,14 @@ ha_innobase::check_if_supported_inplace_alter( /* If a column change from NOT NULL to NULL, and there's a implict pk on this column. the table should be rebuild. The change should - only go through the "Copy" method.*/ + only go through the "Copy" method. */ if ((ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) { - uint primary_key = altered_table->s->primary_key; + const uint my_primary_key = altered_table->s->primary_key; - /* See if MYSQL table has no pk but we do.*/ - if (UNIV_UNLIKELY(primary_key >= MAX_KEY) - && !row_table_got_default_clust_index(prebuilt->table)) { + /* See if MYSQL table has no pk but we do. 
*/ + if (UNIV_UNLIKELY(my_primary_key >= MAX_KEY) + && !row_table_got_default_clust_index(m_prebuilt->table)) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_PRIMARY_CANT_HAVE_NULL); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); @@ -413,10 +743,13 @@ ha_innobase::check_if_supported_inplace_alter( */ for (ulint i = 0, icol= 0; i < table->s->fields; i++) { const Field* field = table->field[i]; - const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, icol); + const dict_col_t* col = dict_table_get_nth_col(m_prebuilt->table, icol); ulint unsigned_flag; - if (!field->stored_in_db()) + + if (!field->stored_in_db()) { continue; + } + icol++; if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag, field)) { @@ -459,20 +792,63 @@ ha_innobase::check_if_supported_inplace_alter( } } - ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes); + ulint n_indexes = UT_LIST_GET_LEN((m_prebuilt->table)->indexes); /* If InnoDB dictionary and MySQL frm file are not consistent use "Copy" method. 
*/ - if (prebuilt->table->dict_frm_mismatch) { + if (m_prebuilt->table->dict_frm_mismatch) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_NO_SUCH_INDEX); - ib_push_frm_error(user_thd, prebuilt->table, altered_table, + ib_push_frm_error(m_user_thd, m_prebuilt->table, altered_table, n_indexes, true); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } +#ifdef MYSQL_VIRTUAL_COLUMNS + // JAN: TODO: MySQL 5.7 Virtual columns + /* If there is add or drop virtual columns, we will support operations + with these 2 options alone with inplace interface for now */ + + if (ha_alter_info->handler_flags + & (Alter_inplace_info::ADD_VIRTUAL_COLUMN + | Alter_inplace_info::DROP_VIRTUAL_COLUMN + | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER)) { + ulonglong flags = ha_alter_info->handler_flags; + + /* TODO: uncomment the flags below, once we start to + support them */ + + flags &= ~(Alter_inplace_info::ADD_VIRTUAL_COLUMN + | Alter_inplace_info::DROP_VIRTUAL_COLUMN + | Alter_inplace_info::ALTER_VIRTUAL_COLUMN_ORDER + | Alter_inplace_info::ALTER_VIRTUAL_GCOL_EXPR + /* + | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER + | Alter_inplace_info::ADD_STORED_BASE_COLUMN + | Alter_inplace_info::DROP_STORED_COLUMN + | Alter_inplace_info::ALTER_STORED_COLUMN_ORDER + | Alter_inplace_info::ADD_UNIQUE_INDEX + */ + | Alter_inplace_info::ADD_INDEX + | Alter_inplace_info::DROP_INDEX); + + if (flags != 0 + || (altered_table->s->partition_info_str + && altered_table->s->partition_info_str_len) + || (!check_v_col_in_order( + this->table, altered_table, ha_alter_info))) { + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_UNSUPPORTED_ALTER_INPLACE_ON_VIRTUAL_COLUMN); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + add_drop_v_cols = true; + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /* We should be able to do the operation in-place. See if we can do it online (LOCK=NONE). 
*/ bool online = true; @@ -485,6 +861,20 @@ ha_innobase::check_if_supported_inplace_alter( new_key < ha_alter_info->key_info_buffer + ha_alter_info->key_count; new_key++) { + +#ifdef MYSQL_VIRTUAL_COLUMNS + /* Do not support adding/droping a vritual column, while + there is a table rebuild caused by adding a new FTS_DOC_ID */ + if ((new_key->flags & HA_FULLTEXT) && add_drop_v_cols + && !DICT_TF2_FLAG_IS_SET(m_prebuilt->table, + DICT_TF2_FTS_HAS_DOC_ID)) { + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_UNSUPPORTED_ALTER_INPLACE_ON_VIRTUAL_COLUMN); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + for (KEY_PART_INFO* key_part = new_key->key_part; key_part < new_key->key_part + new_key->user_defined_key_parts; key_part++) { @@ -504,10 +894,11 @@ ha_innobase::check_if_supported_inplace_alter( key_part->field = altered_table->field[ key_part->fieldnr]; + /* In some special cases InnoDB emits "false" duplicate key errors with NULL key values. Let us play safe and ensure that we can correctly - print key values even in such cases .*/ + print key values even in such cases. */ key_part->null_offset = key_part->field->null_offset(); key_part->null_bit = key_part->field->null_bit; @@ -522,7 +913,7 @@ ha_innobase::check_if_supported_inplace_alter( /* We cannot replace a hidden FTS_DOC_ID with a user-visible FTS_DOC_ID. 
*/ - if (prebuilt->table->fts + if (m_prebuilt->table->fts && innobase_fulltext_exist(altered_table) && !my_strcasecmp( system_charset_info, @@ -547,15 +938,45 @@ ha_innobase::check_if_supported_inplace_alter( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC); online = false; } + +#ifdef MYSQL_VIRTUAL_COLUMNS + if (key_part->field->is_virtual_gcol()) { + /* Do not support adding index on newly added + virtual column, while there is also a drop + virtual column in the same clause */ + if (ha_alter_info->handler_flags + & Alter_inplace_info::DROP_VIRTUAL_COLUMN) { + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_UNSUPPORTED_ALTER_INPLACE_ON_VIRTUAL_COLUMN); + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_UNSUPPORTED_ALTER_ONLINE_ON_VIRTUAL_COLUMN); + online = false; + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ } } - DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col + DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col <= table->s->stored_fields); - DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col - < dict_table_get_n_user_cols(prebuilt->table)); + DBUG_ASSERT(!m_prebuilt->table->fts || m_prebuilt->table->fts->doc_col + < dict_table_get_n_user_cols(m_prebuilt->table)); - if (prebuilt->table->fts +#ifdef MYSQL_SPATIAL_INDEX + if (ha_alter_info->handler_flags + & Alter_inplace_info::ADD_SPATIAL_INDEX) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS); + online = false; + } +#endif + + if (m_prebuilt->table->fts && innobase_fulltext_exist(altered_table)) { /* FULLTEXT indexes are supposed to remain. 
*/ /* Disallow DROP INDEX FTS_DOC_ID_INDEX */ @@ -592,7 +1013,7 @@ ha_innobase::check_if_supported_inplace_alter( } } - prebuilt->trx->will_lock++; + m_prebuilt->trx->will_lock++; if (!online) { /* We already determined that only a non-locking @@ -600,19 +1021,33 @@ ha_innobase::check_if_supported_inplace_alter( } else if (((ha_alter_info->handler_flags & Alter_inplace_info::ADD_PK_INDEX) || innobase_need_rebuild(ha_alter_info, table)) - && (innobase_fulltext_exist(altered_table))) { + && (innobase_fulltext_exist(altered_table) + || innobase_spatial_exist(altered_table))) { /* Refuse to rebuild the table online, if - fulltext indexes are to survive the rebuild. */ + FULLTEXT OR SPATIAL indexes are to survive the rebuild. */ online = false; /* If the table already contains fulltext indexes, refuse to rebuild the table natively altogether. */ - if (prebuilt->table->fts) { + if (m_prebuilt->table->fts) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_INNODB_FT_LIMIT); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS); + + if (innobase_spatial_exist(altered_table)) { +#ifdef MYSQL_SPATIAL_INDEX + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS); +#endif + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_INNODB_FT_LIMIT); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } else { + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS); + } } else if ((ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX)) { /* Building a full-text index requires a lock. @@ -678,12 +1113,12 @@ innobase_init_foreign( same MySQL 'database' as the table itself. We store the name to foreign->id. 
*/ - db_len = dict_get_db_name_len(table->name); + db_len = dict_get_db_name_len(table->name.m_name); foreign->id = static_cast(mem_heap_alloc( foreign->heap, db_len + strlen(constraint_name) + 2)); - ut_memcpy(foreign->id, table->name, db_len); + ut_memcpy(foreign->id, table->name.m_name, db_len); foreign->id[db_len] = '/'; strcpy(foreign->id + db_len + 1, constraint_name); @@ -698,7 +1133,7 @@ innobase_init_foreign( foreign->foreign_table = table; foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); + foreign->heap, table->name.m_name); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); foreign->foreign_index = index; @@ -778,6 +1213,7 @@ innobase_set_foreign_key_option( ut_ad(!foreign->type); switch (fk_key->delete_opt) { + // JAN: TODO: ? MySQL 5.7 used enum fk_option directly from sql_lex.h case Foreign_key::FK_OPTION_NO_ACTION: case Foreign_key::FK_OPTION_RESTRICT: case Foreign_key::FK_OPTION_DEFAULT: @@ -811,7 +1247,7 @@ innobase_set_foreign_key_option( /*******************************************************************//** Check if a foreign key constraint can make use of an index that is being created. -@return useable index, or NULL if none found */ +@return useable index, or NULL if none found */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) const KEY* innobase_find_equiv_index( @@ -826,7 +1262,8 @@ innobase_find_equiv_index( for (uint i = 0; i < n_add; i++) { const KEY* key = &keys[add[i]]; - if (key->user_defined_key_parts < n_cols) { + if (key->user_defined_key_parts < n_cols + || key->flags & HA_SPATIAL) { no_match: continue; } @@ -836,6 +1273,12 @@ no_match: uint32 col_len = key_part.field->pack_length(); + /* Any index on virtual columns cannot be used + for reference constaint */ + if (innobase_is_v_fld(key_part.field)) { + goto no_match; + } + /* The MySQL pack length contains 1 or 2 bytes length field for a true VARCHAR. 
*/ @@ -891,6 +1334,7 @@ innobase_find_fk_index( while (index != NULL) { if (!(index->type & DICT_FTS) + && !dict_index_has_virtual(index) && dict_foreign_qualify_index( table, col_names, columns, n_cols, index, NULL, true, 0, @@ -912,27 +1356,101 @@ next_rec: return(NULL); } -/*************************************************************//** -Create InnoDB foreign key structure from MySQL alter_info +#ifdef MYSQL_VIRTUAL_COLUMNS + +/** Check whether given column is a base of stored column. +@param[in] col_name column name +@param[in] table table +@param[in] s_cols list of stored columns +@return true if the given column is a base of stored column,else false. */ +static +bool +innobase_col_check_fk( + const char* col_name, + const dict_table_t* table, + dict_s_col_list* s_cols) +{ + dict_s_col_list::const_iterator it; + + for (it = s_cols->begin(); + it != s_cols->end(); ++it) { + dict_s_col_t s_col = *it; + + for (ulint j = 0; j < s_col.num_base; j++) { + if (strcmp(col_name, dict_table_get_col_name( + table, + s_col.base_col[j]->ind)) == 0) { + return(true); + } + } + } + + return(false); +} + +/** Check whether the foreign key constraint is on base of any stored columns. +@param[in] foreign Foriegn key constraing information +@param[in] table table to which the foreign key objects +to be added +@param[in] s_cols list of stored column information in the table. +@return true if yes, otherwise false. 
*/ +static +bool +innobase_check_fk_stored( + const dict_foreign_t* foreign, + const dict_table_t* table, + dict_s_col_list* s_cols) +{ + ulint type = foreign->type; + + type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION + | DICT_FOREIGN_ON_UPDATE_NO_ACTION); + + if (type == 0 || s_cols == NULL) { + return(false); + } + + for (ulint i = 0; i < foreign->n_fields; i++) { + if (innobase_col_check_fk( + foreign->foreign_col_names[i], table, s_cols)) { + return(true); + } + } + + return(false); +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + +/** Create InnoDB foreign key structure from MySQL alter_info +@param[in] ha_alter_info alter table info +@param[in] table_share TABLE_SHARE +@param[in] table table object +@param[in] col_names column names, or NULL to use +table->col_names +@param[in] drop_index indexes to be dropped +@param[in] n_drop_index size of drop_index +@param[out] add_fk foreign constraint added +@param[out] n_add_fk number of foreign constraints +added +@param[in] trx user transaction +@param[in] s_cols list of stored column information @retval true if successful @retval false on error (will call my_error()) */ static MY_ATTRIBUTE((nonnull(1,2,3,7,8), warn_unused_result)) bool innobase_get_foreign_key_info( -/*==========================*/ Alter_inplace_info* - ha_alter_info, /*!< in: alter table info */ + ha_alter_info, const TABLE_SHARE* - table_share, /*!< in: the TABLE_SHARE */ - dict_table_t* table, /*!< in: table */ - const char** col_names, /*!< in: column names, or NULL - to use table->col_names */ - dict_index_t** drop_index, /*!< in: indexes to be dropped */ - ulint n_drop_index, /*!< in: size of drop_index[] */ - dict_foreign_t**add_fk, /*!< out: foreign constraint added */ - ulint* n_add_fk, /*!< out: number of foreign - constraints added */ - const trx_t* trx) /*!< in: user transaction */ + table_share, + dict_table_t* table, + const char** col_names, + dict_index_t** drop_index, + ulint n_drop_index, + dict_foreign_t**add_fk, + ulint* n_add_fk, + const 
trx_t* trx, + dict_s_col_list*s_cols) { Key* key; Foreign_key* fk_key; @@ -941,6 +1459,8 @@ innobase_get_foreign_key_info( ulint num_fk = 0; Alter_info* alter_info = ha_alter_info->alter_info; + DBUG_ENTER("innobase_get_foreign_key_info"); + *n_add_fk = 0; List_iterator key_iterator(alter_info->key_list); @@ -1014,8 +1534,8 @@ innobase_get_foreign_key_info( add_fk[num_fk] = dict_mem_foreign_create(); -#ifndef __WIN__ - if(fk_key->ref_db.str) { +#ifndef _WIN32 + if (fk_key->ref_db.str) { tablename_to_filename(fk_key->ref_db.str, db_name, MAX_DATABASE_NAME_LEN); db_namep = db_name; @@ -1046,7 +1566,7 @@ innobase_get_foreign_key_info( mutex_enter(&dict_sys->mutex); referenced_table_name = dict_get_referenced_table( - table->name, + table->name.m_name, db_namep, db_name_len, tbl_namep, @@ -1147,12 +1667,20 @@ innobase_get_foreign_key_info( goto err_exit; } +#ifdef MYSQL_VIRTUAL_COLUMNS + if (innobase_check_fk_stored( + add_fk[num_fk], table, s_cols)) { + my_error(ER_CANNOT_ADD_FOREIGN_BASE_COL_STORED, MYF(0)); + goto err_exit; + } +#endif + num_fk++; } *n_add_fk = num_fk; - return(true); + DBUG_RETURN(true); err_exit: for (ulint i = 0; i <= num_fk; i++) { if (add_fk[i]) { @@ -1160,7 +1688,7 @@ err_exit: } } - return(false); + DBUG_RETURN(false); } /*************************************************************//** @@ -1214,6 +1742,8 @@ innobase_col_to_mysql( memcpy(dest, data, len); break; + case DATA_VAR_POINT: + case DATA_GEOMETRY: case DATA_BLOB: /* Skip MySQL BLOBs when reporting an erroneous row during index creation or table rebuild. */ @@ -1237,6 +1767,7 @@ innobase_col_to_mysql( case DATA_FLOAT: case DATA_DOUBLE: case DATA_DECIMAL: + case DATA_POINT: /* Above are the valid column types for MySQL data. */ ut_ad(flen == len); /* fall through */ @@ -1254,7 +1785,6 @@ innobase_col_to_mysql( /*************************************************************//** Copies an InnoDB record to table->record[0]. 
*/ -UNIV_INTERN void innobase_rec_to_mysql( /*==================*/ @@ -1265,7 +1795,7 @@ innobase_rec_to_mysql( rec, index, ...) */ { uint n_fields = table->s->stored_fields; - uint sql_idx = 0; + uint sql_idx = 0; ut_ad(n_fields == dict_table_get_n_user_cols(index->table) - !!(DICT_TF2_FLAG_IS_SET(index->table, @@ -1276,14 +1806,16 @@ innobase_rec_to_mysql( ulint ipos; ulint ilen; const uchar* ifield; + ulint prefix_col; - while (!((field= table->field[sql_idx])->stored_in_db())) - sql_idx++; + while (!((field= table->field[sql_idx])->stored_in_db())) { + sql_idx++; + } field->reset(); - ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE, - NULL); + ipos = dict_index_get_nth_col_or_prefix_pos( + index, i, true, false, &prefix_col); if (ipos == ULINT_UNDEFINED || rec_offs_nth_extern(offsets, ipos)) { @@ -1311,7 +1843,6 @@ null_field: /*************************************************************//** Copies an InnoDB index entry to table->record[0]. */ -UNIV_INTERN void innobase_fields_to_mysql( /*=====================*/ @@ -1320,23 +1851,36 @@ innobase_fields_to_mysql( const dfield_t* fields) /*!< in: InnoDB index fields */ { uint n_fields = table->s->stored_fields; - uint sql_idx = 0; + uint sql_idx = 0; + ulint num_v = 0; ut_ad(n_fields == dict_table_get_n_user_cols(index->table) + + dict_table_get_n_v_cols(index->table) - !!(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_FTS_HAS_DOC_ID))); for (uint i = 0; i < n_fields; i++, sql_idx++) { Field* field; ulint ipos; + ulint col_n; + ulint prefix_col; - while (!((field= table->field[sql_idx])->stored_in_db())) - sql_idx++; + while (!((field= table->field[sql_idx])->stored_in_db())) { + sql_idx++; + } field->reset(); - ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE, - NULL); + if (innobase_is_v_fld(field)) { + col_n = num_v; + num_v++; + } else { + col_n = i - num_v; + } + + ipos = dict_index_get_nth_col_or_prefix_pos( + index, col_n, true, innobase_is_v_fld(field), + &prefix_col); if (ipos == 
ULINT_UNDEFINED || dfield_is_ext(&fields[ipos]) @@ -1359,7 +1903,6 @@ innobase_fields_to_mysql( /*************************************************************//** Copies an InnoDB row to table->record[0]. */ -UNIV_INTERN void innobase_row_to_mysql( /*==================*/ @@ -1367,30 +1910,41 @@ innobase_row_to_mysql( const dict_table_t* itab, /*!< in: InnoDB table */ const dtuple_t* row) /*!< in: InnoDB row */ { - uint n_fields = table->s->stored_fields; - uint sql_idx = 0; + uint n_fields = table->s->stored_fields; + uint sql_idx = 0; + uint num_v = 0; /* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */ ut_ad(row->n_fields == dict_table_get_n_cols(itab)); ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS + + dict_table_get_n_v_cols(itab) - !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID))); for (uint i = 0; i < n_fields; i++, sql_idx++) { Field* field; - const dfield_t* df = dtuple_get_nth_field(row, i); - while (!((field= table->field[sql_idx])->stored_in_db())) - sql_idx++; + while (!((field= table->field[sql_idx])->stored_in_db())) { + sql_idx++; + } field->reset(); + if (innobase_is_v_fld(field)) { + /* Virtual column are not stored in InnoDB table, so + skip it */ + num_v++; + continue; + } + + const dfield_t* df = dtuple_get_nth_field(row, i - num_v); + if (dfield_is_ext(df) || dfield_is_null(df)) { field->set_null(); } else { field->set_notnull(); innobase_col_to_mysql( - dict_table_get_nth_col(itab, i), + dict_table_get_nth_col(itab, i - num_v), static_cast(dfield_get_data(df)), dfield_get_len(df), field); } @@ -1399,7 +1953,6 @@ innobase_row_to_mysql( /*************************************************************//** Resets table->record[0]. */ -UNIV_INTERN void innobase_rec_reset( /*===============*/ @@ -1415,7 +1968,7 @@ innobase_rec_reset( /*******************************************************************//** This function checks that index keys are sensible. 
-@return 0 or error number */ +@return 0 or error number */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) int innobase_check_index_keys( @@ -1452,11 +2005,24 @@ innobase_check_index_keys( for (index = dict_table_get_first_index(innodb_table); index; index = dict_table_get_next_index(index)) { - if (!strcmp(key.name, index->name)) { + if (index->is_committed() + && !strcmp(key.name, index->name)) { break; } } + /* Now we are in a situation where we have "ADD INDEX x" + and an index by the same name already exists. We have 4 + possible cases: + 1. No further clauses for an index x are given. Should reject + the operation. + 2. "DROP INDEX x" is given. Should allow the operation. + 3. "RENAME INDEX x TO y" is given. Should allow the operation. + 4. "DROP INDEX x, RENAME INDEX x TO y" is given. Should allow + the operation, since no name clash occurs. In this particular + case MySQL cancels the operation without calling InnoDB + methods. */ + if (index) { /* If a key by the same name is being created and dropped, the name clash is OK. */ @@ -1470,6 +2036,24 @@ innobase_check_index_keys( } } +#ifdef MYSQL_RENAME_INDEX + /* If a key by the same name is being created and + renamed, the name clash is OK. E.g. + ALTER TABLE t ADD INDEX i (col), RENAME INDEX i TO x + where the index "i" exists prior to the ALTER command. + In this case we: + 1. rename the existing index from "i" to "x" + 2. 
add the new index "i" */ + for (uint i = 0; i < info->index_rename_count; i++) { + const KEY_PAIR* pair + = &info->index_rename_buffer[i]; + + if (0 == strcmp(key.name, pair->old_key->name)) { + goto name_ok; + } + } +#endif /* MYSQL_RENAME_INDEX */ + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name); return(ER_WRONG_NAME_FOR_INDEX); @@ -1535,41 +2119,70 @@ name_ok: return(0); } -/*******************************************************************//** -Create index field definition for key part */ -static MY_ATTRIBUTE((nonnull(2,3))) +/** Create index field definition for key part +@param[in] altered_table MySQL table that is being altered, + or NULL if a new clustered index + is not being created +@param[in] key_part MySQL key definition +@param[in,out] index_field index field +@param[in] new_clustered new cluster +@param[in] fields MySQL table fields*/ +static void innobase_create_index_field_def( -/*============================*/ - const TABLE* altered_table, /*!< in: MySQL table that is - being altered, or NULL - if a new clustered index is - not being created */ - const KEY_PART_INFO* key_part, /*!< in: MySQL key definition */ - index_field_t* index_field, /*!< out: index field - definition for key_part */ - const Field** fields) /*!< in: MySQL table fields */ + const TABLE* altered_table, + const KEY_PART_INFO* key_part, + index_field_t* index_field, + bool new_clustered, + const Field** fields) { const Field* field; ibool is_unsigned; ulint col_type; + ulint num_v = 0; + ulint num_m_v = 0; DBUG_ENTER("innobase_create_index_field_def"); ut_ad(key_part); ut_ad(index_field); - field = altered_table + field = new_clustered ? 
altered_table->field[key_part->fieldnr] : key_part->field; ut_a(field); - index_field->col_no = key_part->fieldnr; + for (ulint i = 0; i < key_part->fieldnr; i++) { + const Field* ifield = altered_table->field[i]; + if (innobase_is_v_fld(ifield)) { + num_v++; + } + + if (!ifield->stored_in_db()) { + num_m_v++; + } + } + + col_type = get_innobase_type_from_mysql_type( + &is_unsigned, field); + +#ifdef MYSQL_VIRTUAL_COLUMNS + if (!field->stored_in_db && field->gcol_info) { + if (!field->stored_in_db && false) { + index_field->is_v_col = true; + index_field->col_no = num_v; + } else { + index_field->is_v_col = false; + index_field->col_no = key_part->fieldnr - num_v; + } + } +#else + index_field->is_v_col = false; + index_field->col_no = key_part->fieldnr - num_m_v; index_field->col_name = altered_table ? field->field_name : fields[key_part->fieldnr]->field_name; +#endif /* MYSQL_VIRTUAL_COLUMNS */ - col_type = get_innobase_type_from_mysql_type(&is_unsigned, field); - - if (DATA_BLOB == col_type + if (DATA_LARGE_MTYPE(col_type) || (key_part->length < field->pack_length() && field->type() != MYSQL_TYPE_VARCHAR) || (field->type() == MYSQL_TYPE_VARCHAR @@ -1584,32 +2197,30 @@ innobase_create_index_field_def( DBUG_VOID_RETURN; } -/*******************************************************************//** -Create index definition for key */ +/** Create index definition for key +@param[in] altered_table MySQL table that is being altered +@param[in] keys key definitions +@param[in] key_number MySQL key number +@param[in] new_clustered true if generating a new clustered +index on the table +@param[in] key_clustered true if this is the new clustered index +@param[out] index index definition +@param[in] heap heap where memory is allocated */ static MY_ATTRIBUTE((nonnull)) void innobase_create_index_def( -/*======================*/ - const TABLE* altered_table, /*!< in: MySQL table that is - being altered */ - const KEY* keys, /*!< in: key definitions */ - ulint key_number, /*!< 
in: MySQL key number */ - bool new_clustered, /*!< in: true if generating - a new clustered index - on the table */ - bool key_clustered, /*!< in: true if this is - the new clustered index */ - index_def_t* index, /*!< out: index definition */ - mem_heap_t* heap, /*!< in: heap where memory - is allocated */ - const Field** fields) /*!< in: MySQL table fields - */ + const TABLE* altered_table, + const KEY* keys, + ulint key_number, + bool new_clustered, + bool key_clustered, + index_def_t* index, + mem_heap_t* heap, + const Field** fields) { const KEY* key = &keys[key_number]; ulint i; - ulint len; ulint n_fields = key->user_defined_key_parts; - char* index_name; DBUG_ENTER("innobase_create_index_def"); DBUG_ASSERT(!key_clustered || new_clustered); @@ -1618,43 +2229,101 @@ innobase_create_index_def( mem_heap_alloc(heap, n_fields * sizeof *index->fields)); memset(index->fields, 0, n_fields * sizeof *index->fields); - index->ind_type = 0; + index->parser = NULL; + index->is_ngram = false; index->key_number = key_number; index->n_fields = n_fields; - len = strlen(key->name) + 1; - index->name = index_name = static_cast( - mem_heap_alloc(heap, len + !new_clustered)); - - if (!new_clustered) { - *index_name++ = TEMP_INDEX_PREFIX; - } - - memcpy(index_name, key->name, len); - - if (key->flags & HA_NOSAME) { - index->ind_type |= DICT_UNIQUE; - } + index->name = mem_heap_strdup(heap, key->name); + index->rebuild = new_clustered; if (key_clustered) { - DBUG_ASSERT(!(key->flags & HA_FULLTEXT)); - index->ind_type |= DICT_CLUSTERED; + DBUG_ASSERT(!(key->flags & (HA_FULLTEXT | HA_SPATIAL))); + DBUG_ASSERT(key->flags & HA_NOSAME); + index->ind_type = DICT_CLUSTERED | DICT_UNIQUE; } else if (key->flags & HA_FULLTEXT) { + DBUG_ASSERT(!(key->flags & (HA_SPATIAL | HA_NOSAME))); DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK & ~(HA_FULLTEXT | HA_PACK_KEY | HA_BINARY_PACK_KEY))); + index->ind_type = DICT_FTS; + + /* Note: key->parser is only parser name, + we need to get parser from 
altered_table instead */ + + if (key->flags & HA_USES_PARSER) { + for (ulint j = 0; j < altered_table->s->keys; j++) { + if (ut_strcmp(altered_table->key_info[j].name, + key->name) == 0) { + ut_ad(altered_table->key_info[j].flags + & HA_USES_PARSER); + + plugin_ref parser = + altered_table->key_info[j].parser; + index->parser = + static_cast( + plugin_decl(parser)->info); + + index->is_ngram = strncmp( + plugin_name(parser)->str, + FTS_NGRAM_PARSER_NAME, + plugin_name(parser)->length) + == 0; + + break; + } + } + + DBUG_EXECUTE_IF("fts_instrument_use_default_parser", + index->parser = &fts_default_parser;); + ut_ad(index->parser); + } + } else if (key->flags & HA_SPATIAL) { DBUG_ASSERT(!(key->flags & HA_NOSAME)); - DBUG_ASSERT(!index->ind_type); - index->ind_type |= DICT_FTS; + index->ind_type = DICT_SPATIAL; + ut_ad(n_fields == 1); + ulint num_v = 0; + + /* Need to count the virtual fields before this spatial + indexed field */ + for (ulint i = 0; i < key->key_part->fieldnr; i++) { + if (innobase_is_v_fld(altered_table->field[i])) { + num_v++; + } + } + index->fields[0].col_no = key->key_part[0].fieldnr - num_v; + index->fields[0].prefix_len = 0; + index->fields[0].is_v_col = false; + + #ifdef MYSQL_VIRTUAL_COLUMNS + if (!key->key_part[0].field->stored_in_db + && key->key_part[0].field->gcol_info) { + + /* Currently, the spatial index cannot be created + on virtual columns. It is blocked in server + layer */ + + ut_ad(0); + index->fields[0].is_v_col = true; + } else { + + index->fields[0].is_v_col = false; + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + } else { + index->ind_type = (key->flags & HA_NOSAME) ? 
DICT_UNIQUE : 0; } - if (!new_clustered) { - altered_table = NULL; - } + if (!(key->flags & HA_SPATIAL)) { + for (i = 0; i < n_fields; i++) { + innobase_create_index_field_def( + altered_table, &key->key_part[i], + &index->fields[i], new_clustered, fields); - for (i = 0; i < n_fields; i++) { - innobase_create_index_field_def( - altered_table, &key->key_part[i], &index->fields[i], fields); + if (index->fields[i].is_v_col) { + index->ind_type |= DICT_VIRTUAL; + } + } } DBUG_VOID_RETURN; @@ -1672,22 +2341,31 @@ innobase_fts_check_doc_id_col( const TABLE* altered_table, /*!< in: MySQL table with fulltext index */ - ulint* fts_doc_col_no) + ulint* fts_doc_col_no, /*!< out: The column number for Doc ID, or ULINT_UNDEFINED if it is of wrong type */ + ulint* num_v) /*!< out: number of virtual column */ { *fts_doc_col_no = ULINT_UNDEFINED; const uint n_cols = altered_table->s->stored_fields; - uint sql_idx = 0; - uint i; + uint sql_idx = 0; + ulint i; + + *num_v = 0; for (i = 0; i < n_cols; i++, sql_idx++) { const Field* field; - while (!((field= altered_table->field[sql_idx])-> - stored_in_db())) - sql_idx++; + + while (!((field= altered_table->field[sql_idx])->stored_in_db())) { + sql_idx++; + } + + if (innobase_is_v_fld(field)) { + (*num_v)++; + } + if (my_strcasecmp(system_charset_info, field->field_name, FTS_DOC_ID_COL_NAME)) { continue; @@ -1699,11 +2377,12 @@ innobase_fts_check_doc_id_col( } else if (field->type() != MYSQL_TYPE_LONGLONG || field->pack_length() != 8 || field->real_maybe_null() - || !(field->flags & UNSIGNED_FLAG)) { + || !(field->flags & UNSIGNED_FLAG) + || innobase_is_v_fld(field)) { my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0), field->field_name); } else { - *fts_doc_col_no = i; + *fts_doc_col_no = i - *num_v; } return(true); @@ -1713,6 +2392,9 @@ innobase_fts_check_doc_id_col( return(false); } + /* Not to count the virtual columns */ + i -= *num_v; + for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) { const char* name = 
dict_table_get_col_name(table, i); @@ -1741,8 +2423,7 @@ innobase_fts_check_doc_id_col( /*******************************************************************//** Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME on the Doc ID column. -@return the status of the FTS_DOC_ID index */ -UNIV_INTERN +@return the status of the FTS_DOC_ID index */ enum fts_doc_id_index_enum innobase_fts_check_doc_id_index( /*============================*/ @@ -1811,7 +2492,8 @@ innobase_fts_check_doc_id_index( if (strcmp(field->name, FTS_DOC_ID_COL_NAME) == 0 && field->col->mtype == DATA_INT && field->col->len == 8 - && field->col->prtype & DATA_NOT_NULL) { + && field->col->prtype & DATA_NOT_NULL + && !dict_col_is_virtual(field->col)) { if (fts_doc_col_no) { *fts_doc_col_no = dict_col_get_no(field->col); } @@ -1828,9 +2510,8 @@ innobase_fts_check_doc_id_index( /*******************************************************************//** Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME on the Doc ID column in MySQL create index definition. -@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index, +@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index, FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */ -UNIV_INTERN enum fts_doc_id_index_enum innobase_fts_check_doc_id_index_in_def( /*===================================*/ @@ -1875,7 +2556,7 @@ ELSE ENDIF -@return key definitions */ +@return key definitions */ static MY_ATTRIBUTE((nonnull, warn_unused_result, malloc)) index_def_t* innobase_create_key_defs( @@ -1940,11 +2621,14 @@ innobase_create_key_defs( const uint maybe_null = key_info[*add].key_part[key_part].key_type & FIELDFLAG_MAYBE_NULL; + bool is_v + = innobase_is_v_fld( + key_info[*add].key_part[key_part].field); DBUG_ASSERT(!maybe_null == !key_info[*add].key_part[key_part]. 
field->real_maybe_null()); - if (maybe_null) { + if (maybe_null || is_v) { new_primary = false; break; } @@ -1954,6 +2638,7 @@ innobase_create_key_defs( const bool rebuild = new_primary || add_fts_doc_id || innobase_need_rebuild(ha_alter_info, table); + /* Reserve one more space if new_primary is true, and we might need to add the FTS_DOC_ID_INDEX */ indexdef = indexdefs = static_cast( @@ -1976,8 +2661,8 @@ innobase_create_key_defs( index->fields = NULL; index->n_fields = 0; index->ind_type = DICT_CLUSTERED; - index->name = mem_heap_strdup( - heap, innobase_index_reserve_name); + index->name = innobase_index_reserve_name; + index->rebuild = true; index->key_number = ~0; primary_key_number = ULINT_UNDEFINED; goto created_clustered; @@ -1999,8 +2684,8 @@ created_clustered: } /* Copy the index definitions. */ innobase_create_index_def( - altered_table, key_info, i, TRUE, FALSE, - indexdef, heap, (const Field **)altered_table->field); + altered_table, key_info, i, true, + false, indexdef, heap, (const Field **)altered_table->field); if (indexdef->ind_type & DICT_FTS) { n_fts_add++; @@ -2011,12 +2696,14 @@ created_clustered: } if (n_fts_add > 0) { + ulint num_v = 0; + if (!add_fts_doc_id && !innobase_fts_check_doc_id_col( NULL, altered_table, - &fts_doc_id_col)) { + &fts_doc_id_col, &num_v)) { fts_doc_id_col = - altered_table->s->stored_fields; + altered_table->s->stored_fields; add_fts_doc_id = true; } @@ -2044,8 +2731,8 @@ created_clustered: for (ulint i = 0; i < n_add; i++) { innobase_create_index_def( - altered_table, key_info, add[i], FALSE, FALSE, - indexdef, heap, (const Field **)altered_table->field); + altered_table, key_info, add[i], + false, false, indexdef, heap, (const Field **)altered_table->field); if (indexdef->ind_type & DICT_FTS) { n_fts_add++; @@ -2066,23 +2753,14 @@ created_clustered: index->n_fields = 1; index->fields->col_no = fts_doc_id_col; index->fields->prefix_len = 0; + index->fields->is_v_col = false; index->ind_type = DICT_UNIQUE; + 
ut_ad(!rebuild + || !add_fts_doc_id + || fts_doc_id_col <= altered_table->s->fields); - if (rebuild) { - index->name = mem_heap_strdup( - heap, FTS_DOC_ID_INDEX_NAME); - ut_ad(!add_fts_doc_id - || fts_doc_id_col == altered_table->s->stored_fields); - } else { - char* index_name; - index->name = index_name = static_cast( - mem_heap_alloc( - heap, - 1 + sizeof FTS_DOC_ID_INDEX_NAME)); - *index_name++ = TEMP_INDEX_PREFIX; - memcpy(index_name, FTS_DOC_ID_INDEX_NAME, - sizeof FTS_DOC_ID_INDEX_NAME); - } + index->name = FTS_DOC_ID_INDEX_NAME; + index->rebuild = rebuild; /* TODO: assign a real MySQL key number for this */ index->key_number = ULINT_UNDEFINED; @@ -2099,7 +2777,7 @@ created_clustered: /*******************************************************************//** Check each index column size, make sure they do not exceed the max limit -@return true if index column size exceeds limit */ +@return true if index column size exceeds limit */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_check_column_length( @@ -2115,113 +2793,6 @@ innobase_check_column_length( return(false); } -struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx -{ - /** Dummy query graph */ - que_thr_t* thr; - /** reference to the prebuilt struct of the creating instance */ - row_prebuilt_t*&prebuilt; - /** InnoDB indexes being created */ - dict_index_t** add_index; - /** MySQL key numbers for the InnoDB indexes that are being created */ - const ulint* add_key_numbers; - /** number of InnoDB indexes being created */ - ulint num_to_add_index; - /** InnoDB indexes being dropped */ - dict_index_t** drop_index; - /** number of InnoDB indexes being dropped */ - const ulint num_to_drop_index; - /** InnoDB foreign key constraints being dropped */ - dict_foreign_t** drop_fk; - /** number of InnoDB foreign key constraints being dropped */ - const ulint num_to_drop_fk; - /** InnoDB foreign key constraints being added */ - dict_foreign_t** add_fk; - /** number of InnoDB foreign 
key constraints being dropped */ - const ulint num_to_add_fk; - /** whether to create the indexes online */ - bool online; - /** memory heap */ - mem_heap_t* heap; - /** dictionary transaction */ - trx_t* trx; - /** original table (if rebuilt, differs from indexed_table) */ - dict_table_t* old_table; - /** table where the indexes are being created or dropped */ - dict_table_t* new_table; - /** mapping of old column numbers to new ones, or NULL */ - const ulint* col_map; - /** new column names, or NULL if nothing was renamed */ - const char** col_names; - /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */ - const ulint add_autoinc; - /** default values of ADD COLUMN, or NULL */ - const dtuple_t* add_cols; - /** autoinc sequence to use */ - ib_sequence_t sequence; - /** maximum auto-increment value */ - ulonglong max_autoinc; - /** temporary table name to use for old table when renaming tables */ - const char* tmp_name; - - ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg, - dict_index_t** drop_arg, - ulint num_to_drop_arg, - dict_foreign_t** drop_fk_arg, - ulint num_to_drop_fk_arg, - dict_foreign_t** add_fk_arg, - ulint num_to_add_fk_arg, - bool online_arg, - mem_heap_t* heap_arg, - dict_table_t* new_table_arg, - const char** col_names_arg, - ulint add_autoinc_arg, - ulonglong autoinc_col_min_value_arg, - ulonglong autoinc_col_max_value_arg) : - inplace_alter_handler_ctx(), - prebuilt (prebuilt_arg), - add_index (0), add_key_numbers (0), num_to_add_index (0), - drop_index (drop_arg), num_to_drop_index (num_to_drop_arg), - drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg), - add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg), - online (online_arg), heap (heap_arg), trx (0), - old_table (prebuilt_arg->table), - new_table (new_table_arg), - col_map (0), col_names (col_names_arg), - add_autoinc (add_autoinc_arg), - add_cols (0), - sequence(prebuilt->trx->mysql_thd, - autoinc_col_min_value_arg, autoinc_col_max_value_arg), - max_autoinc (0), 
- tmp_name (0) - { -#ifdef UNIV_DEBUG - for (ulint i = 0; i < num_to_add_index; i++) { - ut_ad(!add_index[i]->to_be_dropped); - } - for (ulint i = 0; i < num_to_drop_index; i++) { - ut_ad(drop_index[i]->to_be_dropped); - } -#endif /* UNIV_DEBUG */ - - thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap); - } - - ~ha_innobase_inplace_ctx() - { - mem_heap_free(heap); - } - - /** Determine if the table will be rebuilt. - @return whether the table will be rebuilt */ - bool need_rebuild () const { return(old_table != new_table); } - -private: - // Disable copying - ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&); - ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&); -}; - /********************************************************************//** Drop any indexes that we were not able to free previously due to open table handles. */ @@ -2240,7 +2811,7 @@ online_retry_drop_indexes_low( may have prebuilt->table pointing to the table. However, these other threads should be between statements, waiting for the next statement to execute, or for a meta-data lock. 
*/ - ut_ad(table->n_ref_count >= 1); + ut_ad(table->get_ref_count() >= 1); if (table->drop_aborted) { row_merge_drop_indexes(trx, table, TRUE); @@ -2269,12 +2840,10 @@ online_retry_drop_indexes( trx_free_for_mysql(trx); } -#ifdef UNIV_DEBUG - mutex_enter(&dict_sys->mutex); - dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE); - mutex_exit(&dict_sys->mutex); - ut_a(!table->drop_aborted); -#endif /* UNIV_DEBUG */ + ut_d(mutex_enter(&dict_sys->mutex)); + ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE)); + ut_d(mutex_exit(&dict_sys->mutex)); + ut_ad(!table->drop_aborted); } /********************************************************************//** @@ -2287,7 +2856,9 @@ online_retry_drop_indexes_with_trx( dict_table_t* table, /*!< in/out: table */ trx_t* trx) /*!< in/out: transaction */ { - ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED) + || trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK)); + ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); /* Now that the dictionary is being locked, check if we can @@ -2305,11 +2876,11 @@ online_retry_drop_indexes_with_trx( } /** Determines if InnoDB is dropping a foreign key constraint. -@param foreign the constraint -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped +@param foreign the constraint +@param drop_fk constraints being dropped +@param n_drop_fk number of constraints that are being dropped @return whether the constraint is being dropped */ -inline MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +inline MY_ATTRIBUTE((warn_unused_result)) bool innobase_dropping_foreign( /*======================*/ @@ -2328,15 +2899,15 @@ innobase_dropping_foreign( /** Determines if an InnoDB FOREIGN KEY constraint depends on a column that is being dropped or modified to NOT NULL. 
-@param user_table InnoDB table as it is before the ALTER operation -@param col_name Name of the column being altered -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped -@param drop true=drop column, false=set NOT NULL -@retval true Not allowed (will call my_error()) -@retval false Allowed +@param user_table InnoDB table as it is before the ALTER operation +@param col_name Name of the column being altered +@param drop_fk constraints being dropped +@param n_drop_fk number of constraints that are being dropped +@param drop true=drop column, false=set NOT NULL +@retval true Not allowed (will call my_error()) +@retval false Allowed */ -static MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) bool innobase_check_foreigns_low( /*========================*/ @@ -2412,7 +2983,7 @@ innobase_check_foreigns_low( display_name, (sizeof display_name) - 1, foreign->foreign_table_name, strlen(foreign->foreign_table_name), - NULL, TRUE); + NULL); *buf_end = '\0'; my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD, MYF(0), col_name, foreign->id, @@ -2427,16 +2998,16 @@ innobase_check_foreigns_low( /** Determines if an InnoDB FOREIGN KEY constraint depends on a column that is being dropped or modified to NOT NULL. 
-@param ha_alter_info Data used during in-place alter -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param user_table InnoDB table as it is before the ALTER operation -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped -@retval true Not allowed (will call my_error()) -@retval false Allowed +@param ha_alter_info Data used during in-place alter +@param altered_table MySQL table that is being altered +@param old_table MySQL table as it is before the ALTER operation +@param user_table InnoDB table as it is before the ALTER operation +@param drop_fk constraints being dropped +@param n_drop_fk number of constraints that are being dropped +@retval true Not allowed (will call my_error()) +@retval false Allowed */ -static MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) bool innobase_check_foreigns( /*====================*/ @@ -2475,12 +3046,35 @@ innobase_check_foreigns( return(false); } +/** Get the default POINT value in MySQL format +@param[in] heap memory heap where allocated +@param[in] length length of MySQL format +@return mysql format data */ +static +const byte* +innobase_build_default_mysql_point( + mem_heap_t* heap, + ulint length) +{ + byte* buf = static_cast(mem_heap_alloc( + heap, DATA_POINT_LEN + length)); + + byte* wkb = buf + length; + + ulint len = get_wkb_of_default_point(SPDIMS, wkb, DATA_POINT_LEN); + ut_ad(len == DATA_POINT_LEN); + + row_mysql_store_blob_ref(buf, length, wkb, len); + + return(buf); +} + /** Convert a default value for ADD COLUMN. 
-@param heap Memory heap where allocated -@param dfield InnoDB data field to copy to -@param field MySQL value for the column -@param comp nonzero if in compact format */ +@param heap Memory heap where allocated +@param dfield InnoDB data field to copy to +@param field MySQL value for the column +@param comp nonzero if in compact format */ static MY_ATTRIBUTE((nonnull)) void innobase_build_col_map_add( @@ -2499,21 +3093,33 @@ innobase_build_col_map_add( byte* buf = static_cast(mem_heap_alloc(heap, size)); + const byte* mysql_data = field->ptr; + + if (dfield_get_type(dfield)->mtype == DATA_POINT) { + /** If the DATA_POINT field is NOT NULL, we need to + give it a default value, since DATA_POINT is a fixed length + type, we couldn't store a value of length 0, like other + geom types. Server doesn't provide the default value, and + we would use POINT(0 0) here instead. */ + + mysql_data = innobase_build_default_mysql_point(heap, size); + } + row_mysql_store_col_in_innobase_format( - dfield, buf, TRUE, field->ptr, size, comp); + dfield, buf, true, mysql_data, size, comp); } /** Construct the translation table for reordering, dropping or adding columns. 
-@param ha_alter_info Data used during in-place alter -@param altered_table MySQL table that is being altered -@param table MySQL table as it is before the ALTER operation -@param new_table InnoDB table corresponding to MySQL altered_table -@param old_table InnoDB table corresponding to MYSQL table -@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN -@param heap Memory heap where allocated -@return array of integers, mapping column numbers in the table +@param ha_alter_info Data used during in-place alter +@param altered_table MySQL table that is being altered +@param table MySQL table as it is before the ALTER operation +@param new_table InnoDB table corresponding to MySQL altered_table +@param old_table InnoDB table corresponding to MYSQL table +@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN +@param heap Memory heap where allocated +@return array of integers, mapping column numbers in the table to column numbers in altered_table */ static MY_ATTRIBUTE((nonnull(1,2,3,4,5,7), warn_unused_result)) const ulint* @@ -2527,13 +3133,15 @@ innobase_build_col_map( dtuple_t* add_cols, mem_heap_t* heap) { - uint old_i, old_innobase_i; + uint old_i, old_innobase_i; DBUG_ENTER("innobase_build_col_map"); DBUG_ASSERT(altered_table != table); DBUG_ASSERT(new_table != old_table); DBUG_ASSERT(dict_table_get_n_cols(new_table) + + dict_table_get_n_v_cols(new_table) >= altered_table->s->stored_fields + DATA_N_SYS_COLS); DBUG_ASSERT(dict_table_get_n_cols(old_table) + + dict_table_get_n_v_cols(old_table) >= table->s->stored_fields + DATA_N_SYS_COLS); DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN)); @@ -2541,50 +3149,86 @@ innobase_build_col_map( == dict_table_get_n_cols(new_table)); ulint* col_map = static_cast( - mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map)); + mem_heap_alloc( + heap, (old_table->n_cols + old_table->n_v_cols) + * sizeof *col_map)); List_iterator_fast cf_it( 
ha_alter_info->alter_info->create_list); uint i = 0, sql_idx = 0; + uint num_v = 0; /* Any dropped columns will map to ULINT_UNDEFINED. */ for (old_innobase_i = 0; - old_innobase_i + DATA_N_SYS_COLS < old_table->n_cols; + old_innobase_i + DATA_N_SYS_COLS < old_table->n_cols; old_innobase_i++) { col_map[old_innobase_i] = ULINT_UNDEFINED; } + for (uint old_i = 0; old_i < old_table->n_v_cols; old_i++) { + col_map[old_i + old_table->n_cols] = ULINT_UNDEFINED; + } + while (const Create_field* new_field = cf_it++) { - if (!new_field->stored_in_db()) - { - sql_idx++; - continue; - } + bool is_v = false; + + if (innobase_is_v_fld(new_field)) { + is_v = true; + } + + ulint num_old_v = 0; + + if (!new_field->stored_in_db()) + { + sql_idx++; + continue; + } + for (old_i = 0, old_innobase_i= 0; - table->field[old_i]; - old_i++) { + table->field[old_i]; + old_i++) { const Field* field = table->field[old_i]; - if (!table->field[old_i]->stored_in_db()) - continue; + + if (!table->field[old_i]->stored_in_db()) { + continue; + } + + if (innobase_is_v_fld(field)) { + if (is_v && new_field->field == field) { + col_map[old_table->n_cols + num_v] + = num_old_v; + num_old_v++; + goto found_col; + } + num_old_v++; + continue; + } + if (new_field->field == field) { col_map[old_innobase_i] = i; goto found_col; } - old_innobase_i++; + old_innobase_i++; } + ut_ad(!is_v); innobase_build_col_map_add( heap, dtuple_get_nth_field(add_cols, i), altered_table->field[sql_idx], dict_table_is_comp(new_table)); found_col: - i++; - sql_idx++; + if (is_v) { + num_v++; + } else { + i++; + sql_idx++; + } } - DBUG_ASSERT(i == altered_table->s->stored_fields); + DBUG_ASSERT(i == altered_table->s->stored_fields - num_v); i = table->s->stored_fields; + //i = table->s->fields - old_table->n_v_cols; /* Add the InnoDB hidden FTS_DOC_ID column, if any. 
*/ if (i + DATA_N_SYS_COLS < old_table->n_cols) { @@ -2597,6 +3241,7 @@ found_col: old_table, table->s->stored_fields), FTS_DOC_ID_COL_NAME)); if (altered_table->s->stored_fields + DATA_N_SYS_COLS + - new_table->n_v_cols < new_table->n_cols) { DBUG_ASSERT(DICT_TF2_FLAG_IS_SET( new_table, @@ -2605,6 +3250,9 @@ found_col: + DATA_N_SYS_COLS + 1 == new_table->n_cols); col_map[i] = altered_table->s->stored_fields; + /* col_map[i] = altered_table->s->fields + - new_table->n_v_cols; + */ } else { DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET( new_table, @@ -2628,9 +3276,9 @@ found_col: /** Drop newly create FTS index related auxiliary table during FIC create index process, before fts_add_index is called -@param table table that was being rebuilt online -@param trx transaction -@return DB_SUCCESS if successful, otherwise last error code +@param table table that was being rebuilt online +@param trx transaction +@return DB_SUCCESS if successful, otherwise last error code */ static dberr_t @@ -2658,7 +3306,7 @@ innobase_drop_fts_index_table( return(ret_err); } -/** Get the new column names if any columns were renamed +/** Get the new non-virtual column names if any columns were renamed @param ha_alter_info Data used during in-place alter @param altered_table MySQL table that is being altered @param table MySQL table as it is before the ALTER operation @@ -2678,7 +3326,7 @@ innobase_get_col_names( uint i; DBUG_ENTER("innobase_get_col_names"); - DBUG_ASSERT(user_table->n_def > table->s->fields); + DBUG_ASSERT(user_table->n_t_def > table->s->fields); DBUG_ASSERT(ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME); @@ -2689,11 +3337,20 @@ innobase_get_col_names( List_iterator_fast cf_it( ha_alter_info->alter_info->create_list); while (const Create_field* new_field = cf_it++) { + ulint num_v = 0; DBUG_ASSERT(i < altered_table->s->fields); + if (innobase_is_v_fld(new_field)) { + continue; + } + for (uint old_i = 0; table->field[old_i]; old_i++) { + if 
(innobase_is_v_fld(table->field[old_i])) { + num_v++; + } + if (new_field->field == table->field[old_i]) { - cols[old_i] = new_field->field_name; + cols[old_i - num_v] = new_field->field_name; break; } } @@ -2702,7 +3359,7 @@ innobase_get_col_names( } /* Copy the internal column names. */ - i = table->s->fields; + i = table->s->fields - user_table->n_v_def; cols[i] = dict_table_get_col_name(user_table, i); while (++i < user_table->n_def) { @@ -2712,21 +3369,1097 @@ innobase_get_col_names( DBUG_RETURN(cols); } +/** Check whether the column prefix is increased, decreased, or unchanged. +@param[in] new_prefix_len new prefix length +@param[in] old_prefix_len new prefix length +@retval 1 prefix is increased +@retval 0 prefix is unchanged +@retval -1 prefix is decreased */ +static inline +lint +innobase_pk_col_prefix_compare( + ulint new_prefix_len, + ulint old_prefix_len) +{ + ut_ad(new_prefix_len < REC_MAX_DATA_SIZE); + ut_ad(old_prefix_len < REC_MAX_DATA_SIZE); + + if (new_prefix_len == old_prefix_len) { + return(0); + } + + if (new_prefix_len == 0) { + new_prefix_len = ULINT_MAX; + } + + if (old_prefix_len == 0) { + old_prefix_len = ULINT_MAX; + } + + if (new_prefix_len > old_prefix_len) { + return(1); + } else { + return(-1); + } +} + +/** Check whether the column is existing in old table. +@param[in] new_col_no new column no +@param[in] col_map mapping of old column numbers to new ones +@param[in] col_map_size the column map size +@return true if the column is existing, otherwise false. */ +static inline +bool +innobase_pk_col_is_existing( + const ulint new_col_no, + const ulint* col_map, + const ulint col_map_size) +{ + for (ulint i = 0; i < col_map_size; i++) { + if (col_map[i] == new_col_no) { + return(true); + } + } + + return(false); +} + +/** Determine whether both the indexes have same set of primary key +fields arranged in the same order. 
+ +Rules when we cannot skip sorting: +(1) Removing existing PK columns somewhere else than at the end of the PK; +(2) Adding existing columns to the PK, except at the end of the PK when no +columns are removed from the PK; +(3) Changing the order of existing PK columns; +(4) Decreasing the prefix length just like removing existing PK columns +follows rule(1), Increasing the prefix length just like adding existing +PK columns follows rule(2). +@param[in] col_map mapping of old column numbers to new ones +@param[in] ha_alter_info Data used during in-place alter +@param[in] old_clust_index index to be compared +@param[in] new_clust_index index to be compared +@retval true if both indexes have same order. +@retval false. */ +static MY_ATTRIBUTE((warn_unused_result)) +bool +innobase_pk_order_preserved( + const ulint* col_map, + const dict_index_t* old_clust_index, + const dict_index_t* new_clust_index) +{ + ulint old_n_uniq + = dict_index_get_n_ordering_defined_by_user( + old_clust_index); + ulint new_n_uniq + = dict_index_get_n_ordering_defined_by_user( + new_clust_index); + + ut_ad(dict_index_is_clust(old_clust_index)); + ut_ad(dict_index_is_clust(new_clust_index)); + ut_ad(old_clust_index->table != new_clust_index->table); + ut_ad(col_map != NULL); + + if (old_n_uniq == 0) { + /* There was no PRIMARY KEY in the table. + If there is no PRIMARY KEY after the ALTER either, + no sorting is needed. */ + return(new_n_uniq == old_n_uniq); + } + + /* DROP PRIMARY KEY is only allowed in combination with + ADD PRIMARY KEY. */ + ut_ad(new_n_uniq > 0); + + /* The order of the last processed new_clust_index key field, + not counting ADD COLUMN, which are constant. 
*/ + lint last_field_order = -1; + ulint existing_field_count = 0; + ulint old_n_cols = dict_table_get_n_cols(old_clust_index->table); + for (ulint new_field = 0; new_field < new_n_uniq; new_field++) { + ulint new_col_no = + new_clust_index->fields[new_field].col->ind; + + /* Check if there is a match in old primary key. */ + ulint old_field = 0; + while (old_field < old_n_uniq) { + ulint old_col_no = + old_clust_index->fields[old_field].col->ind; + + if (col_map[old_col_no] == new_col_no) { + break; + } + + old_field++; + } + + /* The order of key field in the new primary key. + 1. old PK column: idx in old primary key + 2. existing column: old_n_uniq + sequence no + 3. newly added column: no order */ + lint new_field_order; + const bool old_pk_column = old_field < old_n_uniq; + + if (old_pk_column) { + new_field_order = old_field; + } else if (innobase_pk_col_is_existing(new_col_no, col_map, + old_n_cols)) { + new_field_order = old_n_uniq + existing_field_count++; + } else { + /* Skip newly added column. */ + continue; + } + + if (last_field_order + 1 != new_field_order) { + /* Old PK order is not kept, or existing column + is not added at the end of old PK. */ + return(false); + } + + last_field_order = new_field_order; + + if (!old_pk_column) { + continue; + } + + /* Check prefix length change. */ + const lint prefix_change = innobase_pk_col_prefix_compare( + new_clust_index->fields[new_field].prefix_len, + old_clust_index->fields[old_field].prefix_len); + + if (prefix_change < 0) { + /* If a column's prefix length is decreased, it should + be the last old PK column in new PK. + Note: we set last_field_order to -2, so that if there + are any old PK colmns or existing columns after it in + new PK, the comparison to new_field_order will fail in + the next round.*/ + last_field_order = -2; + } else if (prefix_change > 0) { + /* If a column's prefix length is increased, it should + be the last PK column in old PK. 
*/ + if (old_field != old_n_uniq - 1) { + return(false); + } + } + } + + return(true); +} + +/** Update the mtype from DATA_BLOB to DATA_GEOMETRY for a specified +GIS column of a table. This is used when we want to create spatial index +on legacy GIS columns coming from 5.6, where we store GIS data as DATA_BLOB +in innodb layer. +@param[in] table_id table id +@param[in] col_name column name +@param[in] trx data dictionary transaction +@retval true Failure +@retval false Success */ +static +bool +innobase_update_gis_column_type( + table_id_t table_id, + const char* col_name, + trx_t* trx) +{ + pars_info_t* info; + dberr_t error; + + DBUG_ENTER("innobase_update_gis_column_type"); + + DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); + ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + + info = pars_info_create(); + + pars_info_add_ull_literal(info, "tableid", table_id); + pars_info_add_str_literal(info, "name", col_name); + pars_info_add_int4_literal(info, "mtype", DATA_GEOMETRY); + + trx->op_info = "update column type to DATA_GEOMETRY"; + + error = que_eval_sql( + info, + "PROCEDURE UPDATE_SYS_COLUMNS_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_COLUMNS SET MTYPE=:mtype\n" + "WHERE TABLE_ID=:tableid AND NAME=:name;\n" + "END;\n", + false, trx); + + trx->error_state = DB_SUCCESS; + trx->op_info = ""; + + DBUG_RETURN(error != DB_SUCCESS); +} + +/** Check if we are creating spatial indexes on GIS columns, which are +legacy columns from earlier MySQL, such as 5.6. If so, we have to update +the mtypes of the old GIS columns to DATA_GEOMETRY. +In 5.6, we store GIS columns as DATA_BLOB in InnoDB layer, it will introduce +confusion when we run latest server on older data. That's why we need to +do the upgrade. 
+@param[in] ha_alter_info Data used during in-place alter +@param[in] table Table on which we want to add indexes +@param[in] trx Transaction +@return DB_SUCCESS if update successfully or no columns need to be updated, +otherwise DB_ERROR, which means we can't update the mtype for some +column, and creating spatial index on it should be dangerous */ +static +dberr_t +innobase_check_gis_columns( + Alter_inplace_info* ha_alter_info, + dict_table_t* table, + trx_t* trx) +{ + DBUG_ENTER("innobase_check_gis_columns"); + + for (uint key_num = 0; + key_num < ha_alter_info->index_add_count; + key_num++) { + + const KEY& key = ha_alter_info->key_info_buffer[ + ha_alter_info->index_add_buffer[key_num]]; + + if (!(key.flags & HA_SPATIAL)) { + continue; + } + + ut_ad(key.user_defined_key_parts == 1); + const KEY_PART_INFO& key_part = key.key_part[0]; + + /* Does not support spatial index on virtual columns */ + if (innobase_is_v_fld(key_part.field)) { + DBUG_RETURN(DB_UNSUPPORTED); + } + + ulint col_nr = dict_table_has_column( + table, + key_part.field->field_name, + key_part.fieldnr); + ut_ad(col_nr != table->n_def); + dict_col_t* col = &table->cols[col_nr]; + + if (col->mtype != DATA_BLOB) { + ut_ad(DATA_GEOMETRY_MTYPE(col->mtype)); + continue; + } + + const char* col_name = dict_table_get_col_name( + table, col_nr); + + if (innobase_update_gis_column_type( + table->id, col_name, trx)) { + + DBUG_RETURN(DB_ERROR); + } else { + col->mtype = DATA_GEOMETRY; + + ib::info() << "Updated mtype of column" << col_name + << " in table " << table->name + << ", whose id is " << table->id + << " to DATA_GEOMETRY"; + } + } + + DBUG_RETURN(DB_SUCCESS); +} + +#ifdef MYSQL_VIRTUAL_COLUMNS + +/** Collect virtual column info for its addition +@param[in] ha_alter_info Data used during in-place alter +@param[in] altered_table MySQL table that is being altered to +@param[in] table MySQL table as it is before the ALTER operation +@retval true Failure +@retval false Success */ +static +bool 
+prepare_inplace_add_virtual( + Alter_inplace_info* ha_alter_info, + const TABLE* altered_table, + const TABLE* table) +{ + ha_innobase_inplace_ctx* ctx; + ulint i = 0; + ulint j = 0; + const Create_field* new_field; + + ctx = static_cast + (ha_alter_info->handler_ctx); + + ctx->num_to_add_vcol = altered_table->s->fields + + ctx->num_to_drop_vcol - table->s->fields; + + ctx->add_vcol = static_cast( + mem_heap_zalloc(ctx->heap, ctx->num_to_add_vcol + * sizeof *ctx->add_vcol)); + ctx->add_vcol_name = static_cast( + mem_heap_alloc(ctx->heap, ctx->num_to_add_vcol + * sizeof *ctx->add_vcol_name)); + + List_iterator_fast cf_it( + ha_alter_info->alter_info->create_list); + + while ((new_field = (cf_it++)) != NULL) { + const Field* field = new_field->field; + ulint old_i; + + for (old_i = 0; table->field[old_i]; old_i++) { + const Field* n_field = table->field[old_i]; + if (field == n_field) { + break; + } + } + + i++; + + if (table->field[old_i]) { + continue; + } + + ut_ad(!field); + + ulint col_len; + ulint is_unsigned; + ulint field_type; + ulint charset_no; + + field = altered_table->field[i - 1]; + + ulint col_type + = get_innobase_type_from_mysql_type( + &is_unsigned, field); + + + if (!field->gcol_info || field->stored_in_db) { + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + field->field_name); + return(true); + } + + col_len = field->pack_length(); + field_type = (ulint) field->type(); + + if (!field->real_maybe_null()) { + field_type |= DATA_NOT_NULL; + } + + if (field->binary()) { + field_type |= DATA_BINARY_TYPE; + } + + if (is_unsigned) { + field_type |= DATA_UNSIGNED; + } + + if (dtype_is_string_type(col_type)) { + charset_no = (ulint) field->charset()->number; + + DBUG_EXECUTE_IF( + "ib_alter_add_virtual_fail", + charset_no += MAX_CHAR_COLL_NUM;); + + if (charset_no > MAX_CHAR_COLL_NUM) { + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + field->field_name); + return(true); + } + } else { + charset_no = 0; + } + + if (field->type() == MYSQL_TYPE_VARCHAR) { + uint32 
length_bytes + = static_cast( + field)->length_bytes; + + col_len -= length_bytes; + + if (length_bytes == 2) { + field_type |= DATA_LONG_TRUE_VARCHAR; + } + } + + + ctx->add_vcol[j].m_col.prtype = dtype_form_prtype( + field_type, charset_no); + + ctx->add_vcol[j].m_col.prtype |= DATA_VIRTUAL; + + ctx->add_vcol[j].m_col.mtype = col_type; + + ctx->add_vcol[j].m_col.len = col_len; + + ctx->add_vcol[j].m_col.ind = i - 1; + /* ctx->add_vcol[j].num_base = + field->gcol_info->non_virtual_base_columns(); + */ + ctx->add_vcol_name[j] = field->field_name; + ctx->add_vcol[j].base_col = static_cast( + mem_heap_alloc(ctx->heap, ctx->add_vcol[j].num_base + * sizeof *(ctx->add_vcol[j].base_col))); + ctx->add_vcol[j].v_pos = ctx->old_table->n_v_cols + - ctx->num_to_drop_vcol + j; + + /* No need to track the list */ + ctx->add_vcol[j].v_indexes = NULL; + innodb_base_col_setup(ctx->old_table, field, &ctx->add_vcol[j]); + j++; + } + + return(false); +} + +/** Collect virtual column info for its addition +@param[in] ha_alter_info Data used during in-place alter +@param[in] altered_table MySQL table that is being altered to +@param[in] table MySQL table as it is before the ALTER operation +@retval true Failure +@retval false Success */ +static +bool +prepare_inplace_drop_virtual( + Alter_inplace_info* ha_alter_info, + const TABLE* altered_table, + const TABLE* table) +{ + ha_innobase_inplace_ctx* ctx; + ulint i = 0; + ulint j = 0; + Alter_drop *drop; + + ctx = static_cast + (ha_alter_info->handler_ctx); + + ctx->num_to_drop_vcol = ha_alter_info->alter_info->drop_list.elements; + + ctx->drop_vcol = static_cast( + mem_heap_alloc(ctx->heap, ctx->num_to_drop_vcol + * sizeof *ctx->drop_vcol)); + ctx->drop_vcol_name = static_cast( + mem_heap_alloc(ctx->heap, ctx->num_to_drop_vcol + * sizeof *ctx->drop_vcol_name)); + + List_iterator_fast cf_it( + ha_alter_info->alter_info->drop_list); + + while ((drop = (cf_it++)) != NULL) { + const Field* field; + ulint old_i; + + ut_ad(drop->type == 
Alter_drop::COLUMN); + + for (old_i = 0; table->field[old_i]; old_i++) { + const Field* n_field = table->field[old_i]; + if (!my_strcasecmp(system_charset_info, + n_field->field_name, drop->name)) { + break; + } + } + + i++; + + if (!table->field[old_i]) { + continue; + } + + ulint col_len; + ulint is_unsigned; + ulint field_type; + ulint charset_no; + + field = table->field[old_i]; + + ulint col_type + = get_innobase_type_from_mysql_type( + &is_unsigned, field); + + + if (!field->gcol_info || field->stored_in_db) { + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + field->field_name); + return(true); + } + + col_len = field->pack_length(); + field_type = (ulint) field->type(); + + if (!field->real_maybe_null()) { + field_type |= DATA_NOT_NULL; + } + + if (field->binary()) { + field_type |= DATA_BINARY_TYPE; + } + + if (is_unsigned) { + field_type |= DATA_UNSIGNED; + } + + if (dtype_is_string_type(col_type)) { + charset_no = (ulint) field->charset()->number; + + DBUG_EXECUTE_IF( + "ib_alter_add_virtual_fail", + charset_no += MAX_CHAR_COLL_NUM;); + + if (charset_no > MAX_CHAR_COLL_NUM) { + my_error(ER_WRONG_KEY_COLUMN, MYF(0), + field->field_name); + return(true); + } + } else { + charset_no = 0; + } + + if (field->type() == MYSQL_TYPE_VARCHAR) { + uint32 length_bytes + = static_cast( + field)->length_bytes; + + col_len -= length_bytes; + + if (length_bytes == 2) { + field_type |= DATA_LONG_TRUE_VARCHAR; + } + } + + + ctx->drop_vcol[j].m_col.prtype = dtype_form_prtype( + field_type, charset_no); + + ctx->drop_vcol[j].m_col.prtype |= DATA_VIRTUAL; + + ctx->drop_vcol[j].m_col.mtype = col_type; + + ctx->drop_vcol[j].m_col.len = col_len; + + ctx->drop_vcol[j].m_col.ind = old_i; + + ctx->drop_vcol_name[j] = field->field_name; + + dict_v_col_t* v_col = dict_table_get_nth_v_col_mysql( + ctx->old_table, old_i); + ctx->drop_vcol[j].v_pos = v_col->v_pos; + j++; + } + + return(false); +} + +/** Insert a new record to INNODB SYS_VIRTUAL +@param[in] table InnoDB table +@param[in] pos 
virtual column column no +@param[in] base_pos base column pos +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_insert_sys_virtual( + const dict_table_t* table, + ulint pos, + ulint base_pos, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_ull_literal(info, "id", table->id); + + pars_info_add_int4_literal(info, "pos", pos); + + pars_info_add_int4_literal(info, "base_pos", base_pos); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_VIRTUAL VALUES" + "(:id, :pos, :base_pos);\n" + "END;\n", + FALSE, trx); + + return(error); +} + +/** Update INNODB SYS_COLUMNS on new virtual columns +@param[in] table InnoDB table +@param[in] col_name column name +@param[in] vcol virtual column +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_add_one_virtual( + const dict_table_t* table, + const char* col_name, + dict_v_col_t* vcol, + trx_t* trx) +{ + ulint pos = dict_create_v_col_pos(vcol->v_pos, + vcol->m_col.ind); + ulint mtype = vcol->m_col.mtype; + ulint prtype = vcol->m_col.prtype; + ulint len = vcol->m_col.len; + pars_info_t* info = pars_info_create(); + + pars_info_add_ull_literal(info, "id", table->id); + + pars_info_add_int4_literal(info, "pos", pos); + + pars_info_add_str_literal(info, "name", col_name); + pars_info_add_int4_literal(info, "mtype", mtype); + pars_info_add_int4_literal(info, "prtype", prtype); + pars_info_add_int4_literal(info, "len", len); + pars_info_add_int4_literal(info, "prec", vcol->num_base); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_COLUMNS VALUES" + "(:id, :pos, :name, :mtype, :prtype, :len, :prec);\n" + "END;\n", + FALSE, trx); + + if (error != DB_SUCCESS) { + return(error); + } + + for (ulint i = 0; i < vcol->num_base; i++) { + error = innobase_insert_sys_virtual( + table, pos, 
vcol->base_col[i]->ind, trx); + if (error != DB_SUCCESS) { + return(error); + } + } + + return(error); +} + +/** Update INNODB SYS_TABLES on number of virtual columns +@param[in] user_table InnoDB table +@param[in] n_col number of columns +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_update_n_virtual( + const dict_table_t* table, + ulint n_col, + trx_t* trx) +{ + dberr_t err = DB_SUCCESS; + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "num_col", n_col); + pars_info_add_ull_literal(info, "id", table->id); + + err = que_eval_sql( + info, + "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_TABLES" + " SET N_COLS = :num_col\n" + " WHERE ID = :id;\n" + "END;\n", FALSE, trx); + + return(err); +} + +/** Update system table for adding virtual column(s) +@param[in] ha_alter_info Data used during in-place alter +@param[in] altered_table MySQL table that is being altered +@param[in] table MySQL table as it is before the ALTER operation +@param[in] user_table InnoDB table +@param[in] trx transaction +@retval true Failure +@retval false Success */ +static +bool +innobase_add_virtual_try( + Alter_inplace_info* ha_alter_info, + const TABLE* altered_table, + const TABLE* table, + const dict_table_t* user_table, + trx_t* trx) +{ + ha_innobase_inplace_ctx* ctx; + dberr_t err = DB_SUCCESS; + + ctx = static_cast( + ha_alter_info->handler_ctx); + + for (ulint i = 0; i < ctx->num_to_add_vcol; i++) { + + err = innobase_add_one_virtual( + user_table, ctx->add_vcol_name[i], + &ctx->add_vcol[i], trx); + + if (err != DB_SUCCESS) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: ADD COLUMN...VIRTUAL"); + return(true); + } + } + + + ulint n_col = user_table->n_cols; + ulint n_v_col = user_table->n_v_cols; + + n_v_col += ctx->num_to_add_vcol; + + n_col -= dict_table_get_n_sys_cols(user_table); + + n_v_col -= ctx->num_to_drop_vcol; + + ulint new_n = 
dict_table_encode_n_col(n_col, n_v_col) + + ((user_table->flags & DICT_TF_COMPACT) << 31); + + err = innobase_update_n_virtual(user_table, new_n, trx); + + if (err != DB_SUCCESS) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: ADD COLUMN...VIRTUAL"); + return(true); + } + + return(false); +} + +/** Update INNODB SYS_COLUMNS on new virtual column's position +@param[in] table InnoDB table +@param[in] old_pos old position +@param[in] new_pos new position +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_update_v_pos_sys_columns( + const dict_table_t* table, + ulint old_pos, + ulint new_pos, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "pos", old_pos); + pars_info_add_int4_literal(info, "val", new_pos); + pars_info_add_ull_literal(info, "id", table->id); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "UPDATE SYS_COLUMNS\n" + "SET POS = :val\n" + "WHERE POS = :pos\n" + "AND TABLE_ID = :id;\n" + "END;\n", + FALSE, trx); + + return(error); +} + +/** Update INNODB SYS_VIRTUAL table with new virtual column position +@param[in] table InnoDB table +@param[in] old_pos old position +@param[in] new_pos new position +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_update_v_pos_sys_virtual( + const dict_table_t* table, + ulint old_pos, + ulint new_pos, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "pos", old_pos); + pars_info_add_int4_literal(info, "val", new_pos); + pars_info_add_ull_literal(info, "id", table->id); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "UPDATE SYS_VIRTUAL\n" + "SET POS = :val\n" + "WHERE POS = :pos\n" + "AND TABLE_ID = :id;\n" + "END;\n", + FALSE, trx); + + return(error); +} + +/** Update InnoDB system tables on dropping a virtual column +@param[in] table 
InnoDB table +@param[in] col_name column name of the dropping column +@param[in] drop_col col information for the dropping column +@param[in] n_prev_dropped number of previously dropped columns in the + same alter clause +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_drop_one_virtual_sys_columns( + const dict_table_t* table, + const char* col_name, + dict_col_t* drop_col, + ulint n_prev_dropped, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + pars_info_add_ull_literal(info, "id", table->id); + + pars_info_add_str_literal(info, "name", col_name); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "DELETE FROM SYS_COLUMNS\n" + "WHERE TABLE_ID = :id\n" + "AND NAME = :name;\n" + "END;\n", + FALSE, trx); + + if (error != DB_SUCCESS) { + return(error); + } + + dict_v_col_t* v_col = dict_table_get_nth_v_col_mysql( + table, drop_col->ind); + + /* Adjust column positions for all subsequent columns */ + for (ulint i = v_col->v_pos + 1; i < table->n_v_cols; i++) { + dict_v_col_t* t_col = dict_table_get_nth_v_col(table, i); + ulint old_p = dict_create_v_col_pos( + t_col->v_pos - n_prev_dropped, + t_col->m_col.ind - n_prev_dropped); + ulint new_p = dict_create_v_col_pos( + t_col->v_pos - 1 - n_prev_dropped, + t_col->m_col.ind - 1 - n_prev_dropped); + + error = innobase_update_v_pos_sys_columns( + table, old_p, new_p, trx); + if (error != DB_SUCCESS) { + return(error); + } + error = innobase_update_v_pos_sys_virtual( + table, old_p, new_p, trx); + if (error != DB_SUCCESS) { + return(error); + } + } + + return(error); +} + +/** Delete virtual column's info from INNODB SYS_VIRTUAL +@param[in] table InnoDB table +@param[in] pos position of the virtual column to be deleted +@param[in] trx transaction +@return DB_SUCCESS if successful, otherwise error code */ +static +dberr_t +innobase_drop_one_virtual_sys_virtual( + const dict_table_t* table, + ulint pos, + trx_t* trx) +{ + 
pars_info_t* info = pars_info_create(); + pars_info_add_ull_literal(info, "id", table->id); + + pars_info_add_int4_literal(info, "pos", pos); + + dberr_t error = que_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "DELETE FROM SYS_VIRTUAL\n" + "WHERE TABLE_ID = :id\n" + "AND POS = :pos;\n" + "END;\n", + FALSE, trx); + + return(error); +} + +/** Update system table for dropping virtual column(s) +@param[in] ha_alter_info Data used during in-place alter +@param[in] altered_table MySQL table that is being altered +@param[in] table MySQL table as it is before the ALTER operation +@param[in] user_table InnoDB table +@param[in] trx transaction +@retval true Failure +@retval false Success */ +static +bool +innobase_drop_virtual_try( + Alter_inplace_info* ha_alter_info, + const TABLE* altered_table, + const TABLE* table, + const dict_table_t* user_table, + trx_t* trx) +{ + ha_innobase_inplace_ctx* ctx; + dberr_t err = DB_SUCCESS; + + ctx = static_cast + (ha_alter_info->handler_ctx); + + for (ulint i = 0; i < ctx->num_to_drop_vcol; i++) { + + ulint pos = dict_create_v_col_pos( + ctx->drop_vcol[i].v_pos - i, + ctx->drop_vcol[i].m_col.ind - i); + err = innobase_drop_one_virtual_sys_virtual( + user_table, pos, trx); + + if (err != DB_SUCCESS) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: DROP COLUMN...VIRTUAL"); + return(true); + } + + err = innobase_drop_one_virtual_sys_columns( + user_table, ctx->drop_vcol_name[i], + &(ctx->drop_vcol[i].m_col), i, trx); + + if (err != DB_SUCCESS) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: DROP COLUMN...VIRTUAL"); + return(true); + } + } + + + ulint n_col = user_table->n_cols; + ulint n_v_col = user_table->n_v_cols; + + n_v_col -= ctx->num_to_drop_vcol; + + n_col -= dict_table_get_n_sys_cols(user_table); + + ulint new_n = dict_table_encode_n_col(n_col, n_v_col) + + ((user_table->flags & DICT_TF_COMPACT) << 31); + + err = innobase_update_n_virtual(user_table, new_n, trx); + + if (err != DB_SUCCESS) { + 
my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: DROP COLUMN...VIRTUAL"); + } + + return(false); +} + +/** Adjust the create index column number from "New table" to +"old InnoDB table" while we are doing dropping virtual column. Since we do +not create separate new table for the dropping/adding virtual columns. +To correctly find the indexed column, we will need to find its col_no +in the "Old Table", not the "New table". +@param[in] ha_alter_info Data used during in-place alter +@param[in] old_table MySQL table as it is before the ALTER operation +@param[in] num_v_dropped number of virtual column dropped +@param[in,out] index_def index definition */ +static +void +innodb_v_adjust_idx_col( + const Alter_inplace_info* ha_alter_info, + const TABLE* old_table, + ulint num_v_dropped, + index_def_t* index_def) +{ + List_iterator_fast cf_it( + ha_alter_info->alter_info->create_list); + for (ulint i = 0; i < index_def->n_fields; i++) { +#ifdef UNIV_DEBUG + bool col_found = false; +#endif /* UNIV_DEBUG */ + ulint num_v = 0; + + index_field_t* index_field = &index_def->fields[i]; + + /* Only adjust virtual column col_no, since non-virtual + column position (in non-vcol list) won't change unless + table rebuild */ + if (!index_field->is_v_col) { + continue; + } + + const Field* field = NULL; + + cf_it.rewind(); + + /* Found the field in the new table */ + while (const Create_field* new_field = cf_it++) { + if (!new_field->is_virtual_gcol()) { + continue; + } + + field = new_field->field; + + if (num_v == index_field->col_no) { + break; + } + num_v++; + } + + if (!field) { + /* this means the field is a newly added field, this + should have been blocked when we drop virtual column + at the same time */ + ut_ad(num_v_dropped > 0); + ut_a(0); + } + + ut_ad(field->is_virtual_gcol()); + + num_v = 0; + + /* Look for its position in old table */ + for (uint old_i = 0; old_table->field[old_i]; old_i++) { + if (old_table->field[old_i] == field) { + /* Found it, adjust its col_no to its 
position + in old table */ + index_def->fields[i].col_no = num_v; + ut_d(col_found = true); + break; + } + + if (old_table->field[old_i]->is_virtual_gcol()) { + num_v++; + } + } + + ut_ad(col_found); + } +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /** Update internal structures with concurrent writes blocked, while preparing ALTER TABLE. -@param ha_alter_info Data used during in-place alter -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param table_name Table name in MySQL -@param flags Table and tablespace flags -@param flags2 Additional table flags -@param fts_doc_id_col The column number of FTS_DOC_ID -@param add_fts_doc_id Flag: add column FTS_DOC_ID? +@param ha_alter_info Data used during in-place alter +@param altered_table MySQL table that is being altered +@param old_table MySQL table as it is before the ALTER operation +@param table_name Table name in MySQL +@param flags Table and tablespace flags +@param flags2 Additional table flags +@param fts_doc_id_col The column number of FTS_DOC_ID +@param add_fts_doc_id Flag: add column FTS_DOC_ID? @param add_fts_doc_id_idx Flag: add index FTS_DOC_ID_INDEX (FTS_DOC_ID)? 
-@retval true Failure -@retval false Success +@retval true Failure +@retval false Success */ static MY_ATTRIBUTE((warn_unused_result, nonnull(1,2,3,4))) bool @@ -2749,8 +4482,9 @@ prepare_inplace_alter_table_dict( dict_index_t* fts_index = NULL; ulint new_clustered = 0; dberr_t error; + const char* punch_hole_warning = NULL; ulint num_fts_index; - uint sql_idx; + dict_add_v_col_t* add_v = NULL; ha_innobase_inplace_ctx*ctx; DBUG_ENTER("prepare_inplace_alter_table_dict"); @@ -2772,7 +4506,43 @@ prepare_inplace_alter_table_dict( user_table = ctx->new_table; - trx_start_if_not_started_xa(ctx->prebuilt->trx); + trx_start_if_not_started_xa(ctx->prebuilt->trx, true); + +#ifdef MYSQL_VIRTUAL_COLUMNS + if (ha_alter_info->handler_flags + & Alter_inplace_info::DROP_VIRTUAL_COLUMN) { + if (prepare_inplace_drop_virtual( + ha_alter_info, altered_table, old_table)) { + DBUG_RETURN(true); + } + } + + if (ha_alter_info->handler_flags + & Alter_inplace_info::ADD_VIRTUAL_COLUMN) { + if (prepare_inplace_add_virtual( + ha_alter_info, altered_table, old_table)) { + DBUG_RETURN(true); + } + + /* Need information for newly added virtual columns + for create index */ + + if (ha_alter_info->handler_flags + & Alter_inplace_info::ADD_INDEX) { + add_v = static_cast( + mem_heap_alloc(ctx->heap, sizeof *add_v)); + add_v->n_v_col = ctx->num_to_add_vcol; + add_v->v_col = ctx->add_vcol; + add_v->v_col_name = ctx->add_vcol_name; + } + } + + /* + There should be no order change for virtual columns coming in + here + */ + ut_ad(check_v_col_in_order(old_table, altered_table, ha_alter_info)); +#endif /* MYSQL_VIRTUAL_COLUMNS */ /* Create a background transaction for the operations on the data dictionary tables. */ @@ -2816,7 +4586,7 @@ prepare_inplace_alter_table_dict( check_if_supported_inplace_alter(). 
*/ ut_ad(0); my_error(ER_NOT_SUPPORTED_YET, MYF(0), - thd_query_string(ctx->prebuilt->trx->mysql_thd)->str); + thd_query(ctx->prebuilt->trx->mysql_thd)); goto error_handled; } @@ -2883,12 +4653,16 @@ prepare_inplace_alter_table_dict( const char* new_table_name = dict_mem_create_temporary_tablename( ctx->heap, - ctx->new_table->name, + ctx->new_table->name.m_name, ctx->new_table->id); - ulint n_cols; + ulint n_cols = 0; + ulint n_v_cols = 0; + ulint n_mv_cols = 0; dtuple_t* add_cols; + ulint space_id = 0; ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY; fil_encryption_t mode = FIL_SPACE_ENCRYPTION_DEFAULT; + const char* compression=NULL; crypt_data = fil_space_get_crypt_data(ctx->prebuilt->table->space); @@ -2903,7 +4677,21 @@ prepare_inplace_alter_table_dict( goto new_clustered_failed; } - n_cols = altered_table->s->stored_fields; + for (uint i = 0; i < altered_table->s->fields; i++) { + const Field* field = altered_table->field[i]; + + if (innobase_is_v_fld(field)) { + n_v_cols++; + } else { + if (field->stored_in_db()) { + n_cols++; + } else { + n_mv_cols++; + } + } + } + + ut_ad(n_cols + n_v_cols + n_mv_cols == altered_table->s->fields); if (add_fts_doc_id) { n_cols++; @@ -2925,9 +4713,26 @@ prepare_inplace_alter_table_dict( goto new_clustered_failed; } - /* The initial space id 0 may be overridden later. */ + /* Use the old tablespace unless the tablespace + is changing. */ + if (DICT_TF_HAS_SHARED_SPACE(user_table->flags) + && (ha_alter_info->create_info->tablespace == NULL + || (0 == strcmp(ha_alter_info->create_info->tablespace, + user_table->tablespace)))) { + space_id = user_table->space; + } else if (tablespace_is_shared_space( + ha_alter_info->create_info)) { + space_id = fil_space_get_id_by_name( + ha_alter_info->create_info->tablespace); + ut_a(space_id != ULINT_UNDEFINED); + } + + /* The initial space id 0 may be overridden later if this + table is going to be a file_per_table tablespace. 
*/ ctx->new_table = dict_mem_table_create( - new_table_name, 0, n_cols, flags, flags2); + new_table_name, space_id, n_cols + n_v_cols, n_v_cols, + flags, flags2); + /* The rebuilt indexed_table will use the renamed column names. */ ctx->col_names = NULL; @@ -2938,20 +4743,21 @@ prepare_inplace_alter_table_dict( user_table->data_dir_path); } - sql_idx= 0; - for (uint i = 0; i < altered_table->s->stored_fields; i++, sql_idx++) { - const Field* field; - while (!((field= altered_table->field[sql_idx])-> - stored_in_db())) - sql_idx++; + for (uint i = 0, sql_idx=0; i < altered_table->s->stored_fields; i++, sql_idx++) { + Field* field; ulint is_unsigned; - ulint field_type - = (ulint) field->type(); + ulint charset_no; + ulint col_len; + + while (!((field= altered_table->field[sql_idx])->stored_in_db())) { + sql_idx++; + } + + ulint field_type = (ulint) field->type(); + bool is_virtual = innobase_is_v_fld(field); ulint col_type = get_innobase_type_from_mysql_type( &is_unsigned, field); - ulint charset_no; - ulint col_len; /* we assume in dtype_form_prtype() that this fits in two bytes */ @@ -3001,6 +4807,13 @@ prepare_inplace_alter_table_dict( if (length_bytes == 2) { field_type |= DATA_LONG_TRUE_VARCHAR; } + + } + + if (col_type == DATA_POINT) { + /* DATA_POINT should be of fixed length, + instead of the pack_length(blob length). 
*/ + col_len = DATA_POINT_LEN; } if (dict_col_name_is_reserved(field->field_name)) { @@ -3010,27 +4823,93 @@ prepare_inplace_alter_table_dict( goto new_clustered_failed; } - dict_mem_table_add_col( - ctx->new_table, ctx->heap, - field->field_name, - col_type, - dtype_form_prtype(field_type, charset_no), - col_len); + if (is_virtual) { +#ifdef MYSQL_VIRTUAL_COLUMNS + dict_mem_table_add_v_col( + ctx->new_table, ctx->heap, + field->field_name, + col_type, + dtype_form_prtype( + field_type, charset_no) + | DATA_VIRTUAL, + col_len, i, + field->gcol_info->non_virtual_base_columns()); +#endif /* MYSQL_VIRTUAL_COLUMNS */ + } else { + dict_mem_table_add_col( + ctx->new_table, ctx->heap, + field->field_name, + col_type, + dtype_form_prtype( + field_type, charset_no), + col_len); + } } +#ifdef MYSQL_VIRTUAL_COLUMNS + ulint z = 0; + + if (n_v_cols) { + for (uint i = 0; i < altered_table->s->fields; i++) { + dict_v_col_t* v_col; + const Field* field = altered_table->field[i]; + + if (!innobase_is_v_fld(field)) { + continue; + } + v_col = dict_table_get_nth_v_col( + ctx->new_table, z); + z++; + innodb_base_col_setup( + ctx->new_table, field, v_col); + } + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + if (add_fts_doc_id) { fts_add_doc_id_column(ctx->new_table, ctx->heap); ctx->new_table->fts->doc_col = fts_doc_id_col; + ut_ad(fts_doc_id_col + == altered_table->s->stored_fields - n_v_cols); ut_ad(fts_doc_id_col == altered_table->s->stored_fields); + } else if (ctx->new_table->fts) { ctx->new_table->fts->doc_col = fts_doc_id_col; } +#ifdef MYSQL_COMPRESSION + compression = ha_alter_info->create_info->compress.str; + + if (Compression::validate(compression) != DB_SUCCESS) { + + compression = NULL; + } +#endif /* MYSQL_COMPRESSION */ + error = row_create_table_for_mysql( - ctx->new_table, ctx->trx, false, mode, key_id); + ctx->new_table, compression, ctx->trx, false, mode, key_id); + + punch_hole_warning = + (error == DB_IO_NO_PUNCH_HOLE_FS) + ? 
"Punch hole is not supported by the file system" + : "Page Compression is not supported for this" + " tablespace"; switch (error) { dict_table_t* temp_table; + case DB_IO_NO_PUNCH_HOLE_FS: + case DB_IO_NO_PUNCH_HOLE_TABLESPACE: + push_warning_printf( + ctx->prebuilt->trx->mysql_thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "%s. Compression disabled for '%s'", + punch_hole_warning, + ctx->new_table->name.m_name); + + error = DB_SUCCESS; + + case DB_SUCCESS: /* We need to bump up the table ref count and before we can use it we need to open the @@ -3039,13 +4918,13 @@ prepare_inplace_alter_table_dict( the dict_sys->mutex. */ ut_ad(mutex_own(&dict_sys->mutex)); temp_table = dict_table_open_on_name( - ctx->new_table->name, TRUE, FALSE, + ctx->new_table->name.m_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); ut_a(ctx->new_table == temp_table); /* n_ref_count must be 1, because purge cannot be executing on this very table as we are holding dict_operation_lock X-latch. */ - DBUG_ASSERT(ctx->new_table->n_ref_count == 1); + DBUG_ASSERT(ctx->new_table->get_ref_count() == 1); break; case DB_TABLESPACE_EXISTS: my_error(ER_TABLESPACE_EXISTS, MYF(0), @@ -3055,13 +4934,17 @@ prepare_inplace_alter_table_dict( my_error(HA_ERR_TABLE_EXIST, MYF(0), altered_table->s->table_name.str); goto new_clustered_failed; + case DB_UNSUPPORTED: + my_error(ER_UNSUPPORTED_EXTENSION, MYF(0), + ctx->new_table->name.m_name); + goto new_clustered_failed; default: my_error_innodb(error, table_name, flags); - new_clustered_failed: +new_clustered_failed: DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx); trx_rollback_to_savepoint(ctx->trx, NULL); - ut_ad(user_table->n_ref_count == 1); + ut_ad(user_table->get_ref_count() == 1); online_retry_drop_indexes_with_trx( user_table, ctx->trx); @@ -3070,9 +4953,10 @@ prepare_inplace_alter_table_dict( if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) { - add_cols = dtuple_create( + add_cols = dtuple_create_with_vcol( ctx->heap, - 
dict_table_get_n_cols(ctx->new_table)); + dict_table_get_n_cols(ctx->new_table), + dict_table_get_n_v_cols(ctx->new_table)); dict_table_copy_types(add_cols, ctx->new_table); } else { @@ -3087,12 +4971,46 @@ prepare_inplace_alter_table_dict( } else { DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table)); + for (dict_index_t* index + = dict_table_get_first_index(user_table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (!index->to_be_dropped + && dict_index_is_corrupted(index)) { + my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); + goto error_handled; + } + } + + for (dict_index_t* index + = dict_table_get_first_index(user_table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (!index->to_be_dropped + && dict_index_is_corrupted(index)) { + my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); + goto error_handled; + } + } + if (!ctx->new_table->fts && innobase_fulltext_exist(altered_table)) { ctx->new_table->fts = fts_create( ctx->new_table); ctx->new_table->fts->doc_col = fts_doc_id_col; } + + /* Check if we need to update mtypes of legacy GIS columns. 
+ This check is only needed when we don't have to rebuild + the table, since rebuild would update all mtypes for GIS + columns */ + error = innobase_check_gis_columns( + ha_alter_info, ctx->new_table, ctx->trx); + if (error != DB_SUCCESS) { + ut_ad(error == DB_ERROR); + error = DB_UNSUPPORTED; + goto error_handling; + } } /* Assign table_id, so that no table id of @@ -3104,9 +5022,18 @@ prepare_inplace_alter_table_dict( for (ulint a = 0; a < ctx->num_to_add_index; a++) { +#ifdef MYSQL_VIRTUAL_COLUMNS + if (index_defs[a].ind_type & DICT_VIRTUAL + && ctx->num_to_drop_vcol > 0 && !new_clustered) { + innodb_v_adjust_idx_col(ha_alter_info, old_table, + ctx->num_to_drop_vcol, + &index_defs[a]); + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + ctx->add_index[a] = row_merge_create_index( ctx->trx, ctx->new_table, - &index_defs[a], ctx->col_names); + &index_defs[a], add_v, ctx->col_names); add_key_nums[a] = index_defs[a].key_number; @@ -3116,6 +5043,9 @@ prepare_inplace_alter_table_dict( goto error_handling; } + DBUG_ASSERT(ctx->add_index[a]->is_committed() + == !!new_clustered); + if (ctx->add_index[a]->type & DICT_FTS) { DBUG_ASSERT(num_fts_index); DBUG_ASSERT(!fts_index); @@ -3145,8 +5075,8 @@ prepare_inplace_alter_table_dict( rw_lock_x_lock(&ctx->add_index[a]->lock); bool ok = row_log_allocate(ctx->add_index[a], - NULL, true, NULL, - NULL, path); + NULL, true, NULL, NULL, + path); rw_lock_x_unlock(&ctx->add_index[a]->lock); if (!ok) { @@ -3162,22 +5092,31 @@ prepare_inplace_alter_table_dict( error = DB_OUT_OF_MEMORY; goto error_handling;); - if (new_clustered && ctx->online) { - /* Allocate a log for online table rebuild. 
*/ - dict_index_t* clust_index = dict_table_get_first_index( + if (new_clustered) { + dict_index_t* clust_index = dict_table_get_first_index( user_table); + dict_index_t* new_clust_index = dict_table_get_first_index( + ctx->new_table); + ctx->skip_pk_sort = innobase_pk_order_preserved( + ctx->col_map, clust_index, new_clust_index); - rw_lock_x_lock(&clust_index->lock); - bool ok = row_log_allocate( - clust_index, ctx->new_table, - !(ha_alter_info->handler_flags - & Alter_inplace_info::ADD_PK_INDEX), - ctx->add_cols, ctx->col_map, path); - rw_lock_x_unlock(&clust_index->lock); + DBUG_EXECUTE_IF("innodb_alter_table_pk_assert_no_sort", + DBUG_ASSERT(ctx->skip_pk_sort);); - if (!ok) { - error = DB_OUT_OF_MEMORY; - goto error_handling; + if (ctx->online) { + /* Allocate a log for online table rebuild. */ + rw_lock_x_lock(&clust_index->lock); + bool ok = row_log_allocate( + clust_index, ctx->new_table, + !(ha_alter_info->handler_flags + & Alter_inplace_info::ADD_PK_INDEX), + ctx->add_cols, ctx->col_map, path); + rw_lock_x_unlock(&clust_index->lock); + + if (!ok) { + error = DB_OUT_OF_MEMORY; + goto error_handling; + } } } @@ -3205,11 +5144,17 @@ op_ok: #endif /* UNIV_DEBUG */ ut_ad(ctx->trx->dict_operation_lock_mode == RW_X_LATCH); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS); + if (new_clustered) { + /* For !new_clustered, this will be set at + commit_cache_norebuild(). */ + ctx->new_table->fts_doc_id_index + = dict_table_get_index_on_name( + ctx->new_table, FTS_DOC_ID_INDEX_NAME); + DBUG_ASSERT(ctx->new_table->fts_doc_id_index != NULL); + } /* This function will commit the transaction and reset the trx_t::dict_operation flag on success. 
*/ @@ -3230,7 +5175,7 @@ op_ok: || ib_vector_size(ctx->new_table->fts->indexes) == 0) { error = fts_create_common_tables( ctx->trx, ctx->new_table, - user_table->name, TRUE); + user_table->name.m_name, TRUE); DBUG_EXECUTE_IF( "innodb_test_fail_after_fts_common_table", @@ -3292,6 +5237,9 @@ error_handling: case DB_DUPLICATE_KEY: my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES"); break; + case DB_UNSUPPORTED: + my_error(ER_TABLE_CANT_HANDLE_SPKEYS, MYF(0), "SYS_COLUMNS"); + break; default: my_error_innodb(error, table_name, user_table->flags); } @@ -3314,17 +5262,7 @@ error_handled: ctx->new_table, ctx->trx); } - dict_table_close(ctx->new_table, TRUE, FALSE); - -#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG - /* Nobody should have initialized the stats of the - newly created table yet. When this is the case, we - know that it has not been added for background stats - gathering. */ - ut_a(!ctx->new_table->stat_initialized); -#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ - - row_merge_drop_table(ctx->trx, ctx->new_table); + dict_table_close_and_drop(ctx->trx, ctx->new_table); /* Free the log for online table rebuild, if one was allocated. */ @@ -3348,7 +5286,7 @@ error_handled: /* n_ref_count must be 1, because purge cannot be executing on this very table as we are holding dict_operation_lock X-latch. */ - DBUG_ASSERT(user_table->n_ref_count == 1 || ctx->online); + DBUG_ASSERT(user_table->get_ref_count() == 1 || ctx->online); online_retry_drop_indexes_with_trx(user_table, ctx->trx); } else { @@ -3361,12 +5299,14 @@ error_handled: ut_ad(!user_table->drop_aborted); err_exit: +#ifdef UNIV_DEBUG /* Clear the to_be_dropped flag in the data dictionary cache. 
*/ for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - DBUG_ASSERT(*ctx->drop_index[i]->name != TEMP_INDEX_PREFIX); + DBUG_ASSERT(ctx->drop_index[i]->is_committed()); DBUG_ASSERT(ctx->drop_index[i]->to_be_dropped); ctx->drop_index[i]->to_be_dropped = 0; } +#endif /* UNIV_DEBUG */ row_mysql_unlock_data_dictionary(ctx->trx); @@ -3473,19 +5413,257 @@ innobase_check_foreign_key_index( return(false); } +#ifdef MYSQL_RENAME_INDEX +/** +Rename a given index in the InnoDB data dictionary. + +@param index index to rename +@param new_name new name of the index +@param[in,out] trx dict transaction to use, not going to be committed here + +@retval true Failure +@retval false Success */ +static MY_ATTRIBUTE((warn_unused_result)) +bool +rename_index_in_data_dictionary( +/*============================*/ + const dict_index_t* index, + const char* new_name, + trx_t* trx) +{ + DBUG_ENTER("rename_index_in_data_dictionary"); + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + + pars_info_t* pinfo; + dberr_t err; + + pinfo = pars_info_create(); + + pars_info_add_ull_literal(pinfo, "table_id", index->table->id); + pars_info_add_ull_literal(pinfo, "index_id", index->id); + pars_info_add_str_literal(pinfo, "new_name", new_name); + + trx->op_info = "Renaming an index in SYS_INDEXES"; + + DBUG_EXECUTE_IF( + "ib_rename_index_fail1", + DBUG_SET("+d,innodb_report_deadlock"); + ); + + err = que_eval_sql( + pinfo, + "PROCEDURE RENAME_INDEX_IN_SYS_INDEXES () IS\n" + "BEGIN\n" + "UPDATE SYS_INDEXES SET\n" + "NAME = :new_name\n" + "WHERE\n" + "ID = :index_id AND\n" + "TABLE_ID = :table_id;\n" + "END;\n", + FALSE, trx); /* pinfo is freed by que_eval_sql() */ + + DBUG_EXECUTE_IF( + "ib_rename_index_fail1", + DBUG_SET("-d,innodb_report_deadlock"); + ); + + trx->op_info = ""; + + if (err != DB_SUCCESS) { + my_error_innodb(err, index->table->name.m_name, 0); + DBUG_RETURN(true); + } + + 
DBUG_RETURN(false); +} + +/** +Rename all indexes in data dictionary of a given table that are +specified in ha_alter_info. + +@param ctx alter context, used to fetch the list of indexes to +rename +@param ha_alter_info fetch the new names from here +@param[in,out] trx dict transaction to use, not going to be committed here + +@retval true Failure +@retval false Success */ +static MY_ATTRIBUTE((warn_unused_result)) +bool +rename_indexes_in_data_dictionary( +/*==============================*/ + const ha_innobase_inplace_ctx* ctx, + const Alter_inplace_info* ha_alter_info, + trx_t* trx) +{ + DBUG_ENTER("rename_indexes_in_data_dictionary"); + + ut_ad(ctx->num_to_rename == ha_alter_info->index_rename_count); + + for (ulint i = 0; i < ctx->num_to_rename; i++) { + + KEY_PAIR* pair = &ha_alter_info->index_rename_buffer[i]; + dict_index_t* index; + + index = ctx->rename[i]; + + ut_ad(strcmp(index->name, pair->old_key->name) == 0); + + if (rename_index_in_data_dictionary(index, + pair->new_key->name, + trx)) { + /* failed */ + DBUG_RETURN(true); + } + } + + DBUG_RETURN(false); +} + +/** +Rename a given index in the InnoDB data dictionary cache. + +@param[in,out] index index to rename +@param new_name new index name +*/ +static +void +rename_index_in_cache( +/*==================*/ + dict_index_t* index, + const char* new_name) +{ + DBUG_ENTER("rename_index_in_cache"); + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + + size_t old_name_len = strlen(index->name); + size_t new_name_len = strlen(new_name); + + if (old_name_len >= new_name_len) { + /* reuse the old buffer for the name if it is large enough */ + memcpy(const_cast(index->name()), new_name, + new_name_len + 1); + } else { + /* Free the old chunk of memory if it is at the topmost + place in the heap, otherwise the old chunk will be freed + when the index is evicted from the cache. 
This code will + kick-in in a repeated ALTER sequences where the old name is + alternately longer/shorter than the new name: + 1. ALTER TABLE t RENAME INDEX a TO aa; + 2. ALTER TABLE t RENAME INDEX aa TO a; + 3. go to 1. */ + index->name = mem_heap_strdup_replace( + index->heap, + /* Presumed topmost element of the heap: */ + index->name, old_name_len + 1, + new_name); + } + + DBUG_VOID_RETURN; +} + +# +/** +Rename all indexes in data dictionary cache of a given table that are +specified in ha_alter_info. + +@param ctx alter context, used to fetch the list of indexes to rename +@param ha_alter_info fetch the new names from here +*/ +static +void +rename_indexes_in_cache( +/*====================*/ + const ha_innobase_inplace_ctx* ctx, + const Alter_inplace_info* ha_alter_info) +{ + DBUG_ENTER("rename_indexes_in_cache"); + + ut_ad(ctx->num_to_rename == ha_alter_info->index_rename_count); + + for (ulint i = 0; i < ctx->num_to_rename; i++) { + KEY_PAIR* pair = &ha_alter_info->index_rename_buffer[i]; + dict_index_t* index; + + index = ctx->rename[i]; + + ut_ad(strcmp(index->name, pair->old_key->name) == 0); + + rename_index_in_cache(index, pair->new_key->name); + } + + DBUG_VOID_RETURN; +} +#endif /* MYSQL_RENAME_INDEX */ + +#ifdef MYSQL_VIRTUAL_COLUMNS +/** Fill the stored column information in s_cols list. +@param[in] altered_table mysql table object +@param[in] table innodb table object +@param[out] s_cols list of stored column +@param[out] s_heap heap for storing stored +column information. 
*/ +static +void +alter_fill_stored_column( + const TABLE* altered_table, + dict_table_t* table, + dict_s_col_list** s_cols, + mem_heap_t** s_heap) +{ + ulint n_cols = altered_table->s->fields; + + for (ulint i = 0; i < n_cols; i++) { + Field* field = altered_table->field[i]; + dict_s_col_t s_col; + + if (!innobase_is_s_fld(field)) { + continue; + } + + ulint num_base = field->gcol_info->non_virtual_base_columns(); + dict_col_t* col = dict_table_get_nth_col(table, i); + + s_col.m_col = col; + s_col.s_pos = i; + + if (*s_cols == NULL) { + *s_cols = UT_NEW_NOKEY(dict_s_col_list()); + *s_heap = mem_heap_create(1000); + } + + if (num_base != 0) { + s_col.base_col = static_cast(mem_heap_zalloc( + *s_heap, num_base * sizeof(dict_col_t*))); + } else { + s_col.base_col = NULL; + } + + s_col.num_base = num_base; + innodb_base_col_setup_for_stored(table, field, &s_col); + (*s_cols)->push_back(s_col); + } +} +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /** Allows InnoDB to update internal structures with concurrent writes blocked (provided that check_if_supported_inplace_alter() did not return HA_ALTER_INPLACE_NO_LOCK). This will be invoked before inplace_alter_table(). -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done +@param altered_table TABLE object for new version of table. +@param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. 
-@retval true Failure -@retval false Success +@retval true Failure +@retval false Success */ -UNIV_INTERN + bool ha_innobase::prepare_inplace_alter_table( /*=====================================*/ @@ -3494,16 +5672,16 @@ ha_innobase::prepare_inplace_alter_table( { dict_index_t** drop_index; /*!< Index to be dropped */ ulint n_drop_index; /*!< Number of indexes to drop */ + dict_index_t** rename_index; /*!< Indexes to be dropped */ + ulint n_rename_index; /*!< Number of indexes to rename */ dict_foreign_t**drop_fk; /*!< Foreign key constraints to drop */ ulint n_drop_fk; /*!< Number of foreign keys to drop */ dict_foreign_t**add_fk = NULL; /*!< Foreign key constraints to drop */ ulint n_add_fk; /*!< Number of foreign keys to drop */ dict_table_t* indexed_table; /*!< Table where indexes are created */ - mem_heap_t* heap; + mem_heap_t* heap; const char** col_names; int error; - ulint flags; - ulint flags2; ulint max_col_len; ulint add_autoinc_col_no = ULINT_UNDEFINED; ulonglong autoinc_col_max_value = 0; @@ -3511,6 +5689,8 @@ ha_innobase::prepare_inplace_alter_table( bool add_fts_doc_id = false; bool add_fts_doc_id_idx = false; bool add_fts_idx = false; + dict_s_col_list*s_cols = NULL; + mem_heap_t* s_heap = NULL; DBUG_ENTER("prepare_inplace_alter_table"); DBUG_ASSERT(!ha_alter_info->handler_ctx); @@ -3525,7 +5705,7 @@ ha_innobase::prepare_inplace_alter_table( MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE); #ifdef UNIV_DEBUG - for (dict_index_t* index = dict_table_get_first_index(prebuilt->table); + for (dict_index_t* index = dict_table_get_first_index(m_prebuilt->table); index; index = dict_table_get_next_index(index)) { ut_ad(!index->to_be_dropped); @@ -3534,31 +5714,94 @@ ha_innobase::prepare_inplace_alter_table( ut_d(mutex_enter(&dict_sys->mutex)); ut_d(dict_table_check_for_dup_indexes( - prebuilt->table, CHECK_ABORTED_OK)); + m_prebuilt->table, CHECK_ABORTED_OK)); ut_d(mutex_exit(&dict_sys->mutex)); if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) 
{ /* Nothing to do */ - goto func_exit; + DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0); + if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) { + + online_retry_drop_indexes( + m_prebuilt->table, m_user_thd); + + } + DBUG_RETURN(false); } + indexed_table = m_prebuilt->table; + + if (indexed_table->is_encrypted) { + String str; + const char* engine= table_type(); + push_warning_printf(m_user_thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_DECRYPTION_FAILED, + "Table %s is encrypted but encryption service or" + " used key_id is not available. " + " Can't continue reading table.", + indexed_table->name); + get_error_message(HA_ERR_DECRYPTION_FAILED, &str); + my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine); + + DBUG_RETURN(true); + } + + if (indexed_table->corrupted + || dict_table_get_first_index(indexed_table) == NULL + || dict_index_is_corrupted( + dict_table_get_first_index(indexed_table))) { + /* The clustered index is corrupted. */ + my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); + DBUG_RETURN(true); + } + + /* ALTER TABLE will not implicitly move a table from a single-table + tablespace to the system tablespace when innodb_file_per_table=OFF. + But it will implicitly move a table from the system tablespace to a + single-table tablespace if innodb_file_per_table = ON. + Tables found in a general tablespace will stay there unless ALTER + TABLE contains another TABLESPACE=name. If that is found it will + explicitly move a table to the named tablespace. + So if you specify TABLESPACE=`innodb_system` a table can be moved + into the system tablespace from either a general or file-per-table + tablespace. But from then on, it is labeled as using a shared space + (the create options have tablespace=='innodb_system' and the + SHARED_SPACE flag is set in the table flags) so it can no longer be + implicitly moved to a file-per-table tablespace. 
*/ + bool in_system_space = is_system_tablespace(indexed_table->space); + bool is_file_per_table = !in_system_space + && !DICT_TF_HAS_SHARED_SPACE(indexed_table->flags); +#ifdef UNIV_DEBUG + bool in_general_space = !in_system_space + && DICT_TF_HAS_SHARED_SPACE(indexed_table->flags); + + /* The table being altered can only be in a system tablespace, + or its own file-per-table tablespace, or a general tablespace. */ + ut_ad(1 == in_system_space + is_file_per_table + in_general_space); +#endif /* UNIV_DEBUG */ + + create_table_info_t info(m_user_thd, + altered_table, + ha_alter_info->create_info, + NULL, + NULL, + NULL, + NULL); + + info.set_tablespace_type(is_file_per_table); + if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + const char* invalid_opt = info.create_options_are_invalid(); + /* Check engine specific table options */ - if (const char* invalid_tbopt = check_table_options( - user_thd, altered_table, - ha_alter_info->create_info, - prebuilt->table->space != 0, - srv_file_format)) { + if (const char* invalid_tbopt = info.check_table_options()) { my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), table_type(), invalid_tbopt); goto err_exit_no_heap; } - if (const char* invalid_opt = create_options_are_invalid( - user_thd, altered_table, - ha_alter_info->create_info, - prebuilt->table->space != 0)) { + if (invalid_opt) { my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), table_type(), invalid_opt); goto err_exit_no_heap; @@ -3567,18 +5810,20 @@ ha_innobase::prepare_inplace_alter_table( /* Check if any index name is reserved. 
*/ if (innobase_index_name_is_reserved( - user_thd, + m_user_thd, ha_alter_info->key_info_buffer, ha_alter_info->key_count)) { err_exit_no_heap: - DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0); + DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0); if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) { - online_retry_drop_indexes(prebuilt->table, user_thd); + + online_retry_drop_indexes( + m_prebuilt->table, m_user_thd); } DBUG_RETURN(true); } - indexed_table = prebuilt->table; + indexed_table = m_prebuilt->table; /* Check that index keys are sensible */ error = innobase_check_index_keys(ha_alter_info, indexed_table); @@ -3623,16 +5868,18 @@ check_if_ok_to_rename: } /* Prohibit renaming a column to an internal column. */ - const char* s = prebuilt->table->col_names; + const char* s = m_prebuilt->table->col_names; unsigned j; /* Skip user columns. MySQL should have checked these already. We want to allow renaming of c1 to c2, c2 to c1. */ for (j = 0; j < table->s->fields; j++) { - s += strlen(s) + 1; + if (!innobase_is_v_fld(table->field[j])) { + s += strlen(s) + 1; + } } - for (; j < prebuilt->table->n_def; j++) { + for (; j < m_prebuilt->table->n_def; j++) { if (!my_strcasecmp( system_charset_info, name, s)) { my_error(ER_WRONG_COLUMN_NAME, MYF(0), @@ -3645,16 +5892,11 @@ check_if_ok_to_rename: } } - if (!innobase_table_flags(altered_table, - ha_alter_info->create_info, - user_thd, - srv_file_per_table - || indexed_table->space != 0, - &flags, &flags2)) { + if (!info.innobase_table_flags()) { goto err_exit_no_heap; } - max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags); + max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(info.flags()); /* Check each index's column length to make sure they do not exceed limit */ @@ -3706,11 +5948,11 @@ check_if_ok_to_rename: /* We need to drop any corrupted fts indexes before we add a new fts index. 
*/ if (add_fts_idx && index->type & DICT_CORRUPT) { - ib_errf(user_thd, IB_LOG_LEVEL_ERROR, + ib_errf(m_user_thd, IB_LOG_LEVEL_ERROR, ER_INNODB_INDEX_CORRUPT, "Fulltext index '%s' is corrupt. " "you should drop this index first.", - index->name); + index->name()); goto err_exit_no_heap; } @@ -3768,8 +6010,8 @@ check_if_ok_to_rename: } for (dict_foreign_set::iterator it - = prebuilt->table->foreign_set.begin(); - it != prebuilt->table->foreign_set.end(); + = m_prebuilt->table->foreign_set.begin(); + it != m_prebuilt->table->foreign_set.end(); ++it) { dict_foreign_t* foreign = *it; @@ -3789,13 +6031,14 @@ check_if_ok_to_rename: } my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), - drop->type_name(), drop->name); + drop->type_name(), drop->name); goto err_exit; found_fk: continue; } DBUG_ASSERT(n_drop_fk > 0); + DBUG_ASSERT(n_drop_fk == ha_alter_info->alter_info->drop_list.elements); } else { @@ -3819,16 +6062,16 @@ found_fk: const KEY* key = ha_alter_info->index_drop_buffer[i]; dict_index_t* index - = dict_table_get_index_on_name_and_min_id( + = dict_table_get_index_on_name( indexed_table, key->name); if (!index) { push_warning_printf( - user_thd, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_WRONG_INDEX, - "InnoDB could not find key " - "with name %s", key->name); + "InnoDB could not find key" + " with name %s", key->name); } else { ut_ad(!index->to_be_dropped); if (!dict_index_is_clust(index)) { @@ -3848,8 +6091,8 @@ found_fk: && !DICT_TF2_FLAG_IS_SET( indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) { dict_index_t* fts_doc_index - = dict_table_get_index_on_name( - indexed_table, FTS_DOC_ID_INDEX_NAME); + = indexed_table->fts_doc_id_index; + ut_ad(fts_doc_index); // Add some fault tolerance for non-debug builds. if (fts_doc_index == NULL) { @@ -3879,7 +6122,7 @@ check_if_can_drop_indexes: /* Prevent a race condition between DROP INDEX and CREATE TABLE adding FOREIGN KEY constraints. 
*/ - row_mysql_lock_data_dictionary(prebuilt->trx); + row_mysql_lock_data_dictionary(m_prebuilt->trx); if (!n_drop_index) { drop_index = NULL; @@ -3891,19 +6134,19 @@ check_if_can_drop_indexes: } } - if (prebuilt->trx->check_foreigns) { + if (m_prebuilt->trx->check_foreigns) { for (uint i = 0; i < n_drop_index; i++) { - dict_index_t* index = drop_index[i]; + dict_index_t* index = drop_index[i]; if (innobase_check_foreign_key_index( - ha_alter_info, index, - indexed_table, col_names, - prebuilt->trx, drop_fk, n_drop_fk)) { + ha_alter_info, index, + indexed_table, col_names, + m_prebuilt->trx, drop_fk, n_drop_fk)) { row_mysql_unlock_data_dictionary( - prebuilt->trx); - prebuilt->trx->error_info = index; + m_prebuilt->trx); + m_prebuilt->trx->error_info = index; print_error(HA_ERR_DROP_INDEX_FK, - MYF(0)); + MYF(0)); goto err_exit; } } @@ -3911,17 +6154,17 @@ check_if_can_drop_indexes: /* If a primary index is dropped, need to check any depending foreign constraints get affected */ if (drop_primary - && innobase_check_foreign_key_index( - ha_alter_info, drop_primary, - indexed_table, col_names, - prebuilt->trx, drop_fk, n_drop_fk)) { - row_mysql_unlock_data_dictionary(prebuilt->trx); + && innobase_check_foreign_key_index( + ha_alter_info, drop_primary, + indexed_table, col_names, + m_prebuilt->trx, drop_fk, n_drop_fk)) { + row_mysql_unlock_data_dictionary(m_prebuilt->trx); print_error(HA_ERR_DROP_INDEX_FK, MYF(0)); goto err_exit; } } - row_mysql_unlock_data_dictionary(prebuilt->trx); + row_mysql_unlock_data_dictionary(m_prebuilt->trx); } else { drop_index = NULL; } @@ -3933,23 +6176,62 @@ check_if_can_drop_indexes: index != NULL; index = dict_table_get_next_index(index)) { if (!index->to_be_dropped && dict_index_is_corrupted(index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name(index_name, sizeof index_name, - index->name, TRUE); - - my_error(ER_INDEX_CORRUPT, MYF(0), index_name); + my_error(ER_INDEX_CORRUPT, MYF(0), index->name()); 
 			DBUG_RETURN(true);
 		}
 	}

+	n_rename_index = 0;
+	rename_index = NULL;
+
+#ifdef MYSQL_RENAME_INDEX
+
+	n_rename_index = ha_alter_info->index_rename_count;
+
+	/* Create a list of dict_index_t objects that are to be renamed,
+	also checking for requests to rename nonexistent indexes. If
+	the table is going to be rebuilt (new_clustered == true in
+	prepare_inplace_alter_table_dict()), then this can be skipped,
+	but we don't for simplicity (we have not determined the value of
+	new_clustered yet). */
+	if (n_rename_index > 0) {
+		rename_index = static_cast<dict_index_t**>(
+			mem_heap_alloc(
+				heap,
+				n_rename_index * sizeof(*rename_index)));
+		for (ulint i = 0; i < n_rename_index; i++) {
+			dict_index_t*	index = NULL;
+			const char*	old_name = NULL;
+
+			old_name = ha_alter_info
+				->index_rename_buffer[i].old_key->name;
+
+			index = dict_table_get_index_on_name(indexed_table,
+							     old_name);
+
+			if (index == NULL) {
+				my_error(ER_KEY_DOES_NOT_EXITS, MYF(0),
+					 old_name,
+					 m_prebuilt->table->name.m_name);
+				goto err_exit;
+			}
+
+			rename_index[i] = index;
+		}
+	}
+#endif /* MYSQL_RENAME_INDEX */
+
 	n_add_fk = 0;

 	if (ha_alter_info->handler_flags
 	    & Alter_inplace_info::ADD_FOREIGN_KEY) {
-		ut_ad(!prebuilt->trx->check_foreigns);
+		ut_ad(!m_prebuilt->trx->check_foreigns);
+
+#ifdef MYSQL_VIRTUAL_COLUMNS
+		alter_fill_stored_column(altered_table, m_prebuilt->table,
+					 &s_cols, &s_heap);
+#endif
 		add_fk = static_cast<dict_foreign_t**>(
 			mem_heap_zalloc(
 				heap,
@@ -3958,30 +6240,40 @@ check_if_can_drop_indexes:

 		if (!innobase_get_foreign_key_info(
 			    ha_alter_info, table_share,
-			    prebuilt->table, col_names,
+			    m_prebuilt->table, col_names,
 			    drop_index, n_drop_index,
-			    add_fk, &n_add_fk, prebuilt->trx)) {
+			    add_fk, &n_add_fk, m_prebuilt->trx, s_cols)) {
err_exit:
 			if (n_drop_index) {
-				row_mysql_lock_data_dictionary(prebuilt->trx);
+				row_mysql_lock_data_dictionary(m_prebuilt->trx);

 				/* Clear the to_be_dropped flags, which might
 				have been set at this point.
*/ for (ulint i = 0; i < n_drop_index; i++) { - DBUG_ASSERT(*drop_index[i]->name - != TEMP_INDEX_PREFIX); + ut_ad(drop_index[i]->is_committed()); drop_index[i]->to_be_dropped = 0; } - row_mysql_unlock_data_dictionary(prebuilt->trx); + row_mysql_unlock_data_dictionary( + m_prebuilt->trx); } if (heap) { mem_heap_free(heap); } + if (s_cols != NULL) { + UT_DELETE(s_cols); + mem_heap_free(s_heap); + } + goto err_exit_no_heap; } + + if (s_cols != NULL) { + UT_DELETE(s_cols); + mem_heap_free(s_heap); + } } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) @@ -3992,20 +6284,40 @@ err_exit: if (heap) { ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx( - prebuilt, + (*m_prebuilt_ptr), drop_index, n_drop_index, + rename_index, n_rename_index, drop_fk, n_drop_fk, add_fk, n_add_fk, ha_alter_info->online, heap, indexed_table, - col_names, ULINT_UNDEFINED, 0, 0); + col_names, ULINT_UNDEFINED, 0, 0, 0); } -func_exit: - DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0); + DBUG_ASSERT(m_prebuilt->trx->dict_operation_lock_mode == 0); if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) { - online_retry_drop_indexes(prebuilt->table, user_thd); + + online_retry_drop_indexes( + m_prebuilt->table, m_user_thd); + } + +#ifdef MYSQL_VIRTUAL_COLUMNS + if ((ha_alter_info->handler_flags + & Alter_inplace_info::DROP_VIRTUAL_COLUMN) + && prepare_inplace_drop_virtual( + ha_alter_info, altered_table, table)) { + DBUG_RETURN(true); + } + + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ADD_VIRTUAL_COLUMN) + && prepare_inplace_add_virtual( + ha_alter_info, altered_table, table)) { + DBUG_RETURN(true); + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + DBUG_RETURN(false); } @@ -4014,25 +6326,29 @@ func_exit: add a Doc ID hidden column and rebuild the primary index */ if (innobase_fulltext_exist(altered_table)) { ulint doc_col_no; + ulint num_v = 0; if (!innobase_fts_check_doc_id_col( - prebuilt->table, altered_table, &fts_doc_col_no)) { + m_prebuilt->table, + 
altered_table, &fts_doc_col_no, &num_v)) { + fts_doc_col_no = altered_table->s->stored_fields; add_fts_doc_id = true; add_fts_doc_id_idx = true; push_warning_printf( - user_thd, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_WRONG_INDEX, - "InnoDB rebuilding table to add column " - FTS_DOC_ID_COL_NAME); + "InnoDB rebuilding table to add" + " column " FTS_DOC_ID_COL_NAME); } else if (fts_doc_col_no == ULINT_UNDEFINED) { goto err_exit; } switch (innobase_fts_check_doc_id_index( - prebuilt->table, altered_table, &doc_col_no)) { + m_prebuilt->table, altered_table, + &doc_col_no)) { case FTS_NOT_EXIST_DOC_ID_INDEX: add_fts_doc_id_idx = true; break; @@ -4041,32 +6357,39 @@ func_exit: FTS_DOC_ID_INDEX_NAME); goto err_exit; case FTS_EXIST_DOC_ID_INDEX: - DBUG_ASSERT(doc_col_no == fts_doc_col_no - || doc_col_no == ULINT_UNDEFINED - || (ha_alter_info->handler_flags - & (Alter_inplace_info::ALTER_COLUMN_ORDER - | Alter_inplace_info::DROP_COLUMN - | Alter_inplace_info::ADD_COLUMN))); + DBUG_ASSERT( + doc_col_no == fts_doc_col_no + || doc_col_no == ULINT_UNDEFINED + || (ha_alter_info->handler_flags)); + /* JAN: TODO: MySQL 5.7 Virtual columns + & (Alter_inplace_info::ALTER_STORED_COLUMN_ORDER + | Alter_inplace_info::DROP_STORED_COLUMN + | + Alter_inplace_info::ADD_STORED_COLUMN))); + */ } } /* See if an AUTO_INCREMENT column was added. 
*/ uint i = 0, innodb_idx= 0; + ulint num_v = 0; List_iterator_fast cf_it( ha_alter_info->alter_info->create_list); while (const Create_field* new_field = cf_it++) { const Field* field; - if (!new_field->stored_in_db()) { - i++; - continue; - } + if (!new_field->stored_in_db()) { + i++; + continue; + } DBUG_ASSERT(i < altered_table->s->fields); DBUG_ASSERT(innodb_idx < altered_table->s->stored_fields); for (uint old_i = 0; table->field[old_i]; old_i++) { - if (!table->field[old_i]->stored_in_db()) - continue; + if (!table->field[old_i]->stored_in_db()) { + continue; + } + if (new_field->field == table->field[old_i]) { goto found_col; } @@ -4092,70 +6415,146 @@ func_exit: } add_autoinc_col_no = innodb_idx; - autoinc_col_max_value = innobase_get_int_col_max_value( - field); + /* JAN: TODO: MySQL 5.7 + autoinc_col_max_value = + field->get_max_int_value(); + */ + autoinc_col_max_value = innobase_get_int_col_max_value(field); } found_col: + if (innobase_is_v_fld(new_field)) { + ++num_v; + } + i++; - innodb_idx++; + innodb_idx++; } DBUG_ASSERT(heap); - DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd); + DBUG_ASSERT(m_user_thd == m_prebuilt->trx->mysql_thd); DBUG_ASSERT(!ha_alter_info->handler_ctx); ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx( - prebuilt, + (*m_prebuilt_ptr), drop_index, n_drop_index, + rename_index, n_rename_index, drop_fk, n_drop_fk, add_fk, n_add_fk, ha_alter_info->online, - heap, prebuilt->table, col_names, + heap, m_prebuilt->table, col_names, add_autoinc_col_no, ha_alter_info->create_info->auto_increment_value, - autoinc_col_max_value); + autoinc_col_max_value, 0); DBUG_RETURN(prepare_inplace_alter_table_dict( ha_alter_info, altered_table, table, table_share->table_name.str, - flags, flags2, + info.flags(), info.flags2(), fts_doc_col_no, add_fts_doc_id, add_fts_doc_id_idx)); } +/** Check that the column is part of a virtual index(index contains +virtual column) in the table +@param[in] table Table containing column +@param[in] col 
column to be checked +@return true if this column is indexed with other virtual columns */ +static +bool +dict_col_in_v_indexes( + dict_table_t* table, + dict_col_t* col) +{ + for (dict_index_t* index = dict_table_get_next_index( + dict_table_get_first_index(table)); index != NULL; + index = dict_table_get_next_index(index)) { + if (!dict_index_has_virtual(index)) { + continue; + } + for (ulint k = 0; k < index->n_fields; k++) { + dict_field_t* field + = dict_index_get_nth_field(index, k); + if (field->col->ind == col->ind) { + return(true); + } + } + } + + return(false); +} + +/* Check whether a columnn length change alter operation requires +to rebuild the template. +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. +@param[in] table table being altered +@return TRUE if needs rebuild. */ +static +bool +alter_templ_needs_rebuild( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info, + dict_table_t* table) +{ + ulint i = 0; + List_iterator_fast cf_it( + ha_alter_info->alter_info->create_list); + + for (Field** fp = altered_table->field; *fp; fp++, i++) { + cf_it.rewind(); + while (const Create_field* cf = cf_it++) { + for (ulint j=0; j < table->n_cols; j++) { + dict_col_t* cols + = dict_table_get_nth_col(table, j); + if (cf->length > cols->len + && dict_col_in_v_indexes(table, cols)) { + return(true); + } + } + } + } + + return(false); +} + + /** Alter the table structure in-place with operations specified using Alter_inplace_info. The level of concurrency allowed during this operation depends on the return value from check_if_supported_inplace_alter(). -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done +@param altered_table TABLE object for new version of table. 
+@param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. -@retval true Failure -@retval false Success +@retval true Failure +@retval false Success */ -UNIV_INTERN + bool ha_innobase::inplace_alter_table( /*=============================*/ TABLE* altered_table, Alter_inplace_info* ha_alter_info) { - dberr_t error; - + dberr_t error; + dict_add_v_col_t* add_v = NULL; + dict_vcol_templ_t* s_templ = NULL; + dict_vcol_templ_t* old_templ = NULL; + struct TABLE* eval_table = altered_table; + bool rebuild_templ = false; DBUG_ENTER("inplace_alter_table"); DBUG_ASSERT(!srv_read_only_mode); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_S)); - DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter"); + DEBUG_SYNC(m_user_thd, "innodb_inplace_alter_table_enter"); if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)) { ok_exit: - DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table"); + DEBUG_SYNC(m_user_thd, "innodb_after_inplace_alter_table"); DBUG_RETURN(false); } @@ -4171,13 +6570,83 @@ ok_exit: DBUG_ASSERT(ctx); DBUG_ASSERT(ctx->trx); - DBUG_ASSERT(ctx->prebuilt == prebuilt); + DBUG_ASSERT(ctx->prebuilt == m_prebuilt); - if (prebuilt->table->ibd_file_missing - || dict_table_is_discarded(prebuilt->table)) { + dict_index_t* pk = dict_table_get_first_index(m_prebuilt->table); + ut_ad(pk != NULL); + + /* For partitioned tables this could be already allocated from a + previous partition invocation. For normal tables this is NULL. 
*/ + UT_DELETE(ctx->m_stage); + + ctx->m_stage = UT_NEW_NOKEY(ut_stage_alter_t(pk)); + + if (m_prebuilt->table->ibd_file_missing + || dict_table_is_discarded(m_prebuilt->table)) { goto all_done; } + /* If we are doing a table rebuilding or having added virtual + columns in the same clause, we will need to build a table template + that carries translation information between MySQL TABLE and InnoDB + table, which indicates the virtual columns and their base columns + info. This is used to do the computation callback, so that the + data in base columns can be extracted send to server. + If the Column length changes and it is a part of virtual + index then we need to rebuild the template. */ + rebuild_templ + = ctx->need_rebuild() + || ((ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) + && alter_templ_needs_rebuild( + altered_table, ha_alter_info, ctx->new_table)); + +#ifdef MYSQL_VIRTUAL_COLUMNS + if ((ctx->new_table->n_v_cols > 0) && rebuild_templ) { + /* Save the templ if isn't NULL so as to restore the + original state in case of alter operation failures. */ + if (ctx->new_table->vc_templ != NULL && !ctx->need_rebuild()) { + old_templ = ctx->new_table->vc_templ; + } + s_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); + s_templ->vtempl = NULL; + + innobase_build_v_templ( + altered_table, ctx->new_table, s_templ, + NULL, false, NULL); + + ctx->new_table->vc_templ = s_templ; + } else if (ctx->num_to_add_vcol > 0 && ctx->num_to_drop_vcol == 0) { + /* if there is ongoing drop virtual column, then we disallow + inplace add index on newly added virtual column, so it does + not need to come in here to rebuild template with add_v. 
+ Please also see the assertion in innodb_v_adjust_idx_col() */ + + s_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); + + add_v = static_cast( + mem_heap_alloc(ctx->heap, sizeof *add_v)); + add_v->n_v_col = ctx->num_to_add_vcol; + add_v->v_col = ctx->add_vcol; + add_v->v_col_name = ctx->add_vcol_name; + + s_templ->vtempl = NULL; + + innobase_build_v_templ( + altered_table, ctx->new_table, s_templ, + add_v, false, NULL); + old_templ = ctx->new_table->vc_templ; + ctx->new_table->vc_templ = s_templ; + } + + /* Drop virtual column without rebuild will keep dict table + unchanged, we use old table to evaluate virtual column value + in innobase_get_computed_value(). */ + if (!ctx->need_rebuild() && ctx->num_to_drop_vcol > 0) { + eval_table = table; + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + /* Read the clustered index of the table and build indexes based on this information using temporary files and merge sort. */ @@ -4185,19 +6654,31 @@ ok_exit: error = DB_OUT_OF_MEMORY; goto oom;); error = row_merge_build_indexes( - prebuilt->trx, - prebuilt->table, ctx->new_table, + m_prebuilt->trx, + m_prebuilt->table, ctx->new_table, ctx->online, ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index, altered_table, ctx->add_cols, ctx->col_map, - ctx->add_autoinc, ctx->sequence); + ctx->add_autoinc, ctx->sequence, ctx->skip_pk_sort, + ctx->m_stage, add_v, eval_table); + + if (s_templ) { + ut_ad(ctx->need_rebuild() || ctx->num_to_add_vcol > 0 + || rebuild_templ); + dict_free_vc_templ(s_templ); + UT_DELETE(s_templ); + + ctx->new_table->vc_templ = old_templ; + } + #ifndef DBUG_OFF oom: #endif /* !DBUG_OFF */ if (error == DB_SUCCESS && ctx->online && ctx->need_rebuild()) { DEBUG_SYNC_C("row_log_table_apply1_before"); error = row_log_table_apply( - ctx->thr, prebuilt->table, altered_table); + ctx->thr, m_prebuilt->table, altered_table, + ctx->m_stage); } /* Init online ddl status variables */ @@ -4209,7 +6690,7 @@ oom: DBUG_EXECUTE_IF("create_index_fail", error = DB_DUPLICATE_KEY; - 
prebuilt->trx->error_key_num = ULINT_UNDEFINED;); + m_prebuilt->trx->error_key_num = ULINT_UNDEFINED;); /* After an error, remove all those index definitions from the dictionary which were defined. */ @@ -4220,13 +6701,13 @@ oom: case DB_SUCCESS: ut_d(mutex_enter(&dict_sys->mutex)); ut_d(dict_table_check_for_dup_indexes( - prebuilt->table, CHECK_PARTIAL_OK)); + m_prebuilt->table, CHECK_PARTIAL_OK)); ut_d(mutex_exit(&dict_sys->mutex)); /* prebuilt->table->n_ref_count can be anything here, given that we hold at most a shared lock on the table. */ goto ok_exit; case DB_DUPLICATE_KEY: - if (prebuilt->trx->error_key_num == ULINT_UNDEFINED + if (m_prebuilt->trx->error_key_num == ULINT_UNDEFINED || ha_alter_info->key_count == 0) { /* This should be the hidden index on FTS_DOC_ID, or there is no PRIMARY KEY in the @@ -4234,27 +6715,27 @@ oom: reporting a bogus duplicate key error. */ dup_key = NULL; } else { - DBUG_ASSERT(prebuilt->trx->error_key_num + DBUG_ASSERT(m_prebuilt->trx->error_key_num < ha_alter_info->key_count); dup_key = &ha_alter_info->key_info_buffer[ - prebuilt->trx->error_key_num]; + m_prebuilt->trx->error_key_num]; } print_keydup_error(altered_table, dup_key, MYF(0)); break; case DB_ONLINE_LOG_TOO_BIG: DBUG_ASSERT(ctx->online); my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), - (prebuilt->trx->error_key_num == ULINT_UNDEFINED) + (m_prebuilt->trx->error_key_num == ULINT_UNDEFINED) ? FTS_DOC_ID_INDEX_NAME : ha_alter_info->key_info_buffer[ - prebuilt->trx->error_key_num].name); + m_prebuilt->trx->error_key_num].name); break; case DB_INDEX_CORRUPT: my_error(ER_INDEX_CORRUPT, MYF(0), - (prebuilt->trx->error_key_num == ULINT_UNDEFINED) + (m_prebuilt->trx->error_key_num == ULINT_UNDEFINED) ? 
FTS_DOC_ID_INDEX_NAME : ha_alter_info->key_info_buffer[ - prebuilt->trx->error_key_num].name); + m_prebuilt->trx->error_key_num].name); break; case DB_DECRYPTION_FAILED: { String str; @@ -4266,19 +6747,19 @@ oom: default: my_error_innodb(error, table_share->table_name.str, - prebuilt->table->flags); + m_prebuilt->table->flags); } /* prebuilt->table->n_ref_count can be anything here, given that we hold at most a shared lock on the table. */ - prebuilt->trx->error_info = NULL; + m_prebuilt->trx->error_info = NULL; ctx->trx->error_state = DB_SUCCESS; DBUG_RETURN(true); } /** Free the modification log for online table rebuild. -@param table table that was being rebuilt online */ +@param table table that was being rebuilt online */ static void innobase_online_rebuild_log_free( @@ -4288,9 +6769,7 @@ innobase_online_rebuild_log_free( dict_index_t* clust_index = dict_table_get_first_index(table); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); rw_lock_x_lock(&clust_index->lock); @@ -4307,12 +6786,62 @@ innobase_online_rebuild_log_free( rw_lock_x_unlock(&clust_index->lock); } +/** For each user column, which is part of an index which is not going to be +dropped, it checks if the column number of the column is same as col_no +argument passed. +@param[in] table table object +@param[in] col_no column number of the column which is to be checked +@param[in] is_v if this is a virtual column +@retval true column exists +@retval false column does not exist, true if column is system column or +it is in the index. 
*/ +static +bool +check_col_exists_in_indexes( + const dict_table_t* table, + ulint col_no, + bool is_v) +{ + /* This function does not check system columns */ + if (!is_v && dict_table_get_nth_col(table, col_no)->mtype == DATA_SYS) { + return(true); + } + + for (dict_index_t* index = dict_table_get_first_index(table); index; + index = dict_table_get_next_index(index)) { + + if (index->to_be_dropped) { + continue; + } + + for (ulint i = 0; i < index->n_user_defined_cols; i++) { + const dict_col_t* idx_col + = dict_index_get_nth_col(index, i); + + if (is_v && dict_col_is_virtual(idx_col)) { + const dict_v_col_t* v_col = reinterpret_cast< + const dict_v_col_t*>(idx_col); + if (v_col->v_pos == col_no) { + return(true); + } + } + + if (!is_v && !dict_col_is_virtual(idx_col) + && dict_col_get_no(idx_col) == col_no) { + return(true); + } + } + } + + return(false); +} + /** Rollback a secondary index creation, drop the indexes with temparary index prefix -@param user_table InnoDB table -@param table the TABLE -@param locked TRUE=table locked, FALSE=may need to do a lazy drop -@param trx the transaction +@param user_table InnoDB table +@param table the TABLE +@param locked TRUE=table locked, FALSE=may need to do a lazy drop +@param trx the transaction */ static MY_ATTRIBUTE((nonnull)) void @@ -4342,11 +6871,11 @@ for inplace_alter_table() and thus might be higher than during prepare_inplace_alter_table(). (E.g concurrent writes were blocked during prepare, but might not be during commit). -@param ha_alter_info Data used during in-place alter. -@param table the TABLE -@param prebuilt the prebuilt struct -@retval true Failure -@retval false Success +@param ha_alter_info Data used during in-place alter. 
+@param table the TABLE +@param prebuilt the prebuilt struct +@retval true Failure +@retval false Success */ inline MY_ATTRIBUTE((nonnull, warn_unused_result)) bool @@ -4370,10 +6899,11 @@ rollback_inplace_alter_table( goto func_exit; } + trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX); row_mysql_lock_data_dictionary(ctx->trx); if (ctx->need_rebuild()) { - dberr_t err; + dberr_t err = DB_SUCCESS; ulint flags = ctx->new_table->flags; /* DML threads can access ctx->new_table via the @@ -4383,7 +6913,7 @@ rollback_inplace_alter_table( /* Since the FTS index specific auxiliary tables has not yet registered with "table->fts" by fts_add_index(), we will need explicitly delete them here */ - if (DICT_TF2_FLAG_IS_SET(ctx->new_table, DICT_TF2_FTS)) { + if (dict_table_has_fts_index(ctx->new_table)) { err = innobase_drop_fts_index_table( ctx->new_table, ctx->trx); @@ -4396,18 +6926,7 @@ rollback_inplace_alter_table( } } - /* Drop the table. */ - dict_table_close(ctx->new_table, TRUE, FALSE); - -#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG - /* Nobody should have initialized the stats of the - newly created table yet. When this is the case, we - know that it has not been added for background stats - gathering. */ - ut_a(!ctx->new_table->stat_initialized); -#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ - - err = row_merge_drop_table(ctx->trx, ctx->new_table); + dict_table_close_and_drop(ctx->trx, ctx->new_table); switch (err) { case DB_SUCCESS: @@ -4422,8 +6941,6 @@ rollback_inplace_alter_table( & Alter_inplace_info::ADD_PK_INDEX)); DBUG_ASSERT(ctx->new_table == prebuilt->table); - trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX); - innobase_rollback_sec_index( prebuilt->table, table, FALSE, ctx->trx); } @@ -4460,8 +6977,7 @@ func_exit: commit_inplace_alter_table(). 
*/ for (ulint i = 0; i < ctx->num_to_drop_index; i++) { dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - + DBUG_ASSERT(index->is_committed()); index->to_be_dropped = 0; } @@ -4469,17 +6985,32 @@ func_exit: } } + /* Reset dict_col_t::ord_part for those columns fail to be indexed, + we do this by checking every existing column, if any current + index would index them */ + for (ulint i = 0; i < dict_table_get_n_cols(prebuilt->table); i++) { + if (!check_col_exists_in_indexes(prebuilt->table, i, false)) { + prebuilt->table->cols[i].ord_part = 0; + } + } + + for (ulint i = 0; i < dict_table_get_n_v_cols(prebuilt->table); i++) { + if (!check_col_exists_in_indexes(prebuilt->table, i, true)) { + prebuilt->table->v_cols[i].m_col.ord_part = 0; + } + } + trx_commit_for_mysql(prebuilt->trx); MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); DBUG_RETURN(fail); } /** Drop a FOREIGN KEY constraint from the data dictionary tables. -@param trx data dictionary transaction -@param table_name Table name in MySQL -@param foreign_id Foreign key constraint identifier -@retval true Failure -@retval false Success */ +@param trx data dictionary transaction +@param table_name Table name in MySQL +@param foreign_id Foreign key constraint identifier +@retval true Failure +@retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_drop_foreign_try( @@ -4493,9 +7024,7 @@ innobase_drop_foreign_try( DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); /* Drop the constraint from the data dictionary. */ static const char sql[] = @@ -4528,26 +7057,27 @@ innobase_drop_foreign_try( } /** Rename a column in the data dictionary tables. 
-@param user_table InnoDB table that was being altered -@param trx data dictionary transaction -@param table_name Table name in MySQL -@param nth_col 0-based index of the column -@param from old column name -@param to new column name -@param new_clustered whether the table has been rebuilt -@retval true Failure -@retval false Success */ +@param[in] user_table InnoDB table that was being altered +@param[in] trx data dictionary transaction +@param[in] table_name Table name in MySQL +@param[in] nth_col 0-based index of the column +@param[in] from old column name +@param[in] to new column name +@param[in] new_clustered whether the table has been rebuilt +@param[in] is_virtual whether it is a virtual column +@retval true Failure +@retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_rename_column_try( -/*=======================*/ const dict_table_t* user_table, trx_t* trx, const char* table_name, ulint nth_col, const char* from, const char* to, - bool new_clustered) + bool new_clustered, + bool is_virtual) { pars_info_t* info; dberr_t error; @@ -4557,9 +7087,7 @@ innobase_rename_column_try( DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); if (new_clustered) { goto rename_foreign; @@ -4739,13 +7267,13 @@ rename_foreign: } /** Rename columns in the data dictionary tables. -@param ha_alter_info Data used during in-place alter. -@param ctx In-place ALTER TABLE context -@param table the TABLE -@param trx data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success */ +@param ha_alter_info Data used during in-place alter. 
+@param ctx In-place ALTER TABLE context +@param table the TABLE +@param trx data dictionary transaction +@param table_name Table name in MySQL +@retval true Failure +@retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_rename_columns_try( @@ -4758,25 +7286,35 @@ innobase_rename_columns_try( { List_iterator_fast cf_it( ha_alter_info->alter_info->create_list); - uint i = 0; + uint i = 0; + ulint num_v = 0; DBUG_ASSERT(ctx); DBUG_ASSERT(ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME); for (Field** fp = table->field; *fp; fp++, i++) { - if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db())) { - continue; + bool is_virtual = innobase_is_v_fld(*fp); + + if (!((*fp)->flags & FIELD_IS_RENAMED)) { + goto processed_field; } cf_it.rewind(); while (Create_field* cf = cf_it++) { if (cf->field == *fp) { + ulint col_n = is_virtual + ? dict_create_v_col_pos( + num_v, i) + : i - num_v; + if (innobase_rename_column_try( - ctx->old_table, trx, table_name, i, + ctx->old_table, trx, table_name, + col_n, cf->field->field_name, cf->field_name, - ctx->need_rebuild())) { + ctx->need_rebuild(), + is_virtual)) { return(true); } goto processed_field; @@ -4785,60 +7323,242 @@ innobase_rename_columns_try( ut_error; processed_field: + if (is_virtual) { + num_v++; + } + continue; } return(false); } -/** Rename columns in the data dictionary cache +/** Enlarge a column in the data dictionary tables. 
+@param user_table InnoDB table that was being altered +@param trx data dictionary transaction +@param table_name Table name in MySQL +@param nth_col 0-based index of the column +@param new_len new column length, in bytes +@param is_v if it's a virtual column +@retval true Failure +@retval false Success */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +bool +innobase_enlarge_column_try( +/*========================*/ + const dict_table_t* user_table, + trx_t* trx, + const char* table_name, + ulint nth_col, + ulint new_len, + bool is_v) +{ + pars_info_t* info; + dberr_t error; +#ifdef UNIV_DEBUG + dict_col_t* col; +#endif /* UNIV_DEBUG */ + dict_v_col_t* v_col; + ulint pos; + + DBUG_ENTER("innobase_enlarge_column_try"); + + DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); + ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + + if (is_v) { + v_col = dict_table_get_nth_v_col(user_table, nth_col); + pos = dict_create_v_col_pos(v_col->v_pos, v_col->m_col.ind); +#ifdef UNIV_DEBUG + col = &v_col->m_col; +#endif /* UNIV_DEBUG */ + } else { +#ifdef UNIV_DEBUG + col = dict_table_get_nth_col(user_table, nth_col); +#endif /* UNIV_DEBUG */ + pos = nth_col; + } + +#ifdef UNIV_DEBUG + ut_ad(col->len < new_len); + switch (col->mtype) { + case DATA_MYSQL: + /* NOTE: we could allow this when !(prtype & DATA_BINARY_TYPE) + and ROW_FORMAT is not REDUNDANT and mbminlenid); + pars_info_add_int4_literal(info, "nth", pos); + pars_info_add_int4_literal(info, "new", new_len); + + trx->op_info = "resizing column in SYS_COLUMNS"; + + error = que_eval_sql( + info, + "PROCEDURE RESIZE_SYS_COLUMNS_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_COLUMNS SET LEN=:new\n" + "WHERE TABLE_ID=:tableid AND POS=:nth;\n" + "END;\n", + FALSE, trx); + + DBUG_EXECUTE_IF("ib_resize_column_error", + error = DB_OUT_OF_FILE_SPACE;); + + trx->op_info = ""; + trx->error_state = DB_SUCCESS; + + if (error != 
DB_SUCCESS) { + my_error_innodb(error, table_name, 0); + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + +/** Enlarge columns in the data dictionary tables. +@param ha_alter_info Data used during in-place alter. +@param table the TABLE +@param user_table InnoDB table that was being altered +@param trx data dictionary transaction +@param table_name Table name in MySQL +@retval true Failure +@retval false Success */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +bool +innobase_enlarge_columns_try( +/*=========================*/ + Alter_inplace_info* ha_alter_info, + const TABLE* table, + const dict_table_t* user_table, + trx_t* trx, + const char* table_name) +{ + List_iterator_fast cf_it( + ha_alter_info->alter_info->create_list); + ulint i = 0; + bool is_v=false; + + for (Field** fp = table->field; *fp; fp++, i++) { + ulint idx; + +#ifdef MYSQL_VIRTUAL_COLUMNS + ulint num_v = 0; + if ((*fp)->is_virtual_gcol()) { + is_v = true; + idx = num_v; + num_v++; + } else { + idx = i - num_v; + is_v = false; + } +#else + idx = i; + is_v = false; +#endif + + cf_it.rewind(); + while (Create_field* cf = cf_it++) { + if (cf->field == *fp) { + if ((*fp)->is_equal(cf) + == IS_EQUAL_PACK_LENGTH + && innobase_enlarge_column_try( + user_table, trx, table_name, + idx, cf->length, is_v)) { + return(true); + } + + break; + } + } + } + + return(false); +} + +/** Rename or enlarge columns in the data dictionary cache as part of commit_cache_norebuild(). -@param ha_alter_info Data used during in-place alter. -@param table the TABLE -@param user_table InnoDB table that was being altered */ +@param ha_alter_info Data used during in-place alter. 
+@param table the TABLE +@param user_table InnoDB table that was being altered */ static MY_ATTRIBUTE((nonnull)) void -innobase_rename_columns_cache( -/*==========================*/ +innobase_rename_or_enlarge_columns_cache( +/*=====================================*/ Alter_inplace_info* ha_alter_info, const TABLE* table, dict_table_t* user_table) { if (!(ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME)) { + & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH + | Alter_inplace_info::ALTER_COLUMN_NAME))) { return; } List_iterator_fast cf_it( ha_alter_info->alter_info->create_list); - uint i = 0; + uint i = 0; + ulint num_v = 0; for (Field** fp = table->field; *fp; fp++, i++) { - if (!((*fp)->flags & FIELD_IS_RENAMED)) { - continue; - } + bool is_virtual = innobase_is_v_fld(*fp); cf_it.rewind(); while (Create_field* cf = cf_it++) { - if (cf->field == *fp) { - dict_mem_table_col_rename(user_table, i, - cf->field->field_name, - cf->field_name); - goto processed_field; + if (cf->field != *fp) { + continue; } + + ulint col_n = is_virtual ? num_v : i - num_v; + + if ((*fp)->is_equal(cf) == IS_EQUAL_PACK_LENGTH) { + if (is_virtual) { + dict_table_get_nth_v_col( + user_table, col_n)->m_col.len + = cf->length; + } else { + dict_table_get_nth_col( + user_table, col_n)->len + = cf->length; + } + } + + if ((*fp)->flags & FIELD_IS_RENAMED) { + dict_mem_table_col_rename( + user_table, col_n, + cf->field->field_name, + cf->field_name, is_virtual); + } + + break; } - ut_error; -processed_field: - continue; + if (is_virtual) { + num_v++; + } } } /** Get the auto-increment value of the table on commit. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation +@param ha_alter_info Data used during in-place alter +@param ctx In-place ALTER TABLE context +@param altered_table MySQL table that is being altered +@param old_table MySQL table as it is before the ALTER operation @return the next auto-increment value (0 if not present) */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) ulonglong @@ -4874,11 +7594,10 @@ commit_get_autoinc( Field* autoinc_field = old_table->found_next_number_field; - KEY* autoinc_key = - old_table->key_info + old_table->s->next_number_index; - dict_index_t* index = dict_table_get_index_on_name( - ctx->old_table, autoinc_key->name); + dict_index_t* index = dict_table_get_index_on_first_col( + ctx->old_table, autoinc_field->field_index, + autoinc_field->field_name); max_autoinc = ha_alter_info->create_info->auto_increment_value; @@ -4894,8 +7613,9 @@ commit_get_autoinc( ulonglong col_max_value; ulonglong offset; - col_max_value = innobase_get_int_col_max_value( - old_table->found_next_number_field); + col_max_value = innobase_get_int_col_max_value(autoinc_field); + // JAN: TODO: MySQL 5.7 + //col_max_value = autoinc_field->get_max_int_value(); offset = ctx->prebuilt->autoinc_offset; max_autoinc = innobase_next_autoinc( @@ -4917,12 +7637,12 @@ commit_get_autoinc( /** Add or drop foreign key constraints to the data dictionary tables, but do not touch the data dictionary cache. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success +@param ha_alter_info Data used during in-place alter +@param ctx In-place ALTER TABLE context +@param trx Data dictionary transaction +@param table_name Table name in MySQL +@retval true Failure +@retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool @@ -4949,7 +7669,7 @@ innobase_update_foreign_try( || fk->foreign_table == ctx->old_table); dberr_t error = dict_create_add_foreign_id( - &foreign_id, ctx->old_table->name, fk); + &foreign_id, ctx->old_table->name.m_name, fk); if (error != DB_SUCCESS) { my_error(ER_TOO_LONG_IDENT, MYF(0), @@ -4977,7 +7697,7 @@ innobase_update_foreign_try( names, while the columns in ctx->old_table have not been renamed yet. */ error = dict_create_add_foreign_to_dictionary( - (dict_table_t*)ctx->old_table,ctx->old_table->name, fk, trx); + (dict_table_t*)ctx->old_table,ctx->old_table->name.m_name, fk, trx); DBUG_EXECUTE_IF( "innodb_test_cannot_add_fk_system", @@ -5020,6 +7740,8 @@ innobase_update_foreign_cache( DBUG_ENTER("innobase_update_foreign_cache"); + ut_ad(mutex_own(&dict_sys->mutex)); + user_table = ctx->old_table; /* Discard the added foreign keys, because we will @@ -5051,17 +7773,23 @@ innobase_update_foreign_cache( /* Load the old or added foreign keys from the data dictionary and prevent the table from being evicted from the data dictionary cache (work around the lack of WL#6049). 
*/ - err = dict_load_foreigns(user_table->name, + dict_names_t fk_tables; + + err = dict_load_foreigns(user_table->name.m_name, ctx->col_names, false, true, - DICT_ERR_IGNORE_NONE); + DICT_ERR_IGNORE_NONE, + fk_tables); if (err == DB_CANNOT_ADD_CONSTRAINT) { + fk_tables.clear(); + /* It is possible there are existing foreign key are loaded with "foreign_key checks" off, so let's retry the loading with charset_check is off */ - err = dict_load_foreigns(user_table->name, + err = dict_load_foreigns(user_table->name.m_name, ctx->col_names, false, false, - DICT_ERR_IGNORE_NONE); + DICT_ERR_IGNORE_NONE, + fk_tables); /* The load with "charset_check" off is successful, warn the user that the foreign key has loaded with mis-matched @@ -5073,25 +7801,46 @@ innobase_update_foreign_cache( ER_ALTER_INFO, "Foreign key constraints for table '%s'" " are loaded with charset check off", - user_table->name); - + user_table->name.m_name); } } + /* For complete loading of foreign keys, all associated tables must + also be loaded. */ + while (err == DB_SUCCESS && !fk_tables.empty()) { + dict_table_t* table = dict_load_table( + fk_tables.front(), true, DICT_ERR_IGNORE_NONE); + + if (table == NULL) { + table_name_t table_name; + table_name.m_name = const_cast( + fk_tables.front()); + + err = DB_TABLE_NOT_FOUND; + ib::error() + << "Failed to load table '" << table_name + << "' which has a foreign key constraint with" + << " table '" << user_table->name << "'."; + break; + } + + fk_tables.pop_front(); + } + DBUG_RETURN(err); } /** Commit the changes made during prepare_inplace_alter_table() and inplace_alter_table() inside the data dictionary tables, when rebuilding the table. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success +@param ha_alter_info Data used during in-place alter +@param ctx In-place ALTER TABLE context +@param altered_table MySQL table that is being altered +@param old_table MySQL table as it is before the ALTER operation +@param trx Data dictionary transaction +@param table_name Table name in MySQL +@retval true Failure +@retval false Success */ inline MY_ATTRIBUTE((nonnull, warn_unused_result)) bool @@ -5113,18 +7862,15 @@ commit_try_rebuild( DBUG_ASSERT(!(ha_alter_info->handler_flags & Alter_inplace_info::DROP_FOREIGN_KEY) || ctx->num_to_drop_fk > 0); - DBUG_ASSERT(ctx->num_to_drop_fk - == ha_alter_info->alter_info->drop_list.elements); for (dict_index_t* index = dict_table_get_first_index(rebuilt_table); index; index = dict_table_get_next_index(index)) { DBUG_ASSERT(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); + DBUG_ASSERT(index->is_committed()); if (dict_index_is_corrupted(index)) { - my_error(ER_INDEX_CORRUPT, MYF(0), - index->name); + my_error(ER_INDEX_CORRUPT, MYF(0), index->name()); DBUG_RETURN(true); } } @@ -5140,7 +7886,7 @@ commit_try_rebuild( for (ulint i = 0; i < ctx->num_to_drop_index; i++) { dict_index_t* index = ctx->drop_index[i]; DBUG_ASSERT(index->table == user_table); - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); + DBUG_ASSERT(index->is_committed()); DBUG_ASSERT(index->to_be_dropped); index->to_be_dropped = 0; } @@ -5151,8 +7897,12 @@ commit_try_rebuild( if (ctx->online) { DEBUG_SYNC_C("row_log_table_apply2_before"); + error = row_log_table_apply( - ctx->thr, user_table, altered_table); + ctx->thr, user_table, altered_table, + static_cast( + 
ha_alter_info->handler_ctx)->m_stage); + ulint err_key = thr_get_trx(ctx->thr)->error_key_num; switch (error) { @@ -5216,12 +7966,12 @@ commit_try_rebuild( user_table, rebuilt_table, ctx->tmp_name, trx); /* We must be still holding a table handle. */ - DBUG_ASSERT(user_table->n_ref_count >= 1); + DBUG_ASSERT(user_table->get_ref_count() >= 1); DBUG_EXECUTE_IF("ib_ddl_crash_after_rename", DBUG_SUICIDE();); DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;); - if (user_table->n_ref_count > 1) { + if (user_table->get_ref_count() > 1) { /* This should only occur when an innodb_memcached connection with innodb_api_enable_mdl=off was started before commit_inplace_alter_table() locked the data @@ -5231,19 +7981,20 @@ commit_try_rebuild( /* Normally, n_ref_count must be 1, because purge cannot be executing on this very table as we are holding dict_operation_lock X-latch. */ - - error = DB_LOCK_WAIT_TIMEOUT; + my_printf_error(ER_ILLEGAL_HA, "Cannot complete the operation " + "because table is referenced by another connection.", MYF(0)); + DBUG_RETURN(true); } switch (error) { case DB_SUCCESS: DBUG_RETURN(false); case DB_TABLESPACE_EXISTS: - ut_a(rebuilt_table->n_ref_count == 1); + ut_a(rebuilt_table->get_ref_count() == 1); my_error(ER_TABLESPACE_EXISTS, MYF(0), ctx->tmp_name); DBUG_RETURN(true); case DB_DUPLICATE_KEY: - ut_a(rebuilt_table->n_ref_count == 1); + ut_a(rebuilt_table->get_ref_count() == 1); my_error(ER_TABLE_EXISTS_ERROR, MYF(0), ctx->tmp_name); DBUG_RETURN(true); default: @@ -5254,7 +8005,7 @@ commit_try_rebuild( /** Apply the changes made during commit_try_rebuild(), to the data dictionary cache and the file system. 
-@param ctx In-place ALTER TABLE context */ +@param ctx In-place ALTER TABLE context */ inline MY_ATTRIBUTE((nonnull)) void commit_cache_rebuild( @@ -5264,12 +8015,13 @@ commit_cache_rebuild( dberr_t error; DBUG_ENTER("commit_cache_rebuild"); + DEBUG_SYNC_C("commit_cache_rebuild"); DBUG_ASSERT(ctx->need_rebuild()); DBUG_ASSERT(dict_table_is_discarded(ctx->old_table) == dict_table_is_discarded(ctx->new_table)); const char* old_name = mem_heap_strdup( - ctx->heap, ctx->old_table->name); + ctx->heap, ctx->old_table->name.m_name); /* We already committed and redo logged the renames, so this must succeed. */ @@ -5284,71 +8036,56 @@ commit_cache_rebuild( DBUG_VOID_RETURN; } +/** Set of column numbers */ +typedef std::set, ut_allocator > col_set; + /** Store the column number of the columns in a list belonging to indexes which are not being dropped. @param[in] ctx In-place ALTER TABLE context -@param[out] drop_col_list list which will be set, containing columns - which is part of index being dropped */ +@param[in, out] drop_col_list list which will be set, containing columns + which is part of index being dropped +@param[in, out] drop_v_col_list list which will be set, containing + virtual columns which is part of index + being dropped */ static void get_col_list_to_be_dropped( - ha_innobase_inplace_ctx* ctx, - std::set& drop_col_list) + const ha_innobase_inplace_ctx* ctx, + col_set& drop_col_list, + col_set& drop_v_col_list) { for (ulint index_count = 0; index_count < ctx->num_to_drop_index; index_count++) { - dict_index_t* index = ctx->drop_index[index_count]; + const dict_index_t* index = ctx->drop_index[index_count]; for (ulint col = 0; col < index->n_user_defined_cols; col++) { - ulint col_no = dict_index_get_nth_col_no(index, col); - drop_col_list.insert(col_no); - } - } -} + const dict_col_t* idx_col + = dict_index_get_nth_col(index, col); -/** For each column, which is part of an index which is not going to be -dropped, it checks if the column number of the column 
is same as col_no -argument passed. -@param[in] table table object -@param[in] col_no column number of the column which is to be checked -@retval true column exists -@retval false column does not exist. */ -static -bool -check_col_exists_in_indexes( - const dict_table_t* table, - ulint col_no) -{ - for (dict_index_t* index = dict_table_get_first_index(table); index; - index = dict_table_get_next_index(index)) { + if (dict_col_is_virtual(idx_col)) { + const dict_v_col_t* v_col + = reinterpret_cast< + const dict_v_col_t*>(idx_col); + drop_v_col_list.insert(v_col->v_pos); - if (index->to_be_dropped) { - continue; - } - - for (ulint col = 0; col < index->n_user_defined_cols; col++) { - - ulint index_col_no = dict_index_get_nth_col_no( - index, col); - if (col_no == index_col_no) { - return(true); + } else { + ulint col_no = dict_col_get_no(idx_col); + drop_col_list.insert(col_no); } } } - - return(false); } /** Commit the changes made during prepare_inplace_alter_table() and inplace_alter_table() inside the data dictionary tables, when not rebuilding the table. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param old_table MySQL table as it is before the ALTER operation -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success +@param ha_alter_info Data used during in-place alter +@param ctx In-place ALTER TABLE context +@param old_table MySQL table as it is before the ALTER operation +@param trx Data dictionary transaction +@param table_name Table name in MySQL +@retval true Failure +@retval false Success */ inline MY_ATTRIBUTE((nonnull, warn_unused_result)) bool @@ -5356,6 +8093,7 @@ commit_try_norebuild( /*=================*/ Alter_inplace_info* ha_alter_info, ha_innobase_inplace_ctx*ctx, + TABLE* altered_table, const TABLE* old_table, trx_t* trx, const char* table_name) @@ -5367,13 +8105,15 @@ commit_try_norebuild( & Alter_inplace_info::DROP_FOREIGN_KEY) || ctx->num_to_drop_fk > 0); DBUG_ASSERT(ctx->num_to_drop_fk - == ha_alter_info->alter_info->drop_list.elements); + == ha_alter_info->alter_info->drop_list.elements + || ctx->num_to_drop_vcol + == ha_alter_info->alter_info->drop_list.elements); for (ulint i = 0; i < ctx->num_to_add_index; i++) { dict_index_t* index = ctx->add_index[i]; DBUG_ASSERT(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX); + DBUG_ASSERT(!index->is_committed()); if (dict_index_is_corrupted(index)) { /* Report a duplicate key error for the index that was @@ -5387,7 +8127,7 @@ commit_try_norebuild( with a detailed reason once WL#6379 has been implemented. */ my_error(ER_DUP_UNKNOWN_IN_INDEX, - MYF(0), index->name + 1); + MYF(0), index->name()); DBUG_RETURN(true); } } @@ -5398,17 +8138,27 @@ commit_try_norebuild( dberr_t error; - /* We altered the table in place. */ - /* Lose the TEMP_INDEX_PREFIX. */ + /* We altered the table in place. Mark the indexes as committed. 
*/ for (ulint i = 0; i < ctx->num_to_add_index; i++) { dict_index_t* index = ctx->add_index[i]; DBUG_ASSERT(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name - == TEMP_INDEX_PREFIX); + DBUG_ASSERT(!index->is_committed()); error = row_merge_rename_index_to_add( trx, ctx->new_table->id, index->id); - if (error != DB_SUCCESS) { + switch (error) { + case DB_SUCCESS: + break; + case DB_TOO_MANY_CONCURRENT_TRXS: + /* If we wrote some undo log here, then the + persistent data dictionary for this table may + probably be corrupted. This is because a + 'trigger' on SYS_INDEXES could already have invoked + btr_free_if_exists(), which cannot be rolled back. */ + DBUG_ASSERT(trx->undo_no == 0); + my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0)); + DBUG_RETURN(true); + default: sql_print_error( "InnoDB: rename index to add: %lu\n", (ulong) error); @@ -5420,14 +8170,11 @@ commit_try_norebuild( } /* Drop any indexes that were requested to be dropped. - Rename them to TEMP_INDEX_PREFIX in the data - dictionary first. We do not bother to rename - index->name in the dictionary cache, because the index - is about to be freed after row_merge_drop_indexes_dict(). */ + Flag them in the data dictionary first. 
*/ for (ulint i = 0; i < ctx->num_to_drop_index; i++) { dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); + DBUG_ASSERT(index->is_committed()); DBUG_ASSERT(index->table == ctx->new_table); DBUG_ASSERT(index->to_be_dropped); @@ -5444,20 +8191,54 @@ commit_try_norebuild( } } - if (!(ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME)) { - DBUG_RETURN(false); + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_COLUMN_NAME) + && innobase_rename_columns_try(ha_alter_info, ctx, old_table, + trx, table_name)) { + DBUG_RETURN(true); } - DBUG_RETURN(innobase_rename_columns_try(ha_alter_info, ctx, - old_table, trx, table_name)); + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) + && innobase_enlarge_columns_try(ha_alter_info, old_table, + ctx->old_table, trx, table_name)) { + DBUG_RETURN(true); + } + +#ifdef MYSQL_RENAME_INDEX + if ((ha_alter_info->handler_flags + & Alter_inplace_info::RENAME_INDEX) + && rename_indexes_in_data_dictionary(ctx, ha_alter_info, trx)) { + DBUG_RETURN(true); + } +#endif /* MYSQL_RENAME_INDEX */ + +#ifdef MYSQL_VIRTUAL_COLUMNS + if ((ha_alter_info->handler_flags + & Alter_inplace_info::DROP_VIRTUAL_COLUMN) + && innobase_drop_virtual_try( + ha_alter_info, altered_table, old_table, + ctx->old_table, trx)) { + DBUG_RETURN(true); + } + + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ADD_VIRTUAL_COLUMN) + && innobase_add_virtual_try( + ha_alter_info, altered_table, old_table, + ctx->old_table, trx)) { + DBUG_RETURN(true); + } +#endif /* MYSQL_VIRTUAL_COLUMNS */ + + DBUG_RETURN(false); } /** Commit the changes to the data dictionary cache after a successful commit_try_norebuild() call. 
-@param ctx In-place ALTER TABLE context -@param table the TABLE before the ALTER -@param trx Data dictionary transaction object +@param ctx In-place ALTER TABLE context +@param table the TABLE before the ALTER +@param trx Data dictionary transaction object (will be started and committed) @return whether all replacements were found for dropped indexes */ inline MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -5474,26 +8255,36 @@ commit_cache_norebuild( DBUG_ASSERT(!ctx->need_rebuild()); - std::set drop_list; - std::set::const_iterator col_it; + col_set drop_list; + col_set v_drop_list; + col_set::const_iterator col_it; /* Check if the column, part of an index to be dropped is part of any other index which is not being dropped. If it so, then set the ord_part of the column to 0. */ - get_col_list_to_be_dropped(ctx, drop_list); + get_col_list_to_be_dropped(ctx, drop_list, v_drop_list); - for(col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) { - if (!check_col_exists_in_indexes(ctx->new_table, *col_it)) { + for (col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) { + if (!check_col_exists_in_indexes(ctx->new_table, + *col_it, false)) { ctx->new_table->cols[*col_it].ord_part = 0; } } + for (col_it = v_drop_list.begin(); + col_it != v_drop_list.end(); ++col_it) { + if (!check_col_exists_in_indexes(ctx->new_table, + *col_it, true)) { + ctx->new_table->v_cols[*col_it].m_col.ord_part = 0; + } + } + for (ulint i = 0; i < ctx->num_to_add_index; i++) { dict_index_t* index = ctx->add_index[i]; DBUG_ASSERT(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX); - index->name++; + DBUG_ASSERT(!index->is_committed()); + index->set_committed(true); } if (ctx->num_to_drop_index) { @@ -5508,7 +8299,7 @@ commit_cache_norebuild( for (ulint i = 0; i < ctx->num_to_drop_index; i++) { dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); + 
DBUG_ASSERT(index->is_committed()); DBUG_ASSERT(index->table == ctx->new_table); DBUG_ASSERT(index->to_be_dropped); @@ -5532,7 +8323,7 @@ commit_cache_norebuild( for (ulint i = 0; i < ctx->num_to_drop_index; i++) { dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); + DBUG_ASSERT(index->is_committed()); DBUG_ASSERT(index->table == ctx->new_table); if (index->type & DICT_FTS) { @@ -5549,17 +8340,25 @@ commit_cache_norebuild( trx_commit_for_mysql(trx); } + ctx->new_table->fts_doc_id_index + = ctx->new_table->fts + ? dict_table_get_index_on_name( + ctx->new_table, FTS_DOC_ID_INDEX_NAME) + : NULL; + DBUG_ASSERT((ctx->new_table->fts == NULL) + == (ctx->new_table->fts_doc_id_index == NULL)); + DBUG_RETURN(found); } /** Adjust the persistent statistics after non-rebuilding ALTER TABLE. Remove statistics for dropped indexes, add statistics for created indexes and rename statistics for renamed indexes. -@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param altered_table MySQL table that is being altered -@param table_name Table name in MySQL -@param thd MySQL connection +@param ha_alter_info Data used during in-place alter +@param ctx In-place ALTER TABLE context +@param altered_table MySQL table that is being altered +@param table_name Table name in MySQL +@param thd MySQL connection */ static void @@ -5580,7 +8379,19 @@ alter_stats_norebuild( DBUG_VOID_RETURN; } - /* TODO: This will not drop the (unused) statistics for + /* Delete corresponding rows from the stats table. We do this + in a separate transaction from trx, because lock waits are not + allowed in a data dictionary transaction. (Lock waits are possible + on the statistics table, because it is directly accessible by users, + not covered by the dict_operation_lock.) + + Because the data dictionary changes were already committed, orphaned + rows may be left in the statistics table if the system crashes. 
+ + FIXME: each change to the statistics tables is being committed in a + separate transaction, meaning that the operation is not atomic + + FIXME: This will not drop the (unused) statistics for FTS_DOC_ID_INDEX if it was a hidden index, dropped together with the last renamining FULLTEXT index. */ for (i = 0; i < ha_alter_info->index_drop_count; i++) { @@ -5595,7 +8406,7 @@ alter_stats_norebuild( char errstr[1024]; if (dict_stats_drop_index( - ctx->new_table->name, key->name, + ctx->new_table->name.m_name, key->name, errstr, sizeof errstr) != DB_SUCCESS) { push_warning(thd, Sql_condition::WARN_LEVEL_WARN, @@ -5603,6 +8414,31 @@ alter_stats_norebuild( } } +#ifdef MYSQL_RENAME_INDEX + for (i = 0; i < ha_alter_info->index_rename_count; i++) { + KEY_PAIR* pair = &ha_alter_info->index_rename_buffer[i]; + dberr_t err; + + err = dict_stats_rename_index(ctx->new_table, + pair->old_key->name, + pair->new_key->name); + + if (err != DB_SUCCESS) { + push_warning_printf( + thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ERROR_ON_RENAME, + "Error renaming an index of table '%s'" + " from '%s' to '%s' in InnoDB persistent" + " statistics storage: %s", + table_name, + pair->old_key->name, + pair->new_key->name, + ut_strerr(err)); + } + } +#endif /* MYSQL_RENAME_INDEX */ + for (i = 0; i < ctx->num_to_add_index; i++) { dict_index_t* index = ctx->add_index[i]; DBUG_ASSERT(index->table == ctx->new_table); @@ -5619,9 +8455,9 @@ alter_stats_norebuild( /** Adjust the persistent statistics after rebuilding ALTER TABLE. Remove statistics for dropped indexes, add statistics for created indexes and rename statistics for renamed indexes. 
-@param table InnoDB table that was rebuilt by ALTER TABLE -@param table_name Table name in MySQL -@param thd MySQL connection +@param table InnoDB table that was rebuilt by ALTER TABLE +@param table_name Table name in MySQL +@param thd MySQL connection */ static void @@ -5638,17 +8474,30 @@ alter_stats_rebuild( DBUG_VOID_RETURN; } - dberr_t ret; +#ifndef DBUG_OFF + bool ibd_file_missing_orig = false; +#endif /* DBUG_OFF */ - ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); + DBUG_EXECUTE_IF( + "ib_rename_index_fail2", + ibd_file_missing_orig = table->ibd_file_missing; + table->ibd_file_missing = TRUE; + ); + + dberr_t ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); + + DBUG_EXECUTE_IF( + "ib_rename_index_fail2", + table->ibd_file_missing = ibd_file_missing_orig; + ); if (ret != DB_SUCCESS) { push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, ER_ALTER_INFO, - "Error updating stats for table '%s' " - "after table rebuild: %s", + "Error updating stats for table '%s'" + " after table rebuild: %s", table_name, ut_strerr(ret)); } @@ -5673,14 +8522,14 @@ during this operation will be the same as for inplace_alter_table() and thus might be higher than during prepare_inplace_alter_table(). (E.g concurrent writes were blocked during prepare, but might not be during commit). -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done +@param altered_table TABLE object for new version of table. +@param ha_alter_info Structure describing changes to be done by ALTER TABLE and holding data used during in-place alter. -@param commit true => Commit, false => Rollback. -@retval true Failure -@retval false Success +@param commit true => Commit, false => Rollback. 
+@retval true Failure +@retval false Success */ -UNIV_INTERN + bool ha_innobase::commit_inplace_alter_table( /*====================================*/ @@ -5688,32 +8537,40 @@ ha_innobase::commit_inplace_alter_table( Alter_inplace_info* ha_alter_info, bool commit) { - dberr_t error; - ha_innobase_inplace_ctx* ctx0 - = static_cast + dberr_t error; + ha_innobase_inplace_ctx*ctx0; + struct mtr_buf_copy_t logs; + + ctx0 = static_cast (ha_alter_info->handler_ctx); + #ifndef DBUG_OFF - uint crash_inject_count = 1; - uint crash_fail_inject_count = 1; - uint failure_inject_count = 1; -#endif + uint crash_inject_count = 1; + uint crash_fail_inject_count = 1; + uint failure_inject_count = 1; +#endif /* DBUG_OFF */ DBUG_ENTER("commit_inplace_alter_table"); DBUG_ASSERT(!srv_read_only_mode); - DBUG_ASSERT(!ctx0 || ctx0->prebuilt == prebuilt); - DBUG_ASSERT(!ctx0 || ctx0->old_table == prebuilt->table); + DBUG_ASSERT(!ctx0 || ctx0->prebuilt == m_prebuilt); + DBUG_ASSERT(!ctx0 || ctx0->old_table == m_prebuilt->table); DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter"); DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait"); + if (ctx0 != NULL && ctx0->m_stage != NULL) { + ctx0->m_stage->begin_phase_end(); + } + if (!commit) { /* A rollback is being requested. So far we may at most have created some indexes. If any indexes were to be dropped, they would actually be dropped in this method if commit=true. 
*/ - DBUG_RETURN(rollback_inplace_alter_table( - ha_alter_info, table, prebuilt)); + const bool ret = rollback_inplace_alter_table( + ha_alter_info, table, m_prebuilt); + DBUG_RETURN(ret); } if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { @@ -5731,13 +8588,13 @@ ha_innobase::commit_inplace_alter_table( if (ha_alter_info->group_commit_ctx) { ctx_array = ha_alter_info->group_commit_ctx; } else { - ctx_single[0] = ctx0; - ctx_single[1] = NULL; - ctx_array = ctx_single; + ctx_single[0] = ctx0; + ctx_single[1] = NULL; + ctx_array = ctx_single; } DBUG_ASSERT(ctx0 == ctx_array[0]); - ut_ad(prebuilt->table == ctx0->old_table); + ut_ad(m_prebuilt->table == ctx0->old_table); ha_alter_info->group_commit_ctx = NULL; /* Free the ctx->trx of other partitions, if any. We will only @@ -5755,12 +8612,12 @@ ha_innobase::commit_inplace_alter_table( } } - trx_start_if_not_started_xa(prebuilt->trx); + trx_start_if_not_started_xa(m_prebuilt->trx, true); for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) { ha_innobase_inplace_ctx* ctx = static_cast(*pctx); - DBUG_ASSERT(ctx->prebuilt->trx == prebuilt->trx); + DBUG_ASSERT(ctx->prebuilt->trx == m_prebuilt->trx); /* Exclusively lock the table, to ensure that no other transaction is holding locks on the table while we @@ -5771,7 +8628,7 @@ ha_innobase::commit_inplace_alter_table( holding InnoDB locks only, not MySQL locks. 
*/ error = row_merge_lock_table( - prebuilt->trx, ctx->old_table, LOCK_X); + m_prebuilt->trx, ctx->old_table, LOCK_X); if (error != DB_SUCCESS) { my_error_innodb( @@ -5780,7 +8637,7 @@ ha_innobase::commit_inplace_alter_table( } } - DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table"); + DEBUG_SYNC(m_user_thd, "innodb_alter_commit_after_lock_table"); const bool new_clustered = ctx0->need_rebuild(); trx_t* trx = ctx0->trx; @@ -5809,7 +8666,7 @@ ha_innobase::commit_inplace_alter_table( if (!trx) { DBUG_ASSERT(!new_clustered); - trx = innobase_trx_allocate(user_thd); + trx = innobase_trx_allocate(m_user_thd); } trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); @@ -5817,6 +8674,8 @@ ha_innobase::commit_inplace_alter_table( or lock waits can happen in it during the data dictionary operation. */ row_mysql_lock_data_dictionary(trx); + ut_ad(log_append_on_checkpoint(NULL) == NULL); + /* Prevent the background statistics collection from accessing the tables. */ for (;;) { @@ -5861,7 +8720,7 @@ ha_innobase::commit_inplace_alter_table( if (ctx->need_rebuild()) { ctx->tmp_name = dict_mem_create_temporary_tablename( - ctx->heap, ctx->new_table->name, + ctx->heap, ctx->new_table->name.m_name, ctx->new_table->id); fail = commit_try_rebuild( @@ -5869,7 +8728,7 @@ ha_innobase::commit_inplace_alter_table( trx, table_share->table_name.str); } else { fail = commit_try_norebuild( - ha_alter_info, ctx, table, trx, + ha_alter_info, ctx, altered_table, table, trx, table_share->table_name.str); } DBUG_INJECT_CRASH("ib_commit_inplace_crash", @@ -5878,8 +8737,11 @@ ha_innobase::commit_inplace_alter_table( { /* Generate a dynamic dbug text. 
*/ char buf[32]; - ut_snprintf(buf, sizeof buf, "ib_commit_inplace_fail_%u", + + ut_snprintf(buf, sizeof buf, + "ib_commit_inplace_fail_%u", failure_inject_count++); + DBUG_EXECUTE_IF(buf, my_error(ER_INTERNAL_ERROR, MYF(0), "Injected error!"); @@ -5916,7 +8778,7 @@ ha_innobase::commit_inplace_alter_table( /* Out of memory or a problem will occur when renaming files. */ fail = true; - my_error_innodb(error, ctx->old_table->name, + my_error_innodb(error, ctx->old_table->name.m_name, ctx->old_table->flags); } DBUG_INJECT_CRASH("ib_commit_inplace_crash", @@ -5934,10 +8796,30 @@ ha_innobase::commit_inplace_alter_table( ut_ad(!trx->fts_trx); if (fail) { - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + mtr.set_log_mode(MTR_LOG_NO_REDO); mtr_commit(&mtr); trx_rollback_for_mysql(trx); } else { + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + ut_ad(trx_is_rseg_updated(trx)); + + if (mtr.get_log()->size() > 0) { + ut_ad(*mtr.get_log()->front()->begin() + == MLOG_FILE_RENAME2); + + /* Append the MLOG_FILE_RENAME2 + records on checkpoint, as a separate + mini-transaction before the one that + contains the MLOG_CHECKPOINT marker. */ + static const byte multi + = MLOG_MULTI_REC_END; + + mtr.get_log()->for_each_block(logs); + logs.m_buf.push(&multi, sizeof multi); + + log_append_on_checkpoint(&logs.m_buf); + } + /* The following call commits the mini-transaction, making the data dictionary transaction committed at mtr.end_lsn. The @@ -5945,8 +8827,6 @@ ha_innobase::commit_inplace_alter_table( log_buffer_flush_to_disk() returns. In the logical sense the commit in the file-based data structures happens here. */ - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); - ut_ad(trx->insert_undo || trx->update_undo); trx_commit_low(trx, &mtr); } @@ -5956,6 +8836,7 @@ ha_innobase::commit_inplace_alter_table( and the .frm files must be swapped manually by the administrator. No loss of data. 
*/ DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit", + log_make_checkpoint_at(LSN_MAX, TRUE); log_buffer_flush_to_disk(); DBUG_SUICIDE();); } @@ -5971,7 +8852,6 @@ ha_innobase::commit_inplace_alter_table( update the in-memory structures, close some handles, release temporary files, and (unless we rolled back) update persistent statistics. */ - for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) { ha_innobase_inplace_ctx* ctx @@ -5985,20 +8865,10 @@ ha_innobase::commit_inplace_alter_table( if (fail) { if (new_clustered) { - dict_table_close(ctx->new_table, - TRUE, FALSE); - -#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG - /* Nobody should have initialized the - stats of the newly created table - yet. When this is the case, we know - that it has not been added for - background stats gathering. */ - ut_a(!ctx->new_table->stat_initialized); -#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - row_merge_drop_table(trx, ctx->new_table); + + dict_table_close_and_drop(trx, ctx->new_table); + trx_commit_for_mysql(trx); ctx->new_table = NULL; } else { @@ -6033,15 +8903,18 @@ ha_innobase::commit_inplace_alter_table( implemented yet. */ ctx->old_table->to_be_dropped = true; + DBUG_PRINT("to_be_dropped", + ("table: %s", ctx->old_table->name.m_name)); + /* Rename the tablespace files. */ commit_cache_rebuild(ctx); - error = innobase_update_foreign_cache(ctx, user_thd); + error = innobase_update_foreign_cache(ctx, m_user_thd); if (error != DB_SUCCESS) { goto foreign_fail; } } else { - error = innobase_update_foreign_cache(ctx, user_thd); + error = innobase_update_foreign_cache(ctx, m_user_thd); if (error != DB_SUCCESS) { foreign_fail: @@ -6051,34 +8924,42 @@ foreign_fail: kill and restart the server, but the *.frm file has not been replaced yet. 
*/ - my_error(ER_CANNOT_ADD_FOREIGN, - MYF(0)); - sql_print_error( - "InnoDB: dict_load_foreigns()" - " returned %u for %s", - (unsigned) error, - thd_query_string(user_thd) - ->str); - ut_ad(0); + push_warning_printf( + m_user_thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ALTER_INFO, + "InnoDB: Could not add foreign" + " key constraints."); } else { if (!commit_cache_norebuild( ctx, table, trx)) { - ut_a(!prebuilt->trx->check_foreigns); + ut_a(!m_prebuilt->trx->check_foreigns); } - innobase_rename_columns_cache( + innobase_rename_or_enlarge_columns_cache( ha_alter_info, table, ctx->new_table); +#ifdef MYSQL_RENAME_INDEX + rename_indexes_in_cache(ctx, ha_alter_info); +#endif } + } + + dict_mem_table_free_foreign_vcol_set(ctx->new_table); + dict_mem_table_fill_foreign_vcol_set(ctx->new_table); + DBUG_INJECT_CRASH("ib_commit_inplace_crash", crash_inject_count++); } + log_append_on_checkpoint(NULL); + /* Invalidate the index translation table. In partitioned - tables, there is one TABLE_SHARE (and also only one TABLE) - covering all partitions. */ - share->idx_trans_tbl.index_count = 0; + tables, there is no share. 
*/ + if (m_share) { + m_share->idx_trans_tbl.index_count = 0; + } if (trx == ctx0->trx) { ctx0->trx = NULL; @@ -6110,8 +8991,59 @@ foreign_fail: DBUG_RETURN(true); } + if (ctx0->num_to_drop_vcol || ctx0->num_to_add_vcol) { + + if (ctx0->old_table->get_ref_count() > 1) { + + row_mysql_unlock_data_dictionary(trx); + trx_free_for_mysql(trx); + my_printf_error(ER_ILLEGAL_HA, "Cannot complete the operation " + "because table is referenced by another connection.", MYF(0)); + DBUG_RETURN(true); + } + + trx_commit_for_mysql(m_prebuilt->trx); + + if (btr_search_enabled) { + btr_search_disable(false); + btr_search_enable(); + } + + char tb_name[FN_REFLEN]; + ut_strcpy(tb_name, m_prebuilt->table->name.m_name); + + tb_name[strlen(m_prebuilt->table->name.m_name)] = 0; + + dict_table_close(m_prebuilt->table, true, false); + dict_table_remove_from_cache(m_prebuilt->table); + m_prebuilt->table = dict_table_open_on_name( + tb_name, TRUE, TRUE, DICT_ERR_IGNORE_NONE); + + /* Drop outdated table stats. */ + char errstr[1024]; + if (dict_stats_drop_table( + m_prebuilt->table->name.m_name, + errstr, sizeof(errstr)) + != DB_SUCCESS) { + push_warning_printf( + m_user_thd, + Sql_condition::WARN_LEVEL_WARN, + ER_ALTER_INFO, + "Deleting persistent statistics" + " for table '%s' in" + " InnoDB failed: %s", + table->s->table_name.str, + errstr); + } + + row_mysql_unlock_data_dictionary(trx); + trx_free_for_mysql(trx); + MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); + DBUG_RETURN(false); + } + /* Release the table locks. 
*/ - trx_commit_for_mysql(prebuilt->trx); + trx_commit_for_mysql(m_prebuilt->trx); DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit", DBUG_SUICIDE();); @@ -6169,15 +9101,20 @@ foreign_fail: char errstr[1024]; - DBUG_ASSERT(0 == strcmp(ctx->old_table->name, + DBUG_ASSERT(0 == strcmp(ctx->old_table->name.m_name, ctx->tmp_name)); + DBUG_EXECUTE_IF( + "ib_rename_index_fail3", + DBUG_SET("+d,innodb_report_deadlock"); + ); + if (dict_stats_drop_table( - ctx->new_table->name, + ctx->new_table->name.m_name, errstr, sizeof(errstr)) != DB_SUCCESS) { push_warning_printf( - user_thd, + m_user_thd, Sql_condition::WARN_LEVEL_WARN, ER_ALTER_INFO, "Deleting persistent statistics" @@ -6187,10 +9124,19 @@ foreign_fail: errstr); } + DBUG_EXECUTE_IF( + "ib_rename_index_fail3", + DBUG_SET("-d,innodb_report_deadlock"); + ); + DBUG_EXECUTE_IF("ib_ddl_crash_before_commit", DBUG_SUICIDE();); - trx_t* const user_trx = prebuilt->trx; + ut_ad(m_prebuilt != ctx->prebuilt + || ctx == ctx0); + bool update_own_prebuilt = + (m_prebuilt == ctx->prebuilt); + trx_t* const user_trx = m_prebuilt->trx; row_prebuilt_free(ctx->prebuilt, TRUE); @@ -6200,15 +9146,28 @@ foreign_fail: before this is completed, some orphan tables with ctx->tmp_name may be recovered. */ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - row_merge_drop_table(trx, ctx->old_table); + error = row_merge_drop_table(trx, ctx->old_table); + + if (error != DB_SUCCESS) { + ib::error() << "Inplace alter table " << ctx->old_table->name.m_name + << " dropping copy of the old table failed error " + << error + << ". tmp_name " << (ctx->tmp_name ? ctx->tmp_name : "N/A") + << " new_table " << (ctx->new_table ? ctx->new_table->name.m_name + : "N/A"); + } + trx_commit_for_mysql(trx); /* Rebuild the prebuilt object. 
*/ ctx->prebuilt = row_create_prebuilt( ctx->new_table, altered_table->s->reclength); - trx_start_if_not_started(user_trx); + if (update_own_prebuilt) { + m_prebuilt = ctx->prebuilt; + } + trx_start_if_not_started(user_trx, true); user_trx->will_lock++; - prebuilt->trx = user_trx; + m_prebuilt->trx = user_trx; } DBUG_INJECT_CRASH("ib_commit_inplace_crash", crash_inject_count++); @@ -6231,7 +9190,7 @@ foreign_fail: alter_stats_rebuild( ctx->new_table, table->s->table_name.str, - user_thd); + m_user_thd); DBUG_INJECT_CRASH("ib_commit_inplace_crash", crash_inject_count++); } @@ -6245,39 +9204,411 @@ foreign_fail: alter_stats_norebuild( ha_alter_info, ctx, altered_table, - table->s->table_name.str, user_thd); + table->s->table_name.str, m_user_thd); DBUG_INJECT_CRASH("ib_commit_inplace_crash", crash_inject_count++); } } + /* We don't support compression for the system tablespace nor + the temporary tablespace. Only because they are shared tablespaces. + There is no other technical reason. */ + + innobase_parse_hint_from_comment( + m_user_thd, m_prebuilt->table, altered_table->s); + /* TODO: Also perform DROP TABLE and DROP INDEX after the MDL downgrade. */ #ifndef DBUG_OFF dict_index_t* clust_index = dict_table_get_first_index( - prebuilt->table); + ctx0->prebuilt->table); DBUG_ASSERT(!clust_index->online_log); DBUG_ASSERT(dict_index_get_online_status(clust_index) == ONLINE_INDEX_COMPLETE); - for (dict_index_t* index = dict_table_get_first_index( - prebuilt->table); + for (dict_index_t* index = clust_index; index; index = dict_table_get_next_index(index)) { DBUG_ASSERT(!index->to_be_dropped); } #endif /* DBUG_OFF */ - MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); DBUG_RETURN(false); } + +/** Helper class for in-place alter, see handler.h */ +class ha_innopart_inplace_ctx : public inplace_alter_handler_ctx +{ +/* Only used locally in this file, so have everything public for +conveniance. */ +public: + /** Total number of partitions. 
*/ + uint m_tot_parts; + /** Array of inplace contexts for all partitions. */ + inplace_alter_handler_ctx** ctx_array; + /** Array of prebuilt for all partitions. */ + row_prebuilt_t** prebuilt_array; + + ha_innopart_inplace_ctx(THD *thd, uint tot_parts) + : inplace_alter_handler_ctx(), + m_tot_parts(tot_parts), + ctx_array(), + prebuilt_array() + {} + + ~ha_innopart_inplace_ctx() + { + if (ctx_array) { + for (uint i = 0; i < m_tot_parts; i++) { + delete ctx_array[i]; + } + ut_free(ctx_array); + } + if (prebuilt_array) { + /* First entry is the original prebuilt! */ + for (uint i = 1; i < m_tot_parts; i++) { + /* Don't close the tables. */ + prebuilt_array[i]->table = NULL; + row_prebuilt_free(prebuilt_array[i], false); + } + ut_free(prebuilt_array); + } + } +}; + +#ifdef MYSQL_INNODB_PARTITIONING + +/** Check if supported inplace alter table. +@param[in] altered_table Altered MySQL table. +@param[in] ha_alter_info Information about inplace operations to do. +@return Lock level, not supported or error */ +enum_alter_inplace_result +ha_innopart::check_if_supported_inplace_alter( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + DBUG_ENTER("ha_innopart::check_if_supported_inplace_alter"); + DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); + + /* Not supporting these for partitioned tables yet! */ + + /* FK not yet supported. */ + if (ha_alter_info->handler_flags + & (Alter_inplace_info::ADD_FOREIGN_KEY + | Alter_inplace_info::DROP_FOREIGN_KEY)) { + + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_FOREIGN_KEY_ON_PARTITIONED); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + /* FTS not yet supported either. 
*/ + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ADD_INDEX)) { + + for (uint i = 0; i < ha_alter_info->index_add_count; i++) { + const KEY* key = + &ha_alter_info->key_info_buffer[ + ha_alter_info->index_add_buffer[i]]; + if (key->flags & HA_FULLTEXT) { + DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK + & ~(HA_FULLTEXT + | HA_PACK_KEY + | HA_GENERATED_KEY + | HA_BINARY_PACK_KEY))); + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_FULLTEXT_NOT_SUPPORTED_WITH_PARTITIONING); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + } + /* We cannot allow INPLACE to change order of KEY partitioning fields! */ + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) + && !m_part_info->same_key_column_order( + &ha_alter_info->alter_info->create_list)) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* Cannot allow INPLACE for drop and create PRIMARY KEY if partition is + on Primary Key - PARTITION BY KEY() */ + if ((ha_alter_info->handler_flags + & (Alter_inplace_info::ADD_PK_INDEX + | Alter_inplace_info::DROP_PK_INDEX))) { + + /* Check partition by key(). */ + if ((m_part_info->part_type == HASH_PARTITION) + && m_part_info->list_of_part_fields + && m_part_info->part_field_list.is_empty()) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* Check sub-partition by key(). */ + if ((m_part_info->subpart_type == HASH_PARTITION) + && m_part_info->list_of_subpart_fields + && m_part_info->subpart_field_list.is_empty()) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + + /* Check for PK and UNIQUE should already be done when creating the + new table metadata. + (fix_partition_info/check_primary_key+check_unique_key) */ + + set_partition(0); + DBUG_RETURN(ha_innobase::check_if_supported_inplace_alter(altered_table, + ha_alter_info)); +} + +/** Prepare inplace alter table. 
+Allows InnoDB to update internal structures with concurrent +writes blocked (provided that check_if_supported_inplace_alter() +did not return HA_ALTER_INPLACE_NO_LOCK). +This will be invoked before inplace_alter_table(). +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@retval true Failure. +@retval false Success. */ +bool +ha_innopart::prepare_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + THD* thd; + ha_innopart_inplace_ctx* ctx_parts; + bool res = true; + DBUG_ENTER("ha_innopart::prepare_inplace_alter_table"); + DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); + + thd = ha_thd(); + + /* Clean up all ins/upd nodes. */ + clear_ins_upd_nodes(); + /* Based on Sql_alloc class, return NULL for new on failure. */ + ctx_parts = new ha_innopart_inplace_ctx(thd, m_tot_parts); + if (!ctx_parts) { + DBUG_RETURN(HA_ALTER_ERROR); + } + + uint ctx_array_size = sizeof(inplace_alter_handler_ctx*) + * (m_tot_parts + 1); + ctx_parts->ctx_array = + static_cast( + ut_malloc(ctx_array_size, + mem_key_partitioning)); + if (!ctx_parts->ctx_array) { + DBUG_RETURN(HA_ALTER_ERROR); + } + + /* Set all to NULL, including the terminating one. */ + memset(ctx_parts->ctx_array, 0, ctx_array_size); + + ctx_parts->prebuilt_array = static_cast( + ut_malloc(sizeof(row_prebuilt_t*) + * m_tot_parts, + mem_key_partitioning)); + if (!ctx_parts->prebuilt_array) { + DBUG_RETURN(HA_ALTER_ERROR); + } + /* For the first partition use the current prebuilt. */ + ctx_parts->prebuilt_array[0] = m_prebuilt; + /* Create new prebuilt for the rest of the partitions. + It is needed for the current implementation of + ha_innobase::commit_inplace_alter_table(). 
*/ + for (uint i = 1; i < m_tot_parts; i++) { + row_prebuilt_t* tmp_prebuilt; + tmp_prebuilt = row_create_prebuilt( + m_part_share->get_table_part(i), + table_share->reclength); + /* Use same trx as original prebuilt. */ + tmp_prebuilt->trx = m_prebuilt->trx; + ctx_parts->prebuilt_array[i] = tmp_prebuilt; + } + + const char* save_tablespace = + ha_alter_info->create_info->tablespace; + + const char* save_data_file_name = + ha_alter_info->create_info->data_file_name; + + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + m_prebuilt_ptr = ctx_parts->prebuilt_array + i; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + + /* Set the tablespace and data_file_name value of the + alter_info to the tablespace value and data_file_name + value that was existing for the partition originally, + so that for ALTER TABLE the tablespace clause in create + option is ignored for existing partitions, and later + set it back to its old value */ + + ha_alter_info->create_info->tablespace = + m_prebuilt->table->tablespace; + ha_alter_info->create_info->data_file_name = + m_prebuilt->table->data_dir_path; + + res = ha_innobase::prepare_inplace_alter_table(altered_table, + ha_alter_info); + update_partition(i); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + if (res) { + break; + } + } + m_prebuilt = ctx_parts->prebuilt_array[0]; + m_prebuilt_ptr = &m_prebuilt; + ha_alter_info->handler_ctx = ctx_parts; + ha_alter_info->group_commit_ctx = ctx_parts->ctx_array; + ha_alter_info->create_info->tablespace = save_tablespace; + ha_alter_info->create_info->data_file_name = save_data_file_name; + DBUG_RETURN(res); +} + +/** Inplace alter table. +Alter the table structure in-place with operations +specified using Alter_inplace_info. +The level of concurrency allowed during this operation depends +on the return value from check_if_supported_inplace_alter(). +@param[in] altered_table TABLE object for new version of table. 
+@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@retval true Failure. +@retval false Success. */ +bool +ha_innopart::inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + bool res = true; + ha_innopart_inplace_ctx* ctx_parts; + + ctx_parts = static_cast( + ha_alter_info->handler_ctx); + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + res = ha_innobase::inplace_alter_table(altered_table, + ha_alter_info); + ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + if (res) { + break; + } + } + m_prebuilt = ctx_parts->prebuilt_array[0]; + ha_alter_info->handler_ctx = ctx_parts; + return(res); +} + +/** Commit or rollback inplace alter table. +Commit or rollback the changes made during +prepare_inplace_alter_table() and inplace_alter_table() inside +the storage engine. Note that the allowed level of concurrency +during this operation will be the same as for +inplace_alter_table() and thus might be higher than during +prepare_inplace_alter_table(). (E.g concurrent writes were +blocked during prepare, but might not be during commit). +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@param[in] commit true => Commit, false => Rollback. +@retval true Failure. +@retval false Success. 
*/ +bool +ha_innopart::commit_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info, + bool commit) +{ + bool res = false; + ha_innopart_inplace_ctx* ctx_parts; + + ctx_parts = static_cast( + ha_alter_info->handler_ctx); + ut_ad(ctx_parts); + ut_ad(ctx_parts->prebuilt_array); + ut_ad(ctx_parts->prebuilt_array[0] == m_prebuilt); + if (commit) { + /* Commit is done through first partition (group commit). */ + ut_ad(ha_alter_info->group_commit_ctx == ctx_parts->ctx_array); + ha_alter_info->handler_ctx = ctx_parts->ctx_array[0]; + set_partition(0); + res = ha_innobase::commit_inplace_alter_table(altered_table, + ha_alter_info, + commit); + ut_ad(res || !ha_alter_info->group_commit_ctx); + goto end; + } + /* Rollback is done for each partition. */ + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + if (ha_innobase::commit_inplace_alter_table(altered_table, + ha_alter_info, commit)) { + res = true; + } + ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + } +end: + /* Move the ownership of the new tables back to + the m_part_share. */ + ha_innobase_inplace_ctx* ctx; + for (uint i = 0; i < m_tot_parts; i++) { + /* TODO: Fix to only use one prebuilt (i.e. make inplace + alter partition aware instead of using multiple prebuilt + copies... */ + ctx = static_cast( + ctx_parts->ctx_array[i]); + if (ctx) { + m_part_share->set_table_part(i, ctx->prebuilt->table); + ctx->prebuilt->table = NULL; + ctx_parts->prebuilt_array[i] = ctx->prebuilt; + } + } + /* The above juggling of prebuilt must be reset here. */ + m_prebuilt = ctx_parts->prebuilt_array[0]; + m_prebuilt->table = m_part_share->get_table_part(0); + ha_alter_info->handler_ctx = ctx_parts; + return(res); +} + +/** Notify the storage engine that the table structure (.frm) has +been updated. 
+ +ha_partition allows inplace operations that also upgrades the engine +if it supports partitioning natively. So if this is the case then +we will remove the .par file since it is not used with ha_innopart +(we use the internal data dictionary instead). */ +void +ha_innopart::notify_table_changed() +{ + char tmp_par_path[FN_REFLEN + 1]; + strxnmov(tmp_par_path, FN_REFLEN, table->s->normalized_path.str, + ".par", NullS); + + if (my_access(tmp_par_path, W_OK) == 0) + { + my_delete(tmp_par_path, MYF(0)); + } +} +#endif /* MYSQL_INNODB_PARTITIONING */ + /** -@param thd - the session -@param start_value - the lower bound -@param max_value - the upper bound (inclusive) */ -UNIV_INTERN +@param thd the session +@param start_value the lower bound +@param max_value the upper bound (inclusive) */ + ib_sequence_t::ib_sequence_t( THD* thd, ulonglong start_value, @@ -6314,7 +9645,7 @@ ib_sequence_t::ib_sequence_t( /** Postfix increment @return the next value to insert */ -UNIV_INTERN + ulonglong ib_sequence_t::operator++(int) UNIV_NOTHROW { diff --git a/storage/innobase/handler/handler0alter_innopart.cc b/storage/innobase/handler/handler0alter_innopart.cc new file mode 100644 index 00000000000..0f2d5c7e576 --- /dev/null +++ b/storage/innobase/handler/handler0alter_innopart.cc @@ -0,0 +1,307 @@ +/* JAN: TODO: MySQL 5.7 InnoDB partitioning. */ + +/** Prepare inplace alter table. +Allows InnoDB to update internal structures with concurrent +writes blocked (provided that check_if_supported_inplace_alter() +did not return HA_ALTER_INPLACE_NO_LOCK). +This will be invoked before inplace_alter_table(). +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@retval true Failure. +@retval false Success. 
*/ +bool +ha_innopart::prepare_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + THD* thd; + ha_innopart_inplace_ctx* ctx_parts; + bool res = true; + DBUG_ENTER("ha_innopart::prepare_inplace_alter_table"); + DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); + + thd = ha_thd(); + + /* Clean up all ins/upd nodes. */ + clear_ins_upd_nodes(); + /* Based on Sql_alloc class, return NULL for new on failure. */ + ctx_parts = new ha_innopart_inplace_ctx(thd, m_tot_parts); + if (!ctx_parts) { + DBUG_RETURN(HA_ALTER_ERROR); + } + + uint ctx_array_size = sizeof(inplace_alter_handler_ctx*) + * (m_tot_parts + 1); + ctx_parts->ctx_array = + static_cast( + ut_malloc(ctx_array_size, + mem_key_partitioning)); + if (!ctx_parts->ctx_array) { + DBUG_RETURN(HA_ALTER_ERROR); + } + + /* Set all to NULL, including the terminating one. */ + memset(ctx_parts->ctx_array, 0, ctx_array_size); + + ctx_parts->prebuilt_array = static_cast( + ut_malloc(sizeof(row_prebuilt_t*) + * m_tot_parts, + mem_key_partitioning)); + if (!ctx_parts->prebuilt_array) { + DBUG_RETURN(HA_ALTER_ERROR); + } + /* For the first partition use the current prebuilt. */ + ctx_parts->prebuilt_array[0] = m_prebuilt; + /* Create new prebuilt for the rest of the partitions. + It is needed for the current implementation of + ha_innobase::commit_inplace_alter_table(). */ + for (uint i = 1; i < m_tot_parts; i++) { + row_prebuilt_t* tmp_prebuilt; + tmp_prebuilt = row_create_prebuilt( + m_part_share->get_table_part(i), + table_share->reclength); + /* Use same trx as original prebuilt. 
*/ + tmp_prebuilt->trx = m_prebuilt->trx; + ctx_parts->prebuilt_array[i] = tmp_prebuilt; + } + + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + m_prebuilt_ptr = ctx_parts->prebuilt_array + i; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + res = ha_innobase::prepare_inplace_alter_table(altered_table, + ha_alter_info); + update_partition(i); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + if (res) { + break; + } + } + m_prebuilt = ctx_parts->prebuilt_array[0]; + m_prebuilt_ptr = &m_prebuilt; + ha_alter_info->handler_ctx = ctx_parts; + ha_alter_info->group_commit_ctx = ctx_parts->ctx_array; + DBUG_RETURN(res); +} + +/** Inplace alter table. +Alter the table structure in-place with operations +specified using Alter_inplace_info. +The level of concurrency allowed during this operation depends +on the return value from check_if_supported_inplace_alter(). +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@retval true Failure. +@retval false Success. */ +bool +ha_innopart::inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + bool res = true; + ha_innopart_inplace_ctx* ctx_parts; + + ctx_parts = static_cast( + ha_alter_info->handler_ctx); + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + res = ha_innobase::inplace_alter_table(altered_table, + ha_alter_info); + ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + if (res) { + break; + } + } + m_prebuilt = ctx_parts->prebuilt_array[0]; + ha_alter_info->handler_ctx = ctx_parts; + return(res); +} + +/** Commit or rollback inplace alter table. 
+Commit or rollback the changes made during +prepare_inplace_alter_table() and inplace_alter_table() inside +the storage engine. Note that the allowed level of concurrency +during this operation will be the same as for +inplace_alter_table() and thus might be higher than during +prepare_inplace_alter_table(). (E.g concurrent writes were +blocked during prepare, but might not be during commit). +@param[in] altered_table TABLE object for new version of table. +@param[in] ha_alter_info Structure describing changes to be done +by ALTER TABLE and holding data used during in-place alter. +@param[in] commit true => Commit, false => Rollback. +@retval true Failure. +@retval false Success. */ +bool +ha_innopart::commit_inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info, + bool commit) +{ + bool res = false; + ha_innopart_inplace_ctx* ctx_parts; + + ctx_parts = static_cast( + ha_alter_info->handler_ctx); + ut_ad(ctx_parts); + ut_ad(ctx_parts->prebuilt_array); + ut_ad(ctx_parts->prebuilt_array[0] == m_prebuilt); + if (commit) { + /* Commit is done through first partition (group commit). */ + ut_ad(ha_alter_info->group_commit_ctx == ctx_parts->ctx_array); + ha_alter_info->handler_ctx = ctx_parts->ctx_array[0]; + set_partition(0); + res = ha_innobase::commit_inplace_alter_table(altered_table, + ha_alter_info, + commit); + ut_ad(res || !ha_alter_info->group_commit_ctx); + goto end; + } + /* Rollback is done for each partition. */ + for (uint i = 0; i < m_tot_parts; i++) { + m_prebuilt = ctx_parts->prebuilt_array[i]; + ha_alter_info->handler_ctx = ctx_parts->ctx_array[i]; + set_partition(i); + if (ha_innobase::commit_inplace_alter_table(altered_table, + ha_alter_info, commit)) { + res = true; + } + ut_ad(ctx_parts->ctx_array[i] == ha_alter_info->handler_ctx); + ctx_parts->ctx_array[i] = ha_alter_info->handler_ctx; + } +end: + /* Move the ownership of the new tables back to + the m_part_share. 
*/ + ha_innobase_inplace_ctx* ctx; + for (uint i = 0; i < m_tot_parts; i++) { + /* TODO: Fix to only use one prebuilt (i.e. make inplace + alter partition aware instead of using multiple prebuilt + copies... */ + ctx = static_cast( + ctx_parts->ctx_array[i]); + if (ctx) { + m_part_share->set_table_part(i, ctx->prebuilt->table); + ctx->prebuilt->table = NULL; + ctx_parts->prebuilt_array[i] = ctx->prebuilt; + } + } + /* The above juggling of prebuilt must be reset here. */ + m_prebuilt = ctx_parts->prebuilt_array[0]; + m_prebuilt->table = m_part_share->get_table_part(0); + ha_alter_info->handler_ctx = ctx_parts; + return(res); +} + +/** Notify the storage engine that the table structure (.frm) has +been updated. + +ha_partition allows inplace operations that also upgrades the engine +if it supports partitioning natively. So if this is the case then +we will remove the .par file since it is not used with ha_innopart +(we use the internal data dictionary instead). */ +void +ha_innopart::notify_table_changed() +{ + char tmp_par_path[FN_REFLEN + 1]; + strxnmov(tmp_par_path, FN_REFLEN, table->s->normalized_path.str, + ".par", NullS); + + if (my_access(tmp_par_path, W_OK) == 0) + { + my_delete(tmp_par_path, MYF(0)); + } +} + +/** Check if supported inplace alter table. +@param[in] altered_table Altered MySQL table. +@param[in] ha_alter_info Information about inplace operations to do. +@return Lock level, not supported or error */ +enum_alter_inplace_result +ha_innopart::check_if_supported_inplace_alter( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) +{ + DBUG_ENTER("ha_innopart::check_if_supported_inplace_alter"); + DBUG_ASSERT(ha_alter_info->handler_ctx == NULL); + + /* Not supporting these for partitioned tables yet! */ + + /* FK not yet supported. 
*/ + if (ha_alter_info->handler_flags + & (Alter_inplace_info::ADD_FOREIGN_KEY + | Alter_inplace_info::DROP_FOREIGN_KEY)) { + + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_FOREIGN_KEY_ON_PARTITIONED); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + /* FTS not yet supported either. */ + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ADD_INDEX)) { + + for (uint i = 0; i < ha_alter_info->index_add_count; i++) { + const KEY* key = + &ha_alter_info->key_info_buffer[ + ha_alter_info->index_add_buffer[i]]; + if (key->flags & HA_FULLTEXT) { + DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK + & ~(HA_FULLTEXT + | HA_PACK_KEY + | HA_GENERATED_KEY + | HA_BINARY_PACK_KEY))); + ha_alter_info->unsupported_reason = + innobase_get_err_msg( + ER_FULLTEXT_NOT_SUPPORTED_WITH_PARTITIONING); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + } + /* We cannot allow INPLACE to change order of KEY partitioning fields! */ + if ((ha_alter_info->handler_flags + & Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) + && !m_part_info->same_key_column_order( + &ha_alter_info->alter_info->create_list)) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* Cannot allow INPLACE for drop and create PRIMARY KEY if partition is + on Primary Key - PARTITION BY KEY() */ + if ((ha_alter_info->handler_flags + & (Alter_inplace_info::ADD_PK_INDEX + | Alter_inplace_info::DROP_PK_INDEX))) { + + /* Check partition by key(). */ + if ((m_part_info->part_type == HASH_PARTITION) + && m_part_info->list_of_part_fields + && m_part_info->part_field_list.is_empty()) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* Check sub-partition by key(). */ + if ((m_part_info->subpart_type == HASH_PARTITION) + && m_part_info->list_of_subpart_fields + && m_part_info->subpart_field_list.is_empty()) { + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + + /* Check for PK and UNIQUE should already be done when creating the + new table metadata. 
+ (fix_partition_info/check_primary_key+check_unique_key) */ + + set_partition(0); + enum_alter_inplace_result res = + ha_innobase::check_if_supported_inplace_alter(altered_table, + ha_alter_info); + + DBUG_RETURN(res); +} + diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index d1e6e3ed808..c7bfea4476b 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -25,20 +25,16 @@ Created July 18, 2007 Vasil Dimov Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) *******************************************************/ +#include "ha_prototypes.h" +#include +#include #include "univ.i" -#include #include +#include +#include -#include -#include -#include -#include -#include #include "i_s.h" -#include -#include - #include "btr0pcur.h" #include "btr0types.h" #include "dict0dict.h" @@ -48,7 +44,6 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) #include "ibuf0ibuf.h" #include "dict0mem.h" #include "dict0types.h" -#include "ha_prototypes.h" #include "srv0start.h" #include "trx0i_s.h" #include "trx0trx.h" @@ -63,6 +58,9 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) #include "sync0arr.h" #include "fil0fil.h" #include "fil0crypt.h" +#include "fsp0sysspace.h" +#include "ut0new.h" +#include "dict0crea.h" /** structure associates a name string with a file page type and/or buffer page state. */ @@ -72,17 +70,28 @@ struct buf_page_desc_t{ ulint type_value; /*!< Page type or page state */ }; -/** Change buffer B-tree page */ -#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 1) - -/** Any states greater than I_S_PAGE_TYPE_IBUF would be treated as -unknown. */ -#define I_S_PAGE_TYPE_UNKNOWN (I_S_PAGE_TYPE_IBUF + 1) - /** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position in i_s_page_type[] array */ #define I_S_PAGE_TYPE_INDEX 1 +/** Any unassigned FIL_PAGE_TYPE will be treated as unknown. 
*/ +#define I_S_PAGE_TYPE_UNKNOWN FIL_PAGE_TYPE_UNKNOWN + +/** R-tree index page */ +#define I_S_PAGE_TYPE_RTREE (FIL_PAGE_TYPE_LAST + 1) + +/** Change buffer B-tree page */ +#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 2) + +#define I_S_PAGE_TYPE_LAST I_S_PAGE_TYPE_IBUF + +#define I_S_PAGE_TYPE_BITS 4 + +/* Check if we can hold all page types */ +#if I_S_PAGE_TYPE_LAST >= 1 << I_S_PAGE_TYPE_BITS +# error i_s_page_type[] is too large +#endif + /** Name string for File Page Types */ static buf_page_desc_t i_s_page_type[] = { {"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED}, @@ -98,16 +107,13 @@ static buf_page_desc_t i_s_page_type[] = { {"BLOB", FIL_PAGE_TYPE_BLOB}, {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB}, {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2}, + {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}, + {"RTREE_INDEX", I_S_PAGE_TYPE_RTREE}, {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF}, {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED}, - {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN} + {"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED}, }; -/* Check if we can hold all page type in a 4 bit value */ -#if I_S_PAGE_TYPE_UNKNOWN > 1<<4 -# error "i_s_page_type[] is too large" -#endif - /** This structure defines information we will fetch from pages currently cached in the buffer pool. 
It will be used to populate table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */ @@ -132,7 +138,7 @@ struct buf_page_info_t{ unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS; /*!< Compressed page size */ unsigned page_state:BUF_PAGE_STATE_BITS; /*!< Page state */ - unsigned page_type:4; /*!< Page type */ + unsigned page_type:I_S_PAGE_TYPE_BITS; /*!< Page type */ unsigned num_recs:UNIV_PAGE_SIZE_SHIFT_MAX-2; /*!< Number of records on Page */ unsigned data_size:UNIV_PAGE_SIZE_SHIFT_MAX; @@ -191,7 +197,7 @@ Common function to fill any of the dynamic tables: INFORMATION_SCHEMA.innodb_trx INFORMATION_SCHEMA.innodb_locks INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ +@return 0 on success */ static int trx_i_s_common_fill_table( @@ -202,7 +208,7 @@ trx_i_s_common_fill_table( /*******************************************************************//** Unbind a dynamic INFORMATION_SCHEMA table. -@return 0 on success */ +@return 0 on success */ static int i_s_common_deinit( @@ -211,7 +217,7 @@ i_s_common_deinit( /*******************************************************************//** Auxiliary function to store time_t value in MYSQL_TYPE_DATETIME field. -@return 0 on success */ +@return 0 on success */ static int field_store_time_t( @@ -237,12 +243,15 @@ field_store_time_t( memset(&my_time, 0, sizeof(my_time)); } + /* JAN: TODO: MySQL 5.7 + return(field->store_time(&my_time, MYSQL_TIMESTAMP_DATETIME)); + */ return(field->store_time(&my_time)); } /*******************************************************************//** Auxiliary function to store char* value in MYSQL_TYPE_STRING field. -@return 0 on success */ +@return 0 on success */ int field_store_string( /*===============*/ @@ -269,7 +278,7 @@ field_store_string( /*******************************************************************//** Store the name of an index in a MYSQL_TYPE_VARCHAR field. Handles the names of incomplete secondary indexes. 
-@return 0 on success */ +@return 0 on success */ static int field_store_index_name( @@ -287,7 +296,7 @@ field_store_index_name( /* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert it to something else. */ - if (index_name[0] == TEMP_INDEX_PREFIX) { + if (*index_name == *TEMP_INDEX_PREFIX_STR) { char buf[NAME_LEN + 1]; buf[0] = '?'; memcpy(buf + 1, index_name + 1, strlen(index_name)); @@ -307,8 +316,8 @@ field_store_index_name( /*******************************************************************//** Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field. -If the value is ULINT_UNDEFINED then the field it set to NULL. -@return 0 on success */ +If the value is ULINT_UNDEFINED then the field is set to NULL. +@return 0 on success */ int field_store_ulint( /*==============*/ @@ -319,7 +328,7 @@ field_store_ulint( if (n != ULINT_UNDEFINED) { - ret = field->store(static_cast(n)); + ret = field->store(n, true); field->set_notnull(); } else { @@ -555,7 +564,7 @@ static ST_FIELD_INFO innodb_trx_fields_info[] = /*******************************************************************//** Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_trx table with it. 
-@return 0 on success */ +@return 0 on success */ static int fill_innodb_trx_from_cache( @@ -620,12 +629,11 @@ fill_innodb_trx_from_cache( } /* trx_weight */ - OK(fields[IDX_TRX_WEIGHT]->store((longlong) row->trx_weight, - true)); + OK(fields[IDX_TRX_WEIGHT]->store(row->trx_weight, true)); /* trx_mysql_thread_id */ OK(fields[IDX_TRX_MYSQL_THREAD_ID]->store( - static_cast(row->trx_mysql_thread_id))); + row->trx_mysql_thread_id, true)); /* trx_query */ if (row->trx_query) { @@ -646,31 +654,31 @@ fill_innodb_trx_from_cache( /* trx_tables_in_use */ OK(fields[IDX_TRX_TABLES_IN_USE]->store( - (longlong) row->trx_tables_in_use, true)); + row->trx_tables_in_use, true)); /* trx_tables_locked */ OK(fields[IDX_TRX_TABLES_LOCKED]->store( - (longlong) row->trx_tables_locked, true)); + row->trx_tables_locked, true)); /* trx_lock_structs */ OK(fields[IDX_TRX_LOCK_STRUCTS]->store( - (longlong) row->trx_lock_structs, true)); + row->trx_lock_structs, true)); /* trx_lock_memory_bytes */ OK(fields[IDX_TRX_LOCK_MEMORY_BYTES]->store( - (longlong) row->trx_lock_memory_bytes, true)); + row->trx_lock_memory_bytes, true)); /* trx_rows_locked */ OK(fields[IDX_TRX_ROWS_LOCKED]->store( - (longlong) row->trx_rows_locked, true)); + row->trx_rows_locked, true)); /* trx_rows_modified */ OK(fields[IDX_TRX_ROWS_MODIFIED]->store( - (longlong) row->trx_rows_modified, true)); + row->trx_rows_modified, true)); /* trx_concurrency_tickets */ OK(fields[IDX_TRX_CONNCURRENCY_TICKETS]->store( - (longlong) row->trx_concurrency_tickets, true)); + row->trx_concurrency_tickets, true)); /* trx_isolation_level */ OK(field_store_string(fields[IDX_TRX_ISOLATION_LEVEL], @@ -678,11 +686,11 @@ fill_innodb_trx_from_cache( /* trx_unique_checks */ OK(fields[IDX_TRX_UNIQUE_CHECKS]->store( - static_cast(row->trx_unique_checks))); + row->trx_unique_checks, true)); /* trx_foreign_key_checks */ OK(fields[IDX_TRX_FOREIGN_KEY_CHECKS]->store( - static_cast(row->trx_foreign_key_checks))); + row->trx_foreign_key_checks, true)); /* 
trx_last_foreign_key_error */ OK(field_store_string(fields[IDX_TRX_LAST_FOREIGN_KEY_ERROR], @@ -690,20 +698,16 @@ fill_innodb_trx_from_cache( /* trx_adaptive_hash_latched */ OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store( - static_cast(row->trx_has_search_latch))); - - /* trx_adaptive_hash_timeout */ - OK(fields[IDX_TRX_ADAPTIVE_HASH_TIMEOUT]->store( - (longlong) row->trx_search_latch_timeout, true)); + row->trx_has_search_latch, true)); /* trx_is_read_only*/ OK(fields[IDX_TRX_READ_ONLY]->store( - (longlong) row->trx_is_read_only, true)); + row->trx_is_read_only, true)); /* trx_is_autocommit_non_locking */ OK(fields[IDX_TRX_AUTOCOMMIT_NON_LOCKING]->store( - (longlong) row->trx_is_autocommit_non_locking, - true)); + (longlong) row->trx_is_autocommit_non_locking, + true)); OK(schema_table_store_record(thd, table)); } @@ -713,7 +717,7 @@ fill_innodb_trx_from_cache( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.innodb_trx -@return 0 on success */ +@return 0 on success */ static int innodb_trx_init( @@ -886,7 +890,7 @@ static ST_FIELD_INFO innodb_locks_fields_info[] = /*******************************************************************//** Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_locks table with it. 
-@return 0 on success */ +@return 0 on success */ static int fill_innodb_locks_from_cache( @@ -941,9 +945,9 @@ fill_innodb_locks_from_cache( bufend = innobase_convert_name(buf, sizeof(buf), row->lock_table, strlen(row->lock_table), - thd, TRUE); + thd); OK(fields[IDX_LOCK_TABLE]->store( - buf, static_cast(bufend - buf), + buf, static_cast(bufend - buf), system_charset_info)); /* lock_index */ @@ -978,7 +982,7 @@ fill_innodb_locks_from_cache( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.innodb_locks -@return 0 on success */ +@return 0 on success */ static int innodb_locks_init( @@ -1091,7 +1095,7 @@ static ST_FIELD_INFO innodb_lock_waits_fields_info[] = /*******************************************************************//** Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_lock_waits table with it. -@return 0 on success */ +@return 0 on success */ static int fill_innodb_lock_waits_from_cache( @@ -1161,7 +1165,7 @@ fill_innodb_lock_waits_from_cache( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ +@return 0 on success */ static int innodb_lock_waits_init( @@ -1234,7 +1238,7 @@ Common function to fill any of the dynamic tables: INFORMATION_SCHEMA.innodb_trx INFORMATION_SCHEMA.innodb_locks INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ +@return 0 on success */ static int trx_i_s_common_fill_table( @@ -1272,10 +1276,8 @@ trx_i_s_common_fill_table( if (trx_i_s_cache_is_truncated(cache)) { - /* XXX show warning to user if possible */ - fprintf(stderr, "Warning: data in %s truncated due to " - "memory limit of %d bytes\n", table_name, - TRX_I_S_MEM_LIMIT); + ib::warn() << "Data in " << table_name << " truncated due to" + " memory limit of " << TRX_I_S_MEM_LIMIT << " bytes"; } ret = 0; @@ -1307,14 +1309,11 @@ trx_i_s_common_fill_table( } } else { - - /* huh! 
what happened!? */ - fprintf(stderr, - "InnoDB: trx_i_s_common_fill_table() was " - "called to fill unknown table: %s.\n" - "This function only knows how to fill " - "innodb_trx, innodb_locks and " - "innodb_lock_waits tables.\n", table_name); + ib::error() << "trx_i_s_common_fill_table() was" + " called to fill unknown table: " << table_name << "." + " This function only knows how to fill" + " innodb_trx, innodb_locks and" + " innodb_lock_waits tables."; ret = 1; } @@ -1394,7 +1393,7 @@ static ST_FIELD_INFO i_s_cmp_fields_info[] = /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmp or innodb_cmp_reset. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_fill_low( @@ -1428,16 +1427,11 @@ i_s_cmp_fill_low( clear it. We could introduce mutex protection, but it could cause a measureable performance hit in page0zip.cc. */ - table->field[1]->store( - static_cast(zip_stat->compressed)); - table->field[2]->store( - static_cast(zip_stat->compressed_ok)); - table->field[3]->store( - static_cast(zip_stat->compressed_usec / 1000000)); - table->field[4]->store( - static_cast(zip_stat->decompressed)); - table->field[5]->store( - static_cast(zip_stat->decompressed_usec / 1000000)); + table->field[1]->store(zip_stat->compressed, true); + table->field[2]->store(zip_stat->compressed_ok, true); + table->field[3]->store(zip_stat->compressed_usec / 1000000, true); + table->field[4]->store(zip_stat->decompressed, true); + table->field[5]->store(zip_stat->decompressed_usec / 1000000, true); if (reset) { memset(zip_stat, 0, sizeof *zip_stat); @@ -1454,7 +1448,7 @@ i_s_cmp_fill_low( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmp. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_fill( @@ -1468,7 +1462,7 @@ i_s_cmp_fill( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmp_reset. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_reset_fill( @@ -1482,7 +1476,7 @@ i_s_cmp_reset_fill( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmp. -@return 0 on success */ +@return 0 on success */ static int i_s_cmp_init( @@ -1500,7 +1494,7 @@ i_s_cmp_init( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmp_reset. -@return 0 on success */ +@return 0 on success */ static int i_s_cmp_reset_init( @@ -1699,7 +1693,7 @@ static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] = Fill the dynamic table information_schema.innodb_cmp_per_index or information_schema.innodb_cmp_per_index_reset. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_per_index_fill_low( @@ -1764,25 +1758,24 @@ i_s_cmp_per_index_fill_low( } fields[IDX_COMPRESS_OPS]->store( - static_cast(iter->second.compressed)); + iter->second.compressed, true); fields[IDX_COMPRESS_OPS_OK]->store( - static_cast(iter->second.compressed_ok)); + iter->second.compressed_ok, true); fields[IDX_COMPRESS_TIME]->store( - static_cast(iter->second.compressed_usec / 1000000)); + iter->second.compressed_usec / 1000000, true); fields[IDX_UNCOMPRESS_OPS]->store( - static_cast(iter->second.decompressed)); + iter->second.decompressed, true); fields[IDX_UNCOMPRESS_TIME]->store( - static_cast(iter->second.decompressed_usec / 1000000)); + iter->second.decompressed_usec / 1000000, true); if (schema_table_store_record(thd, table)) { status = 1; break; } - /* Release and reacquire the dict mutex to allow other threads to proceed. This could eventually result in the contents of INFORMATION_SCHEMA.innodb_cmp_per_index being @@ -1794,6 +1787,7 @@ i_s_cmp_per_index_fill_low( } mutex_exit(&dict_sys->mutex); +err: if (reset) { page_zip_reset_stat_per_index(); @@ -1804,7 +1798,7 @@ i_s_cmp_per_index_fill_low( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmp_per_index. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_per_index_fill( @@ -1818,7 +1812,7 @@ i_s_cmp_per_index_fill( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmp_per_index_reset. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmp_per_index_reset_fill( @@ -1832,7 +1826,7 @@ i_s_cmp_per_index_reset_fill( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmp_per_index. 
-@return 0 on success */ +@return 0 on success */ static int i_s_cmp_per_index_init( @@ -1850,7 +1844,7 @@ i_s_cmp_per_index_init( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmp_per_index_reset. -@return 0 on success */ +@return 0 on success */ static int i_s_cmp_per_index_reset_init( @@ -2023,7 +2017,7 @@ static ST_FIELD_INFO i_s_cmpmem_fields_info[] = /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmpmem or innodb_cmpmem_reset. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmpmem_fill_low( @@ -2047,37 +2041,43 @@ i_s_cmpmem_fill_low( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; + buf_pool_t* buf_pool; + ulint zip_free_len_local[BUF_BUDDY_SIZES_MAX + 1]; + buf_buddy_stat_t buddy_stat_local[BUF_BUDDY_SIZES_MAX + 1]; status = 0; buf_pool = buf_pool_from_array(i); + /* Save buddy stats for buffer pool in local variables. */ buf_pool_mutex_enter(buf_pool); + for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) { + + zip_free_len_local[x] = (x < BUF_BUDDY_SIZES) ? + UT_LIST_GET_LEN(buf_pool->zip_free[x]) : 0; + + buddy_stat_local[x] = buf_pool->buddy_stat[x]; + + if (reset) { + /* This is protected by buf_pool->mutex. */ + buf_pool->buddy_stat[x].relocated = 0; + buf_pool->buddy_stat[x].relocated_usec = 0; + } + } + buf_pool_mutex_exit(buf_pool); for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) { buf_buddy_stat_t* buddy_stat; - buddy_stat = &buf_pool->buddy_stat[x]; + buddy_stat = &buddy_stat_local[x]; table->field[0]->store(BUF_BUDDY_LOW << x); - table->field[1]->store(static_cast(i)); - table->field[2]->store(static_cast( - buddy_stat->used)); - table->field[3]->store(static_cast( - (x < BUF_BUDDY_SIZES) - ? 
UT_LIST_GET_LEN(buf_pool->zip_free[x]) - : 0)); - table->field[4]->store( - (longlong) buddy_stat->relocated, true); + table->field[1]->store(i, true); + table->field[2]->store(buddy_stat->used, true); + table->field[3]->store(zip_free_len_local[x], true); + table->field[4]->store(buddy_stat->relocated, true); table->field[5]->store( - static_cast(buddy_stat->relocated_usec / 1000000)); - - if (reset) { - /* This is protected by buf_pool->mutex. */ - buddy_stat->relocated = 0; - buddy_stat->relocated_usec = 0; - } + buddy_stat->relocated_usec / 1000000, true); if (schema_table_store_record(thd, table)) { status = 1; @@ -2085,8 +2085,6 @@ i_s_cmpmem_fill_low( } } - buf_pool_mutex_exit(buf_pool); - if (status) { break; } @@ -2097,7 +2095,7 @@ i_s_cmpmem_fill_low( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmpmem. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmpmem_fill( @@ -2111,7 +2109,7 @@ i_s_cmpmem_fill( /*******************************************************************//** Fill the dynamic table information_schema.innodb_cmpmem_reset. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_cmpmem_reset_fill( @@ -2125,7 +2123,7 @@ i_s_cmpmem_reset_fill( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmpmem. -@return 0 on success */ +@return 0 on success */ static int i_s_cmpmem_init( @@ -2143,7 +2141,7 @@ i_s_cmpmem_init( /*******************************************************************//** Bind the dynamic table information_schema.innodb_cmpmem_reset. -@return 0 on success */ +@return 0 on success */ static int i_s_cmpmem_reset_init( @@ -2419,7 +2417,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] = /**********************************************************************//** Fill the information schema metrics table. 
-@return 0 on success */ +@return 0 on success */ static int i_s_metrics_fill( @@ -2692,7 +2690,7 @@ i_s_metrics_fill( /*******************************************************************//** Function to fill information schema metrics tables. -@return 0 on success */ +@return 0 on success */ static int i_s_metrics_fill_table( @@ -2714,7 +2712,7 @@ i_s_metrics_fill_table( } /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.innodb_metrics -@return 0 on success */ +@return 0 on success */ static int innodb_metrics_init( @@ -2798,7 +2796,7 @@ static ST_FIELD_INFO i_s_stopword_fields_info[] = /*******************************************************************//** Fill the dynamic table information_schema.innodb_ft_default_stopword. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_stopword_fill( @@ -2830,7 +2828,7 @@ i_s_stopword_fill( /*******************************************************************//** Bind the dynamic table information_schema.innodb_ft_default_stopword. -@return 0 on success */ +@return 0 on success */ static int i_s_stopword_init( @@ -2914,7 +2912,7 @@ static ST_FIELD_INFO i_s_fts_doc_fields_info[] = /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED or INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_deleted_generic_fill( @@ -2942,19 +2940,19 @@ i_s_fts_deleted_generic_fill( } /* Prevent DDL to drop fts aux tables. 
*/ - rw_lock_s_lock(&dict_operation_lock); + rw_lock_s_lock(dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table)) { dict_table_close(user_table, FALSE, FALSE); - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } @@ -2977,7 +2975,7 @@ i_s_fts_deleted_generic_fill( doc_id = *(doc_id_t*) ib_vector_get_const(deleted->doc_ids, j); - OK(fields[I_S_FTS_DOC_ID]->store((longlong) doc_id, true)); + OK(fields[I_S_FTS_DOC_ID]->store(doc_id, true)); OK(schema_table_store_record(thd, table)); } @@ -2988,14 +2986,14 @@ i_s_fts_deleted_generic_fill( dict_table_close(user_table, FALSE, FALSE); - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_deleted_fill( @@ -3011,7 +3009,7 @@ i_s_fts_deleted_fill( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED -@return 0 on success */ +@return 0 on success */ static int i_s_fts_deleted_init( @@ -3078,7 +3076,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted = /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_being_deleted_fill( @@ -3094,7 +3092,7 @@ i_s_fts_being_deleted_fill( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED 
-@return 0 on success */ +@return 0 on success */ static int i_s_fts_being_deleted_init( @@ -3223,7 +3221,7 @@ static ST_FIELD_INFO i_s_fts_index_fields_info[] = /*******************************************************************//** Go through the Doc Node and its ilist, fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED for one FTS index on the table. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_index_cache_fill_one_index( @@ -3247,7 +3245,7 @@ i_s_fts_index_cache_fill_one_index( index_charset = index_cache->charset; conv_str.f_len = system_charset_info->mbmaxlen * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast(ut_malloc(conv_str.f_len)); + conv_str.f_str = static_cast(ut_malloc_nokey(conv_str.f_len)); conv_str.f_n_char = 0; /* Go through each word in the index cache */ @@ -3296,28 +3294,28 @@ i_s_fts_index_cache_fill_one_index( pos = fts_decode_vlc(&ptr); OK(field_store_string( - fields[I_S_FTS_WORD], - word_str)); + fields[I_S_FTS_WORD], + word_str)); OK(fields[I_S_FTS_FIRST_DOC_ID]->store( - (longlong) node->first_doc_id, - true)); + node->first_doc_id, + true)); OK(fields[I_S_FTS_LAST_DOC_ID]->store( - (longlong) node->last_doc_id, - true)); + node->last_doc_id, + true)); OK(fields[I_S_FTS_DOC_COUNT]->store( - static_cast(node->doc_count))); + node->doc_count, true)); OK(fields[I_S_FTS_ILIST_DOC_ID]->store( - (longlong) doc_id, true)); + doc_id, true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - static_cast(pos))); + pos, true)); OK(schema_table_store_record( - thd, table)); + thd, table)); } ++ptr; @@ -3333,7 +3331,7 @@ i_s_fts_index_cache_fill_one_index( } /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_index_cache_fill( @@ -3389,7 +3387,7 @@ i_s_fts_index_cache_fill( 
/*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE -@return 0 on success */ +@return 0 on success */ static int i_s_fts_index_cache_init( @@ -3457,7 +3455,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache = /*******************************************************************//** Go through a FTS index auxiliary table, fetch its rows and fill FTS word cache structure. -@return DB_SUCCESS on success, otherwise error code */ +@return DB_SUCCESS on success, otherwise error code */ static dberr_t i_s_fts_index_table_fill_selected( @@ -3474,6 +3472,7 @@ i_s_fts_index_table_fill_selected( que_t* graph; dberr_t error; fts_fetch_t fetch; + char table_name[MAX_FULL_NAME_LEN]; info = pars_info_create(); @@ -3494,14 +3493,16 @@ i_s_fts_index_table_fill_selected( FTS_INIT_INDEX_TABLE(&fts_table, fts_get_suffix(selected), FTS_INDEX_TABLE, index); + fts_get_table_name(&fts_table, table_name); + pars_info_bind_id(info, true, "table_name", table_name); graph = fts_parse_sql( &fts_table, info, "DECLARE FUNCTION my_func;\n" "DECLARE CURSOR c IS" - " SELECT word, doc_count, first_doc_id, last_doc_id, " - "ilist\n" - " FROM %s WHERE word >= :word;\n" + " SELECT word, doc_count, first_doc_id, last_doc_id," + " ilist\n" + " FROM $table_name WHERE word >= :word;\n" "BEGIN\n" "\n" "OPEN c;\n" @@ -3513,7 +3514,7 @@ i_s_fts_index_table_fill_selected( "END LOOP;\n" "CLOSE c;"); - for(;;) { + for (;;) { error = fts_eval_sql(trx, graph); if (error == DB_SUCCESS) { @@ -3523,17 +3524,14 @@ i_s_fts_index_table_fill_selected( } else { fts_sql_rollback(trx); - ut_print_timestamp(stderr); - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: " - "lock wait timeout reading " - "FTS index. Retrying!\n"); + ib::warn() << "Lock wait timeout reading" + " FTS index. 
Retrying!"; trx->error_state = DB_SUCCESS; } else { - fprintf(stderr, " InnoDB: Error: %d " - "while reading FTS index.\n", error); + ib::error() << "Error occurred while reading" + " FTS index: " << ut_strerr(error); break; } } @@ -3657,28 +3655,26 @@ i_s_fts_index_table_fill_one_fetch( pos = fts_decode_vlc(&ptr); OK(field_store_string( - fields[I_S_FTS_WORD], - word_str)); + fields[I_S_FTS_WORD], + word_str)); OK(fields[I_S_FTS_FIRST_DOC_ID]->store( - (longlong) node->first_doc_id, - true)); + longlong(node->first_doc_id), true)); OK(fields[I_S_FTS_LAST_DOC_ID]->store( - (longlong) node->last_doc_id, - true)); + longlong(node->last_doc_id), true)); OK(fields[I_S_FTS_DOC_COUNT]->store( - static_cast(node->doc_count))); + node->doc_count, true)); OK(fields[I_S_FTS_ILIST_DOC_ID]->store( - (longlong) doc_id, true)); + longlong(doc_id), true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - static_cast(pos))); + pos, true)); OK(schema_table_store_record( - thd, table)); + thd, table)); } ++ptr; @@ -3696,7 +3692,7 @@ i_s_fts_index_table_fill_one_fetch( /*******************************************************************//** Go through a FTS index and its auxiliary tables, fetch rows in each table and fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_index_table_fill_one_index( @@ -3707,7 +3703,6 @@ i_s_fts_index_table_fill_one_index( { ib_vector_t* words; mem_heap_t* heap; - fts_string_t word; CHARSET_INFO* index_charset; fts_string_t conv_str; dberr_t error; @@ -3721,21 +3716,21 @@ i_s_fts_index_table_fill_one_index( words = ib_vector_create(ib_heap_allocator_create(heap), sizeof(fts_word_t), 256); - word.f_str = NULL; - word.f_len = 0; - word.f_n_char = 0; - index_charset = fts_index_get_charset(index); conv_str.f_len = system_charset_info->mbmaxlen * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast(ut_malloc(conv_str.f_len)); + conv_str.f_str = static_cast(ut_malloc_nokey(conv_str.f_len)); conv_str.f_n_char = 0; /* Iterate through each auxiliary table as described in fts_index_selector */ - for (ulint selected = 0; fts_index_selector[selected].value; - selected++) { - bool has_more = false; + for (ulint selected = 0; selected < FTS_NUM_AUX_INDEX; selected++) { + fts_string_t word; + bool has_more = false; + + word.f_str = NULL; + word.f_len = 0; + word.f_n_char = 0; do { /* Fetch from index */ @@ -3758,7 +3753,7 @@ i_s_fts_index_table_fill_one_index( /* Prepare start point for next fetch */ last_word = static_cast(ib_vector_last(words)); ut_ad(last_word != NULL); - fts_utf8_string_dup(&word, &last_word->text, heap); + fts_string_dup(&word, &last_word->text, heap); } /* Fill into tables */ @@ -3780,7 +3775,7 @@ func_exit: } /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_index_table_fill( @@ -3804,13 +3799,13 @@ i_s_fts_index_table_fill( } /* Prevent DDL to drop fts aux tables. 
*/ - rw_lock_s_lock(&dict_operation_lock); + rw_lock_s_lock(dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } @@ -3824,14 +3819,14 @@ i_s_fts_index_table_fill( dict_table_close(user_table, FALSE, FALSE); - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE -@return 0 on success */ +@return 0 on success */ static int i_s_fts_index_table_init( @@ -3930,7 +3925,7 @@ static const char* fts_config_key[] = { /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_fts_config_fill( @@ -3962,19 +3957,19 @@ i_s_fts_config_fill( fields = table->field; /* Prevent DDL to drop fts aux tables. 
*/ - rw_lock_s_lock(&dict_operation_lock); + rw_lock_s_lock(dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table)) { dict_table_close(user_table, FALSE, FALSE); - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } @@ -4031,14 +4026,14 @@ i_s_fts_config_fill( dict_table_close(user_table, FALSE, FALSE); - rw_lock_s_unlock(&dict_operation_lock); + rw_lock_s_unlock(dict_operation_lock); DBUG_RETURN(0); } /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG -@return 0 on success */ +@return 0 on success */ static int i_s_fts_config_init( @@ -4103,6 +4098,287 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config = STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), }; +/* Fields of the dynamic table INNODB_TEMP_TABLE_INFO. 
*/ +static ST_FIELD_INFO i_s_innodb_temp_table_info_fields_info[] = +{ +#define IDX_TEMP_TABLE_ID 0 + {STRUCT_FLD(field_name, "TABLE_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_TEMP_TABLE_NAME 1 + {STRUCT_FLD(field_name, "NAME"), + STRUCT_FLD(field_length, MAX_TABLE_UTF8_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_TEMP_TABLE_N_COLS 2 + {STRUCT_FLD(field_name, "N_COLS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_TEMP_TABLE_SPACE_ID 3 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_TEMP_TABLE_PTT 4 + {STRUCT_FLD(field_name, "PER_TABLE_TABLESPACE"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define IDX_TEMP_TABLE_IS_COMPRESSED 5 + {STRUCT_FLD(field_name, "IS_COMPRESSED"), + STRUCT_FLD(field_length, 64), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO +}; + +struct temp_table_info_t{ + table_id_t 
m_table_id; + char m_table_name[MAX_TABLE_UTF8_LEN]; + unsigned m_n_cols; + unsigned m_space_id; + char m_per_table_tablespace[64]; + char m_is_compressed[64]; +}; + +typedef std::vector > + temp_table_info_cache_t; + +/*******************************************************************//** +Fill Information Schema table INNODB_TEMP_TABLE_INFO for a particular +temp-table +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_temp_table_info_fill( +/*=============================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables + to fill */ + const temp_table_info_t* info) /*!< in: temp-table + information */ +{ + TABLE* table; + Field** fields; + + DBUG_ENTER("i_s_innodb_temp_table_info_fill"); + + table = tables->table; + + fields = table->field; + + OK(fields[IDX_TEMP_TABLE_ID]->store(info->m_table_id, true)); + + OK(field_store_string( + fields[IDX_TEMP_TABLE_NAME], info->m_table_name)); + + OK(fields[IDX_TEMP_TABLE_N_COLS]->store(info->m_n_cols)); + + OK(fields[IDX_TEMP_TABLE_SPACE_ID]->store(info->m_space_id)); + + OK(field_store_string( + fields[IDX_TEMP_TABLE_PTT], info->m_per_table_tablespace)); + + OK(field_store_string( + fields[IDX_TEMP_TABLE_IS_COMPRESSED], info->m_is_compressed)); + + DBUG_RETURN(schema_table_store_record(thd, table)); +} + +/*******************************************************************//** +Populate current table information to cache */ +static +void +innodb_temp_table_populate_cache( +/*=============================*/ + const dict_table_t* table, /*! in: table */ + temp_table_info_t* cache) /*! 
in/out: populate data in this + cache */ +{ + cache->m_table_id = table->id; + + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + + dict_fs2utf8(table->name.m_name, + db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + strcpy(cache->m_table_name, table_utf8); + + cache->m_n_cols = table->n_cols; + + cache->m_space_id = table->space; + + if (fsp_is_system_temporary(table->space)) { + strcpy(cache->m_per_table_tablespace, "FALSE"); + } else { + strcpy(cache->m_per_table_tablespace, "TRUE"); + } + + if (dict_table_page_size(table).is_compressed()) { + strcpy(cache->m_is_compressed, "TRUE"); + } else { + strcpy(cache->m_is_compressed, "FALSE"); + } +} + +/*******************************************************************//** +This function will iterate over all available table and will fill +stats for temp-tables to INNODB_TEMP_TABLE_INFO. +@return 0 on success, 1 on failure */ +static +int +i_s_innodb_temp_table_info_fill_table( +/*===================================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (ignored) */ +{ + int status = 0; + dict_table_t* table = NULL; + + DBUG_ENTER("i_s_innodb_temp_table_info_fill_table"); + + /* Only allow the PROCESS privilege holder to access the stats */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + /* First populate all temp-table info by acquiring dict_sys->mutex. + Note: Scan is being done on NON-LRU list which mainly has system + table entries and temp-table entries. 
This means 2 things: list + is smaller so processing would be faster and most of the data + is relevant */ + temp_table_info_cache_t all_temp_info_cache; + all_temp_info_cache.reserve(UT_LIST_GET_LEN(dict_sys->table_non_LRU)); + + mutex_enter(&dict_sys->mutex); + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + if (!dict_table_is_temporary(table)) { + continue; + } + + temp_table_info_t current_temp_table_info; + + innodb_temp_table_populate_cache( + table, ¤t_temp_table_info); + + all_temp_info_cache.push_back(current_temp_table_info); + } + mutex_exit(&dict_sys->mutex); + + /* Now populate the info to MySQL table */ + temp_table_info_cache_t::const_iterator end = all_temp_info_cache.end(); + for (temp_table_info_cache_t::const_iterator it + = all_temp_info_cache.begin(); + it != end; + it++) { + status = i_s_innodb_temp_table_info_fill(thd, tables, &(*it)); + if (status) { + break; + } + } + + DBUG_RETURN(status); +} + +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_TEMP_TABLE_INFO. 
+@return 0 on success, 1 on failure */ +static +int +i_s_innodb_temp_table_info_init( +/*=============================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("i_s_innodb_temp_table_info_init"); + + schema = reinterpret_cast(p); + + schema->fields_info = i_s_innodb_temp_table_info_fields_info; + schema->fill_table = i_s_innodb_temp_table_info_fill_table; + + DBUG_RETURN(0); +} + +struct st_maria_plugin i_s_innodb_temp_table_info = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_TEMP_TABLE_INFO"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB Temp Table Stats"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, i_s_innodb_temp_table_info_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + STRUCT_FLD(version_info, INNODB_VERSION_STR), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_GAMMA) +}; + /* Fields of the dynamic table INNODB_BUFFER_POOL_STATS. 
*/ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] = { @@ -4400,7 +4676,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] = /*******************************************************************//** Fill Information Schema table INNODB_BUFFER_POOL_STATS for a particular buffer pool -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_stats_fill( @@ -4420,67 +4696,67 @@ i_s_innodb_stats_fill( fields = table->field; OK(fields[IDX_BUF_STATS_POOL_ID]->store( - static_cast(info->pool_unique_id))); + info->pool_unique_id, true)); OK(fields[IDX_BUF_STATS_POOL_SIZE]->store( - static_cast(info->pool_size))); + info->pool_size, true)); OK(fields[IDX_BUF_STATS_LRU_LEN]->store( - static_cast(info->lru_len))); + info->lru_len, true)); OK(fields[IDX_BUF_STATS_OLD_LRU_LEN]->store( - static_cast(info->old_lru_len))); + info->old_lru_len, true)); OK(fields[IDX_BUF_STATS_FREE_BUFFERS]->store( - static_cast(info->free_list_len))); + info->free_list_len, true)); OK(fields[IDX_BUF_STATS_FLUSH_LIST_LEN]->store( - static_cast(info->flush_list_len))); + info->flush_list_len, true)); OK(fields[IDX_BUF_STATS_PENDING_ZIP]->store( - static_cast(info->n_pend_unzip))); + info->n_pend_unzip, true)); OK(fields[IDX_BUF_STATS_PENDING_READ]->store( - static_cast(info->n_pend_reads))); + info->n_pend_reads, true)); OK(fields[IDX_BUF_STATS_FLUSH_LRU]->store( - static_cast(info->n_pending_flush_lru))); + info->n_pending_flush_lru, true)); OK(fields[IDX_BUF_STATS_FLUSH_LIST]->store( - static_cast(info->n_pending_flush_list))); + info->n_pending_flush_list, true)); OK(fields[IDX_BUF_STATS_PAGE_YOUNG]->store( - static_cast(info->n_pages_made_young))); + info->n_pages_made_young, true)); OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG]->store( - static_cast(info->n_pages_not_made_young))); + info->n_pages_not_made_young, true)); OK(fields[IDX_BUF_STATS_PAGE_YOUNG_RATE]->store( - info->page_made_young_rate)); + info->page_made_young_rate)); 
OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE]->store( - info->page_not_made_young_rate)); + info->page_not_made_young_rate)); OK(fields[IDX_BUF_STATS_PAGE_READ]->store( - static_cast(info->n_pages_read))); + info->n_pages_read, true)); OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store( - static_cast(info->n_pages_created))); + info->n_pages_created, true)); OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store( - static_cast(info->n_pages_written))); + info->n_pages_written, true)); OK(fields[IDX_BUF_STATS_GET]->store( - static_cast(info->n_page_gets))); + info->n_page_gets, true)); OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store( - info->pages_read_rate)); + info->pages_read_rate)); OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store( - info->pages_created_rate)); + info->pages_created_rate)); OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store( - info->pages_written_rate)); + info->pages_written_rate)); if (info->n_page_get_delta) { if (info->page_read_delta <= info->n_page_get_delta) { @@ -4493,43 +4769,41 @@ i_s_innodb_stats_fill( } OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store( - static_cast( - 1000 * info->young_making_delta - / info->n_page_get_delta))); + 1000 * info->young_making_delta + / info->n_page_get_delta, true)); OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store( - static_cast( - 1000 * info->not_young_making_delta - / info->n_page_get_delta))); + 1000 * info->not_young_making_delta + / info->n_page_get_delta, true)); } else { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); - OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0)); - OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0)); + OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0, true)); + OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0, true)); + OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0, true)); } OK(fields[IDX_BUF_STATS_READ_AHREAD]->store( - static_cast(info->n_ra_pages_read))); + info->n_ra_pages_read, true)); OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICTED]->store( - 
static_cast(info->n_ra_pages_evicted))); + info->n_ra_pages_evicted, true)); OK(fields[IDX_BUF_STATS_READ_AHEAD_RATE]->store( - info->pages_readahead_rate)); + info->pages_readahead_rate)); OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICT_RATE]->store( - info->pages_evicted_rate)); + info->pages_evicted_rate)); OK(fields[IDX_BUF_STATS_LRU_IO_SUM]->store( - static_cast(info->io_sum))); + info->io_sum, true)); OK(fields[IDX_BUF_STATS_LRU_IO_CUR]->store( - static_cast(info->io_cur))); + info->io_cur, true)); OK(fields[IDX_BUF_STATS_UNZIP_SUM]->store( - static_cast(info->unzip_sum))); + info->unzip_sum, true)); OK(fields[IDX_BUF_STATS_UNZIP_CUR]->store( - static_cast(info->unzip_cur))); + info->unzip_cur, true)); DBUG_RETURN(schema_table_store_record(thd, table)); } @@ -4537,7 +4811,7 @@ i_s_innodb_stats_fill( /*******************************************************************//** This is the function that loops through each buffer pool and fetch buffer pool stats to information schema table: I_S_INNODB_BUFFER_POOL_STATS -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_stats_fill_table( @@ -4557,7 +4831,7 @@ i_s_innodb_buffer_stats_fill_table( DBUG_RETURN(0); } - pool_info = (buf_pool_info_t*) mem_zalloc( + pool_info = (buf_pool_info_t*) ut_zalloc_nokey( srv_buf_pool_instances * sizeof *pool_info); /* Walk through each buffer pool */ @@ -4577,14 +4851,14 @@ i_s_innodb_buffer_stats_fill_table( } } - mem_free(pool_info); + ut_free(pool_info); DBUG_RETURN(status); } /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_pool_stats_init( @@ -4841,7 +5115,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] = /*******************************************************************//** Fill Information Schema table INNODB_BUFFER_PAGE with information cached in the buf_page_info_t array -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_fill( @@ -4875,43 +5149,43 @@ i_s_innodb_buffer_page_fill( state_str = NULL; OK(fields[IDX_BUFFER_POOL_ID]->store( - static_cast(page_info->pool_id))); + page_info->pool_id, true)); OK(fields[IDX_BUFFER_BLOCK_ID]->store( - static_cast(page_info->block_id))); + page_info->block_id, true)); OK(fields[IDX_BUFFER_PAGE_SPACE]->store( - static_cast(page_info->space_id))); + page_info->space_id, true)); OK(fields[IDX_BUFFER_PAGE_NUM]->store( - static_cast(page_info->page_num))); + page_info->page_num, true)); OK(field_store_string( - fields[IDX_BUFFER_PAGE_TYPE], - i_s_page_type[page_info->page_type].type_str)); + fields[IDX_BUFFER_PAGE_TYPE], + i_s_page_type[page_info->page_type].type_str)); OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store( - page_info->flush_type)); + page_info->flush_type)); OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store( - page_info->fix_count)); + page_info->fix_count)); if (page_info->hashed) { OK(field_store_string( - fields[IDX_BUFFER_PAGE_HASHED], "YES")); + fields[IDX_BUFFER_PAGE_HASHED], "YES")); } else { OK(field_store_string( - fields[IDX_BUFFER_PAGE_HASHED], "NO")); + fields[IDX_BUFFER_PAGE_HASHED], "NO")); } OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store( - (longlong) page_info->newest_mod, true)); + page_info->newest_mod, true)); OK(fields[IDX_BUFFER_PAGE_OLDEST_MOD]->store( - (longlong) page_info->oldest_mod, true)); + page_info->oldest_mod, true)); OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store( - page_info->access_time)); + page_info->access_time)); 
fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null(); @@ -4932,11 +5206,11 @@ i_s_innodb_buffer_page_fill( table_name, sizeof(table_name), index->table_name, strlen(index->table_name), - thd, TRUE); + thd); OK(fields[IDX_BUFFER_PAGE_TABLE_NAME]->store( table_name, - static_cast(table_name_end - table_name), + static_cast(table_name_end - table_name), system_charset_info)); fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull(); @@ -4949,15 +5223,15 @@ i_s_innodb_buffer_page_fill( } OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store( - page_info->num_recs)); + page_info->num_recs, true)); OK(fields[IDX_BUFFER_PAGE_DATA_SIZE]->store( - page_info->data_size)); + page_info->data_size, true)); OK(fields[IDX_BUFFER_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize - : 0)); + page_info->zip_ssize + ? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize + : 0, true)); #if BUF_PAGE_STATE_BITS > 3 # error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<is_old) ? "YES" : "NO")); OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store( - page_info->freed_page_clock)); + page_info->freed_page_clock, true)); if (schema_table_store_record(thd, table)) { DBUG_RETURN(1); @@ -5037,14 +5311,14 @@ i_s_innodb_set_page_type( ulint page_type, /*!< in: page type */ const byte* frame) /*!< in: buffer frame */ { - if (page_type == FIL_PAGE_INDEX) { + if (fil_page_type_is_index(page_type)) { const page_t* page = (const page_t*) frame; page_info->index_id = btr_page_get_index_id(page); - /* FIL_PAGE_INDEX is a bit special, its value - is defined as 17855, so we cannot use FIL_PAGE_INDEX - to index into i_s_page_type[] array, its array index + /* FIL_PAGE_INDEX and FIL_PAGE_RTREE are a bit special, + their values are defined as 17855 and 17854, so we cannot + use them to index into i_s_page_type[] array, its array index in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX (1) for index pages or I_S_PAGE_TYPE_IBUF for change buffer index pages */ @@ -5052,6 +5326,8 @@ 
i_s_innodb_set_page_type( == static_cast(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) { page_info->page_type = I_S_PAGE_TYPE_IBUF; + } else if (page_type == FIL_PAGE_RTREE) { + page_info->page_type = I_S_PAGE_TYPE_RTREE; } else { page_info->page_type = I_S_PAGE_TYPE_INDEX; } @@ -5113,9 +5389,9 @@ i_s_innodb_buffer_page_get_info( const byte* frame; ulint page_type; - page_info->space_id = buf_page_get_space(bpage); + page_info->space_id = bpage->id.space(); - page_info->page_num = buf_page_get_page_no(bpage); + page_info->page_num = bpage->id.page_no(); page_info->flush_type = bpage->flush_type; @@ -5167,7 +5443,7 @@ i_s_innodb_buffer_page_get_info( /*******************************************************************//** This is the function that goes through each block of the buffer pool and fetch information to information schema tables: INNODB_BUFFER_PAGE. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_fill_buffer_pool( @@ -5181,13 +5457,13 @@ i_s_innodb_fill_buffer_pool( mem_heap_t* heap; DBUG_ENTER("i_s_innodb_fill_buffer_pool"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); heap = mem_heap_create(10000); /* Go through each chunk of buffer pool. 
Currently, we only have one single chunk for each buffer pool */ - for (ulint n = 0; n < buf_pool->n_chunks; n++) { + for (ulint n = 0; + n < ut_min(buf_pool->n_chunks, buf_pool->n_chunks_new); n++) { const buf_block_t* block; ulint n_blocks; buf_page_info_t* info_buffer; @@ -5205,7 +5481,7 @@ i_s_innodb_fill_buffer_pool( /* we cache maximum MAX_BUF_INFO_CACHED number of buffer page info */ num_to_process = ut_min(chunk_size, - MAX_BUF_INFO_CACHED); + (ulint)MAX_BUF_INFO_CACHED); mem_size = num_to_process * sizeof(buf_page_info_t); @@ -5257,7 +5533,7 @@ i_s_innodb_fill_buffer_pool( /*******************************************************************//** Fill page information for pages in InnoDB buffer pool to the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_fill_table( @@ -5270,6 +5546,8 @@ i_s_innodb_buffer_page_fill_table( DBUG_ENTER("i_s_innodb_buffer_page_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to user without PROCESS privilege */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -5296,7 +5574,7 @@ i_s_innodb_buffer_page_fill_table( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_init( @@ -5552,7 +5830,7 @@ static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] = /*******************************************************************//** Fill Information Schema table INNODB_BUFFER_PAGE_LRU with information cached in the buf_page_info_t array -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buf_page_lru_fill( @@ -5589,43 +5867,43 @@ i_s_innodb_buf_page_lru_fill( page_info = info_array + i; OK(fields[IDX_BUF_LRU_POOL_ID]->store( - static_cast(page_info->pool_id))); + page_info->pool_id, true)); OK(fields[IDX_BUF_LRU_POS]->store( - static_cast(page_info->block_id))); + page_info->block_id, true)); OK(fields[IDX_BUF_LRU_PAGE_SPACE]->store( - static_cast(page_info->space_id))); + page_info->space_id, true)); OK(fields[IDX_BUF_LRU_PAGE_NUM]->store( - static_cast(page_info->page_num))); + page_info->page_num, true)); OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_TYPE], - i_s_page_type[page_info->page_type].type_str)); + fields[IDX_BUF_LRU_PAGE_TYPE], + i_s_page_type[page_info->page_type].type_str)); OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store( - static_cast(page_info->flush_type))); + page_info->flush_type, true)); OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store( - static_cast(page_info->fix_count))); + page_info->fix_count, true)); if (page_info->hashed) { OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_HASHED], "YES")); + fields[IDX_BUF_LRU_PAGE_HASHED], "YES")); } else { OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_HASHED], "NO")); + fields[IDX_BUF_LRU_PAGE_HASHED], "NO")); } OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store( - page_info->newest_mod, true)); + page_info->newest_mod, true)); OK(fields[IDX_BUF_LRU_PAGE_OLDEST_MOD]->store( - page_info->oldest_mod, true)); + page_info->oldest_mod, true)); OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store( - page_info->access_time)); + 
page_info->access_time, true)); fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null(); @@ -5646,11 +5924,11 @@ i_s_innodb_buf_page_lru_fill( table_name, sizeof(table_name), index->table_name, strlen(index->table_name), - thd, TRUE); + thd); OK(fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->store( table_name, - static_cast(table_name_end - table_name), + static_cast(table_name_end - table_name), system_charset_info)); fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull(); @@ -5663,14 +5941,14 @@ i_s_innodb_buf_page_lru_fill( } OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store( - page_info->num_recs)); + page_info->num_recs, true)); OK(fields[IDX_BUF_LRU_PAGE_DATA_SIZE]->store( - page_info->data_size)); + page_info->data_size, true)); OK(fields[IDX_BUF_LRU_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize ? - 512 << page_info->zip_ssize : 0)); + page_info->zip_ssize ? + 512 << page_info->zip_ssize : 0, true)); state = static_cast(page_info->page_state); @@ -5716,7 +5994,7 @@ i_s_innodb_buf_page_lru_fill( (page_info->is_old) ? "YES" : "NO")); OK(fields[IDX_BUF_LRU_PAGE_FREE_CLOCK]->store( - page_info->freed_page_clock)); + page_info->freed_page_clock, true)); if (schema_table_store_record(thd, table)) { mem_heap_free(heap); @@ -5734,7 +6012,7 @@ i_s_innodb_buf_page_lru_fill( /*******************************************************************//** This is the function that goes through buffer pool's LRU list and fetch information to INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. 
-@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_fill_buffer_lru( @@ -5751,7 +6029,6 @@ i_s_innodb_fill_buffer_lru( ulint lru_len; DBUG_ENTER("i_s_innodb_fill_buffer_lru"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); /* Obtain buf_pool mutex before allocate info_buffer, since UT_LIST_GET_LEN(buf_pool->LRU) could change */ @@ -5762,6 +6039,10 @@ i_s_innodb_fill_buffer_lru( /* Print error message if malloc fail */ info_buffer = (buf_page_info_t*) my_malloc( lru_len * sizeof *info_buffer, MYF(MY_WME)); + /* JAN: TODO: MySQL 5.7 PSI + info_buffer = (buf_page_info_t*) my_malloc(PSI_INSTRUMENT_ME, + lru_len * sizeof *info_buffer, MYF(MY_WME)); + */ if (!info_buffer) { status = 1; @@ -5804,7 +6085,7 @@ exit: /*******************************************************************//** Fill page information for pages in InnoDB buffer pool to the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buf_page_lru_fill_table( @@ -5817,6 +6098,8 @@ i_s_innodb_buf_page_lru_fill_table( DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + /* deny access to any users that do not hold PROCESS_ACL */ if (check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(0); @@ -5843,7 +6126,7 @@ i_s_innodb_buf_page_lru_fill_table( /*******************************************************************//** Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. -@return 0 on success, 1 on failure */ +@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_lru_init( @@ -5913,7 +6196,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru = /*******************************************************************//** Unbind a dynamic INFORMATION_SCHEMA table. 
-@return 0 on success */ +@return 0 on success */ static int i_s_common_deinit( @@ -6003,13 +6286,22 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, +#define SYS_TABLES_SPACE_TYPE 8 + {STRUCT_FLD(field_name, "SPACE_TYPE"), + STRUCT_FLD(field_length, 10), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; /**********************************************************************//** Populate information_schema.innodb_sys_tables table with information from SYS_TABLES. -@return 0 on success */ +@return 0 on success */ static int i_s_dict_fill_sys_tables( @@ -6018,31 +6310,41 @@ i_s_dict_fill_sys_tables( dict_table_t* table, /*!< in: table */ TABLE* table_to_fill) /*!< in/out: fill this table */ { - Field** fields; - ulint compact = DICT_TF_GET_COMPACT(table->flags); - ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table->flags); - ulint zip_size = dict_tf_get_zip_size(table->flags); - const char* file_format; - const char* row_format; + Field** fields; + ulint compact = DICT_TF_GET_COMPACT(table->flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS( + table->flags); + const page_size_t& page_size = dict_tf_get_page_size(table->flags); + const char* file_format; + const char* row_format; + const char* space_type; file_format = trx_sys_file_format_id_to_name(atomic_blobs); if (!compact) { row_format = "Redundant"; } else if (!atomic_blobs) { row_format = "Compact"; - } else if DICT_TF_GET_ZIP_SSIZE(table->flags) { + } else if (DICT_TF_GET_ZIP_SSIZE(table->flags)) { row_format = "Compressed"; } else { row_format = "Dynamic"; } + if (is_system_tablespace(table->space)) { + space_type = "System"; + } else if (DICT_TF_HAS_SHARED_SPACE(table->flags)) { + space_type = "General"; + } else { + space_type = "Single"; + } + 
DBUG_ENTER("i_s_dict_fill_sys_tables"); fields = table_to_fill->field; OK(fields[SYS_TABLES_ID]->store(longlong(table->id), TRUE)); - OK(field_store_string(fields[SYS_TABLES_NAME], table->name)); + OK(field_store_string(fields[SYS_TABLES_NAME], table->name.m_name)); OK(fields[SYS_TABLES_FLAG]->store(table->flags)); @@ -6054,8 +6356,12 @@ i_s_dict_fill_sys_tables( OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format)); - OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store( - static_cast(zip_size))); + OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store(static_cast( + page_size.is_compressed() + ? page_size.physical() + : 0))); + + OK(field_store_string(fields[SYS_TABLES_SPACE_TYPE], space_type)); OK(schema_table_store_record(thd, table_to_fill)); @@ -6087,7 +6393,7 @@ i_s_sys_tables_fill_table( } heap = mem_heap_create(1000); - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); mtr_start(&mtr); rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); @@ -6293,17 +6599,20 @@ static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] = END_OF_ST_FIELD_INFO }; -/**********************************************************************//** -Populate information_schema.innodb_sys_tablestats table with information +/** Populate information_schema.innodb_sys_tablestats table with information from SYS_TABLES. 
-@return 0 on success */ +@param[in] thd thread ID +@param[in,out] table table +@param[in] ref_count table reference count +@param[in,out] table_to_fill fill this table +@return 0 on success */ static int i_s_dict_fill_sys_tablestats( -/*=========================*/ - THD* thd, /*!< in: thread */ - dict_table_t* table, /*!< in: table */ - TABLE* table_to_fill) /*!< in/out: fill this table */ + THD* thd, + dict_table_t* table, + ulint ref_count, + TABLE* table_to_fill) { Field** fields; @@ -6313,7 +6622,8 @@ i_s_dict_fill_sys_tablestats( OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE)); - OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name)); + OK(field_store_string(fields[SYS_TABLESTATS_NAME], + table->name.m_name)); dict_table_stats_lock(table, RW_S_LATCH); @@ -6322,35 +6632,34 @@ i_s_dict_fill_sys_tablestats( "Initialized")); OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, - TRUE)); + true)); OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store( - static_cast(table->stat_clustered_index_size))); + table->stat_clustered_index_size, true)); OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store( - static_cast(table->stat_sum_of_other_index_sizes))); + table->stat_sum_of_other_index_sizes, true)); OK(fields[SYS_TABLESTATS_MODIFIED]->store( - static_cast(table->stat_modified_counter))); + table->stat_modified_counter, true)); } else { OK(field_store_string(fields[SYS_TABLESTATS_INIT], "Uninitialized")); - OK(fields[SYS_TABLESTATS_NROW]->store(0, TRUE)); + OK(fields[SYS_TABLESTATS_NROW]->store(0, true)); - OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0)); + OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0, true)); - OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0)); + OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0, true)); - OK(fields[SYS_TABLESTATS_MODIFIED]->store(0)); + OK(fields[SYS_TABLESTATS_MODIFIED]->store(0, true)); } dict_table_stats_unlock(table, RW_S_LATCH); - OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE)); + 
OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, true)); - OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store( - static_cast(table->n_ref_count))); + OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store(static_cast(ref_count), true)); OK(schema_table_store_record(thd, table_to_fill)); @@ -6392,6 +6701,7 @@ i_s_sys_tables_fill_table_stats( while (rec) { const char* err_msg; dict_table_t* table_rec; + ulint ref_count; /* Fetch the dict_table_t structure corresponding to this SYS_TABLES record */ @@ -6399,10 +6709,25 @@ i_s_sys_tables_fill_table_stats( heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_CACHE, &mtr); + if (table_rec != NULL) { + ut_ad(err_msg == NULL); + + ref_count = table_rec->get_ref_count(); + + /* Protect the dict_table_t object by incrementing + the reference count. */ + table_rec->acquire(); + } + mutex_exit(&dict_sys->mutex); - if (!err_msg) { - i_s_dict_fill_sys_tablestats(thd, table_rec, + DBUG_EXECUTE_IF("test_sys_tablestats", { + if (strcmp("test/t1", table_rec->name.m_name) == 0 ) { + DEBUG_SYNC_C("dict_table_not_protected"); + }}); + + if (table_rec != NULL) { + i_s_dict_fill_sys_tablestats(thd, table_rec, ref_count, tables->table); } else { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, @@ -6414,6 +6739,11 @@ i_s_sys_tables_fill_table_stats( /* Get the next record */ mutex_enter(&dict_sys->mutex); + + if (table_rec != NULL) { + table_rec->release(); + } + mtr_start(&mtr); rec = dict_getnext_system(&pcur, &mtr); } @@ -6562,6 +6892,15 @@ static ST_FIELD_INFO innodb_sysindex_fields_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, +#define SYS_INDEX_MERGE_THRESHOLD 7 + {STRUCT_FLD(field_name, "MERGE_THRESHOLD"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; @@ -6587,9 +6926,9 @@ 
i_s_dict_fill_sys_indexes( OK(field_store_index_name(fields[SYS_INDEX_NAME], index->name)); - OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE)); + OK(fields[SYS_INDEX_ID]->store(longlong(index->id), true)); - OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE)); + OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), true)); OK(fields[SYS_INDEX_TYPE]->store(index->type)); @@ -6604,6 +6943,8 @@ i_s_dict_fill_sys_indexes( OK(fields[SYS_INDEX_SPACE]->store(index->space)); + OK(fields[SYS_INDEX_MERGE_THRESHOLD]->store(index->merge_threshold)); + OK(schema_table_store_record(thd, table_to_fill)); DBUG_RETURN(0); @@ -6821,6 +7162,8 @@ i_s_dict_fill_sys_columns( const char* col_name, /*!< in: column name */ dict_col_t* column, /*!< in: dict_col_t struct holding more column information */ + ulint nth_v_col, /*!< in: virtual column, its + sequence number (nth virtual col) */ TABLE* table_to_fill) /*!< in/out: fill this table */ { Field** fields; @@ -6829,11 +7172,16 @@ i_s_dict_fill_sys_columns( fields = table_to_fill->field; - OK(fields[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE)); + OK(fields[SYS_COLUMN_TABLE_ID]->store((longlong) table_id, TRUE)); OK(field_store_string(fields[SYS_COLUMN_NAME], col_name)); - OK(fields[SYS_COLUMN_POSITION]->store(column->ind)); + if (dict_col_is_virtual(column)) { + ulint pos = dict_create_v_col_pos(nth_v_col, column->ind); + OK(fields[SYS_COLUMN_POSITION]->store(pos, true)); + } else { + OK(fields[SYS_COLUMN_POSITION]->store(column->ind, true)); + } OK(fields[SYS_COLUMN_MTYPE]->store(column->mtype)); @@ -6881,18 +7229,20 @@ i_s_sys_columns_fill_table( const char* err_msg; dict_col_t column_rec; table_id_t table_id; + ulint nth_v_col; /* populate a dict_col_t structure with information from a SYS_COLUMNS row */ err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec, - &table_id, &col_name); + &table_id, &col_name, + &nth_v_col); mtr_commit(&mtr); mutex_exit(&dict_sys->mutex); if (!err_msg) { 
i_s_dict_fill_sys_columns(thd, table_id, col_name, - &column_rec, + &column_rec, nth_v_col, tables->table); } else { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, @@ -6984,6 +7334,213 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns = STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), }; +/** SYS_VIRTUAL **************************************************/ +/** Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_VIRTUAL */ +static ST_FIELD_INFO innodb_sys_virtual_fields_info[] = +{ +#define SYS_VIRTUAL_TABLE_ID 0 + {STRUCT_FLD(field_name, "TABLE_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_VIRTUAL_POS 1 + {STRUCT_FLD(field_name, "POS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_VIRTUAL_BASE_POS 2 + {STRUCT_FLD(field_name, "BASE_POS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/** Function to populate the information_schema.innodb_sys_virtual with +related information +param[in] thd thread +param[in] table_id table ID +param[in] pos virtual column position +param[in] base_pos base column position +param[in,out] table_to_fill fill this table +@return 0 on success */ +static +int +i_s_dict_fill_sys_virtual( + THD* thd, + table_id_t table_id, + ulint pos, + ulint base_pos, + TABLE* table_to_fill) +{ + Field** fields; + + DBUG_ENTER("i_s_dict_fill_sys_virtual"); + + fields = 
table_to_fill->field; + + OK(fields[SYS_VIRTUAL_TABLE_ID]->store((longlong) table_id, TRUE)); + + OK(fields[SYS_VIRTUAL_POS]->store(pos)); + + OK(fields[SYS_VIRTUAL_BASE_POS]->store(base_pos)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} + +/** Function to fill information_schema.innodb_sys_virtual with information +collected by scanning SYS_VIRTUAL table. +param[in] thd thread +param[in,out] tables tables to fill +param[in] item condition (not used) +@return 0 on success */ +static +int +i_s_sys_virtual_fill_table( + THD* thd, + TABLE_LIST* tables, + Item* ) +{ + btr_pcur_t pcur; + const rec_t* rec; + ulint pos; + ulint base_pos; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("i_s_sys_virtual_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + + /* deny access to user without PROCESS_ACL privilege */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_VIRTUAL); + + while (rec) { + const char* err_msg; + table_id_t table_id; + + /* populate a dict_col_t structure with information from + a SYS_VIRTUAL row */ + err_msg = dict_process_sys_virtual_rec(heap, rec, + &table_id, &pos, + &base_pos); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + i_s_dict_fill_sys_virtual(thd, table_id, pos, base_pos, + tables->table); + } else { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +/** Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_virtual +param[in,out] p table schema object +@return 0 on success */ +static +int 
+innodb_sys_virtual_init( + void* p) +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_sys_virtual_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_sys_virtual_fields_info; + schema->fill_table = i_s_sys_virtual_fill_table; + + DBUG_RETURN(0); +} + +struct st_maria_plugin i_s_innodb_sys_virtual = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_SYS_VIRTUAL"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB SYS_VIRTUAL"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_sys_virtual_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* Maria extension */ + STRUCT_FLD(version_info, INNODB_VERSION_STR), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_BETA), +}; /** SYS_FIELDS ***************************************************/ /* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FIELDS */ static ST_FIELD_INFO innodb_sys_fields_fields_info[] = @@ -7038,11 +7595,11 @@ i_s_dict_fill_sys_fields( fields = table_to_fill->field; - OK(fields[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE)); + OK(fields[SYS_FIELD_INDEX_ID]->store((longlong) index_id, TRUE)); 
OK(field_store_string(fields[SYS_FIELD_NAME], field->name)); - OK(fields[SYS_FIELD_POS]->store(static_cast(pos))); + OK(fields[SYS_FIELD_POS]->store(static_cast(pos), true)); OK(schema_table_store_record(thd, table_to_fill)); @@ -7490,7 +8047,7 @@ i_s_dict_fill_sys_foreign_cols( OK(field_store_string(fields[SYS_FOREIGN_COL_REF_NAME], ref_col_name)); - OK(fields[SYS_FOREIGN_COL_POS]->store(static_cast(pos))); + OK(fields[SYS_FOREIGN_COL_POS]->store(pos, true)); OK(schema_table_store_record(thd, table_to_fill)); @@ -7703,6 +8260,42 @@ static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, +#define SYS_TABLESPACES_SPACE_TYPE 7 + {STRUCT_FLD(field_name, "SPACE_TYPE"), + STRUCT_FLD(field_length, 10), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_TABLESPACES_FS_BLOCK_SIZE 8 + {STRUCT_FLD(field_name, "FS_BLOCK_SIZE"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_TABLESPACES_FILE_SIZE 9 + {STRUCT_FLD(field_name, "FILE_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_TABLESPACES_ALLOC_SIZE 10 + {STRUCT_FLD(field_name, "ALLOCATED_SIZE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; @@ -7721,50 +8314,125 
@@ i_s_dict_fill_sys_tablespaces( ulint flags, /*!< in: tablespace flags */ TABLE* table_to_fill) /*!< in/out: fill this table */ { - Field** fields; - ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); - ulint page_size = fsp_flags_get_page_size(flags); - ulint zip_size = fsp_flags_get_zip_size(flags); - const char* file_format; - const char* row_format; + Field** fields; + ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); + bool is_compressed = FSP_FLAGS_GET_ZIP_SSIZE(flags); + const char* file_format; + const char* row_format; + const page_size_t page_size(flags); + const char* space_type; DBUG_ENTER("i_s_dict_fill_sys_tablespaces"); file_format = trx_sys_file_format_id_to_name(atomic_blobs); - if (!atomic_blobs) { + if (is_system_tablespace(space)) { row_format = "Compact or Redundant"; - } else if DICT_TF_GET_ZIP_SSIZE(flags) { + } else if (fsp_is_shared_tablespace(flags) && !is_compressed) { + file_format = "Any"; + row_format = "Any"; + } else if (is_compressed) { row_format = "Compressed"; - } else { + } else if (atomic_blobs) { row_format = "Dynamic"; + } else { + row_format = "Compact or Redundant"; + } + + if (is_system_tablespace(space)) { + space_type = "System"; + } else if (fsp_is_shared_tablespace(flags)) { + space_type = "General"; + } else { + space_type = "Single"; } fields = table_to_fill->field; - OK(fields[SYS_TABLESPACES_SPACE]->store( - static_cast(space))); + OK(fields[SYS_TABLESPACES_SPACE]->store(space, true)); OK(field_store_string(fields[SYS_TABLESPACES_NAME], name)); - OK(fields[SYS_TABLESPACES_FLAGS]->store( - static_cast(flags))); + OK(fields[SYS_TABLESPACES_FLAGS]->store(flags, true)); OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT], file_format)); - OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT], - row_format)); + OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT], row_format)); OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store( - static_cast(page_size))); + univ_page_size.physical(), true)); 
OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store( - static_cast(zip_size))); + page_size.is_compressed() + ? page_size.physical() + : 0, true)); + + OK(field_store_string(fields[SYS_TABLESPACES_SPACE_TYPE], + space_type)); + + char* filepath = NULL; + if (FSP_FLAGS_HAS_DATA_DIR(flags) + || FSP_FLAGS_GET_SHARED(flags)) { + mutex_enter(&dict_sys->mutex); + filepath = dict_get_first_path(space); + mutex_exit(&dict_sys->mutex); + } + + if (filepath == NULL) { + filepath = fil_make_filepath(NULL, name, IBD, false); + } + + os_file_stat_t stat; + os_file_size_t file; + + memset(&file, 0xff, sizeof(file)); + memset(&stat, 0x0, sizeof(stat)); + + if (filepath != NULL) { + + file = os_file_get_size(filepath); + + /* Get the file system (or Volume) block size. */ + dberr_t err = os_file_get_status(filepath, &stat, false, false); + + switch(err) { + case DB_FAIL: + ib::warn() + << "File '" << filepath << "', failed to get " + << "stats"; + break; + + case DB_SUCCESS: + case DB_NOT_FOUND: + break; + + default: + ib::error() + << "File '" << filepath << "' " + << ut_strerr(err); + break; + } + + ut_free(filepath); + } + + if (file.m_total_size == static_cast(~0)) { + stat.block_size = 0; + file.m_total_size = 0; + file.m_alloc_size = 0; + } + + OK(fields[SYS_TABLESPACES_FS_BLOCK_SIZE]->store(stat.block_size, true)); + + OK(fields[SYS_TABLESPACES_FILE_SIZE]->store(file.m_total_size, true)); + + OK(fields[SYS_TABLESPACES_ALLOC_SIZE]->store(file.m_alloc_size, true)); OK(schema_table_store_record(thd, table_to_fill)); DBUG_RETURN(0); } + /*******************************************************************//** Function to populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table. 
Loop through each record in SYS_TABLESPACES, and extract the column @@ -7795,9 +8463,10 @@ i_s_sys_tablespaces_fill_table( mutex_enter(&dict_sys->mutex); mtr_start(&mtr); - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + rec != NULL; + rec = dict_getnext_system(&pcur, &mtr)) { - while (rec) { const char* err_msg; ulint space; const char* name; @@ -7825,7 +8494,6 @@ i_s_sys_tablespaces_fill_table( /* Get the next record */ mutex_enter(&dict_sys->mutex); mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); } mtr_commit(&mtr); @@ -8755,9 +9423,10 @@ i_s_innodb_mutexes_fill_table( DBUG_RETURN(0); } - mutex_enter(&mutex_list_mutex); + // mutex_enter(&mutex_list_mutex); - for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; +#ifdef JAN_TODO_FIXME + for (mutex = UT_LIST_GET_FIRST(os_mutex_list); mutex != NULL; mutex = UT_LIST_GET_NEXT(list, mutex)) { if (mutex->count_os_wait == 0) { continue; @@ -8790,6 +9459,7 @@ i_s_innodb_mutexes_fill_table( } mutex_exit(&mutex_list_mutex); +#endif /* JAN_TODO_FIXME */ mutex_enter(&rw_lock_list_mutex); @@ -8805,7 +9475,7 @@ i_s_innodb_mutexes_fill_table( continue; } - OK(field_store_string(fields[MUTEXES_NAME], lock->lock_name)); + //OK(field_store_string(fields[MUTEXES_NAME], lock->lock_name)); OK(field_store_string(fields[MUTEXES_CREATE_FILE], innobase_basename(lock->cfile_name))); OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], lock->cline)); OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)lock->count_os_wait)); @@ -8818,7 +9488,7 @@ i_s_innodb_mutexes_fill_table( my_snprintf(buf1, sizeof buf1, "combined %s", innobase_basename(block_lock->cfile_name)); - OK(field_store_string(fields[MUTEXES_NAME], block_lock->lock_name)); + //OK(field_store_string(fields[MUTEXES_NAME], block_lock->lock_name)); OK(field_store_string(fields[MUTEXES_CREATE_FILE], buf1)); OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], block_lock->cline)); 
OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)block_lock_oswait_count)); @@ -9176,3 +9846,147 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_semaphore_waits = STRUCT_FLD(version_info, INNODB_VERSION_STR), STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), }; + +/** Fill handlerton based INFORMATION_SCHEMA.FILES table. +@param[in,out] thd thread/connection descriptor +@param[in,out] tables information schema tables to fill +@retval 0 for success +@retval HA_ERR_OUT_OF_MEM when running out of memory +@return nonzero for failure */ +int +i_s_files_table_fill( + THD* thd, + TABLE_LIST* tables) +{ + TABLE* table_to_fill = tables->table; + Field** fields = table_to_fill->field; + /* Use this class so that if the OK() macro returns, + fil_space_release() is called. */ + FilSpace space; + + DBUG_ENTER("i_s_files_table_fill"); + + /* Gather information reportable to information_schema.files + for the first or next file in fil_system. */ + for (const fil_node_t* node = fil_node_next(NULL); + node != NULL; + node = fil_node_next(node)) { + const char* type = "TABLESPACE"; + const char* space_name; + /** Buffer to build file-per-table tablespace names. + Even though a space_id is often stored in a ulint, it cannot + be larger than 1<<32-1, which is 10 numeric characters. */ + char file_per_table_name[ + sizeof("innodb_file_per_table_1234567890")]; + uintmax_t avail_space; + ulint extent_pages; + ulint extend_pages; + + space = node->space; + fil_type_t purpose = space()->purpose; + + switch (purpose) { + case FIL_TYPE_LOG: + /* Do not report REDO LOGs to I_S.FILES */ + space = NULL; + continue; + case FIL_TYPE_TABLESPACE: + if (!is_system_tablespace(space()->id) + && space()->id <= srv_undo_tablespaces_open) { + type = "UNDO LOG"; + break; + } /* else fall through for TABLESPACE */ + case FIL_TYPE_IMPORT: + /* 'IMPORTING'is a status. The type is TABLESPACE. 
*/ + break; + case FIL_TYPE_TEMPORARY: + type = "TEMPORARY"; + break; + }; + + page_size_t page_size(space()->flags); + + /* Single-table tablespaces are assigned to a schema. */ + if (!is_predefined_tablespace(space()->id) + && !FSP_FLAGS_GET_SHARED(space()->flags)) { + /* Their names will be like "test/t1" */ + ut_ad(NULL != strchr(space()->name, '/')); + + /* File-per-table tablespace names are generated + internally and certain non-file-system-allowed + characters are expanded which can make the space + name too long. In order to avoid that problem, + use a modified tablespace name. + Since we are not returning dbname and tablename, + the user must match the space_id to i_s_table.space + in order find the single table that is in it or the + schema it belongs to. */ + ut_snprintf( + file_per_table_name, + sizeof(file_per_table_name), + "innodb_file_per_table_" ULINTPF, + space()->id); + space_name = file_per_table_name; + } else { + /* Only file-per-table space names contain '/'. + This is not file-per-table . 
*/ + ut_ad(NULL == strchr(space()->name, '/')); + + space_name = space()->name; + } + + init_fill_schema_files_row(table_to_fill); + + OK(field_store_ulint(fields[IS_FILES_FILE_ID], + space()->id)); + OK(field_store_string(fields[IS_FILES_FILE_NAME], + node->name)); + OK(field_store_string(fields[IS_FILES_FILE_TYPE], + type)); + OK(field_store_string(fields[IS_FILES_TABLESPACE_NAME], + space_name)); + OK(field_store_string(fields[IS_FILES_ENGINE], + "InnoDB")); + OK(field_store_ulint(fields[IS_FILES_FREE_EXTENTS], + space()->free_len)); + + extent_pages = fsp_get_extent_size_in_pages(page_size); + + OK(field_store_ulint(fields[IS_FILES_TOTAL_EXTENTS], + space()->size_in_header / extent_pages)); + OK(field_store_ulint(fields[IS_FILES_EXTENT_SIZE], + extent_pages * page_size.physical())); + OK(field_store_ulint(fields[IS_FILES_INITIAL_SIZE], + node->init_size * page_size.physical())); + + if (node->max_size >= ULINT_MAX) { + fields[IS_FILES_MAXIMUM_SIZE]->set_null(); + } else { + OK(field_store_ulint(fields[IS_FILES_MAXIMUM_SIZE], + node->max_size * page_size.physical())); + } + if (space()->id == srv_sys_space.space_id()) { + extend_pages = srv_sys_space.get_increment(); + } else if (space()->id == srv_tmp_space.space_id()) { + extend_pages = srv_tmp_space.get_increment(); + } else { + extend_pages = fsp_get_pages_to_extend_ibd( + page_size, node->size); + } + + OK(field_store_ulint(fields[IS_FILES_AUTOEXTEND_SIZE], + extend_pages * page_size.physical())); + + avail_space = fsp_get_available_space_in_free_extents(space()); + OK(field_store_ulint(fields[IS_FILES_DATA_FREE], + static_cast(avail_space * 1024))); + OK(field_store_string(fields[IS_FILES_STATUS], + (purpose == FIL_TYPE_IMPORT) + ? 
"IMPORTING" : "NORMAL")); + + schema_table_store_record(thd, table_to_fill); + space = NULL; + } + + DBUG_RETURN(0); +} diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h index 979d9d80a7f..eb076ec802e 100644 --- a/storage/innobase/handler/i_s.h +++ b/storage/innobase/handler/i_s.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyrigth (c) 2014, 2015, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under @@ -50,6 +50,7 @@ extern struct st_maria_plugin i_s_innodb_ft_config; extern struct st_maria_plugin i_s_innodb_buffer_page; extern struct st_maria_plugin i_s_innodb_buffer_page_lru; extern struct st_maria_plugin i_s_innodb_buffer_stats; +extern struct st_maria_plugin i_s_innodb_temp_table_info; extern struct st_maria_plugin i_s_innodb_sys_tables; extern struct st_maria_plugin i_s_innodb_sys_tablestats; extern struct st_maria_plugin i_s_innodb_sys_indexes; @@ -60,10 +61,22 @@ extern struct st_maria_plugin i_s_innodb_sys_foreign_cols; extern struct st_maria_plugin i_s_innodb_sys_tablespaces; extern struct st_maria_plugin i_s_innodb_sys_datafiles; extern struct st_maria_plugin i_s_innodb_mutexes; +extern struct st_maria_plugin i_s_innodb_sys_virtual; extern struct st_maria_plugin i_s_innodb_tablespaces_encryption; extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing; extern struct st_maria_plugin i_s_innodb_sys_semaphore_waits; +/** Fill handlerton based INFORMATION_SCHEMA.FILES table. 
+@param[in,out] thd thread/connection descriptor +@param[in,out] tables information schema tables to fill +@retval 0 for success +@retval HA_ERR_OUT_OF_MEM when running out of memory +@return nonzero for failure */ +int +i_s_files_table_fill( + THD *thd, + TABLE_LIST *tables); + /** maximum number of buffer page info we would cache. */ #define MAX_BUF_INFO_CACHED 10000 diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 0a2140c4a29..71aa7be3ef7 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -24,10 +24,14 @@ Insert buffer Created 7/19/1997 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + #include "ibuf0ibuf.h" +#include "sync0sync.h" +#include "btr0sea.h" #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -UNIV_INTERN my_bool srv_ibuf_disable_background_merge; +my_bool srv_ibuf_disable_background_merge; #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /** Number of bits describing a single page */ @@ -54,14 +58,13 @@ UNIV_INTERN my_bool srv_ibuf_disable_background_merge; #include "btr0pcur.h" #include "btr0btr.h" #include "row0upd.h" -#include "sync0sync.h" #include "dict0boot.h" #include "fut0lst.h" #include "lock0lock.h" #include "log0recv.h" #include "que0que.h" #include "srv0start.h" /* srv_shutdown_state */ -#include "ha_prototypes.h" +#include "fsp0sysspace.h" #include "rem0cmp.h" /* STRUCTURE OF AN INSERT BUFFER RECORD @@ -190,25 +193,16 @@ level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e., it uses synchronous aio, it can access any pages, as long as it obeys the access order rules. */ -/** Table name for the insert buffer. */ -#define IBUF_TABLE_NAME "SYS_IBUF_TABLE" - /** Operations that can currently be buffered. */ -UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL; +ibuf_use_t ibuf_use = IBUF_USE_ALL; #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /** Flag to control insert buffer debugging. 
*/ -UNIV_INTERN uint ibuf_debug; +uint ibuf_debug; #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /** The insert buffer control structure */ -UNIV_INTERN ibuf_t* ibuf = NULL; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; -UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key; -UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key; -#endif /* UNIV_PFS_MUTEX */ +ibuf_t* ibuf = NULL; #ifdef UNIV_IBUF_COUNT_DEBUG /** Number of tablespaces in the ibuf_counts array */ @@ -219,27 +213,23 @@ UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key; /** Buffered entry counts for file pages, used in debugging */ static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES]; -/******************************************************************//** -Checks that the indexes to ibuf_counts[][] are within limits. */ +/** Checks that the indexes to ibuf_counts[][] are within limits. +@param[in] page_id page id */ UNIV_INLINE void ibuf_count_check( -/*=============*/ - ulint space_id, /*!< in: space identifier */ - ulint page_no) /*!< in: page number */ + const page_id_t& page_id) { - if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) { + if (page_id.space() < IBUF_COUNT_N_SPACES + && page_id.page_no() < IBUF_COUNT_N_PAGES) { return; } - fprintf(stderr, - "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n" - "InnoDB: and breaks crash recovery.\n" - "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n" - "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n", - (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES, - (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES); - ut_error; + ib::fatal() << "UNIV_IBUF_COUNT_DEBUG limits space_id and page_no" + " and breaks crash recovery. space_id=" << page_id.space() + << ", should be 0<=space_id<" << IBUF_COUNT_N_SPACES + << ". 
page_no=" << page_id.page_no() + << ", should be 0<=page_no<" << IBUF_COUNT_N_PAGES; } #endif @@ -300,31 +290,31 @@ static ib_mutex_t ibuf_mutex; static ib_mutex_t ibuf_bitmap_mutex; /** The area in pages from which contract looks for page numbers for merge */ -#define IBUF_MERGE_AREA 8UL +const ulint IBUF_MERGE_AREA = 8; /** Inside the merge area, pages which have at most 1 per this number less buffered entries compared to maximum volume that can buffered for a single page are merged along with the page whose buffer became full */ -#define IBUF_MERGE_THRESHOLD 4 +const ulint IBUF_MERGE_THRESHOLD = 4; /** In ibuf_contract at most this number of pages is read to memory in one batch, in order to merge the entries for them in the insert buffer */ -#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA +const ulint IBUF_MAX_N_PAGES_MERGED = IBUF_MERGE_AREA; /** If the combined size of the ibuf trees exceeds ibuf->max_size by this many pages, we start to contract it in connection to inserts there, using non-synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0 +const ulint IBUF_CONTRACT_ON_INSERT_NON_SYNC = 0; /** If the combined size of the ibuf trees exceeds ibuf->max_size by this many pages, we start to contract it in connection to inserts there, using synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_SYNC 5 +const ulint IBUF_CONTRACT_ON_INSERT_SYNC = 5; /** If the combined size of the ibuf trees exceeds ibuf->max_size by this many pages, we start to contract it synchronous contract, but do not insert */ -#define IBUF_CONTRACT_DO_NOT_INSERT 10 +const ulint IBUF_CONTRACT_DO_NOT_INSERT = 10; /* TODO: how to cope with drop table if there are records in the insert buffer for the indexes of the table? 
Is there actually any problem, @@ -341,8 +331,8 @@ ibuf_enter( /*=======*/ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(!mtr->inside_ibuf); - mtr->inside_ibuf = TRUE; + ut_ad(!mtr->is_inside_ibuf()); + mtr->enter_ibuf(); } /******************************************************************//** @@ -354,8 +344,8 @@ ibuf_exit( /*======*/ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mtr->inside_ibuf); - mtr->inside_ibuf = FALSE; + ut_ad(mtr->is_inside_ibuf()); + mtr->exit_ibuf(); } /**************************************************************//** @@ -374,7 +364,7 @@ ibuf_btr_pcur_commit_specify_mtr( /******************************************************************//** Gets the ibuf header page and x-latches it. -@return insert buffer header page */ +@return insert buffer header page */ static page_t* ibuf_header_page_get( @@ -387,7 +377,9 @@ ibuf_header_page_get( page_t* page = NULL; block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr); + page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO), + univ_page_size, RW_X_LATCH, mtr); + if (!block->page.encrypted) { buf_block_dbg_add_level(block, SYNC_IBUF_HEADER); @@ -399,8 +391,8 @@ ibuf_header_page_get( } /******************************************************************//** -Gets the root page and x-latches it. -@return insert buffer tree root page */ +Gets the root page and sx-latches it. 
+@return insert buffer tree root page */ static page_t* ibuf_tree_root_get( @@ -413,10 +405,12 @@ ibuf_tree_root_get( ut_ad(ibuf_inside(mtr)); ut_ad(mutex_own(&ibuf_mutex)); - mtr_x_lock(dict_index_get_lock(ibuf->index), mtr); + mtr_sx_lock(dict_index_get_lock(ibuf->index), mtr); + /* only segment list access is exclusive each other */ block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr); + page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO), + univ_page_size, RW_SX_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); @@ -430,57 +424,54 @@ ibuf_tree_root_get( } #ifdef UNIV_IBUF_COUNT_DEBUG -/******************************************************************//** -Gets the ibuf count for a given page. + +/** Gets the ibuf count for a given page. +@param[in] page_id page id @return number of entries in the insert buffer currently buffered for this page */ -UNIV_INTERN ulint ibuf_count_get( -/*===========*/ - ulint space, /*!< in: space id */ - ulint page_no)/*!< in: page number */ + const page_id_t& page_id) { - ibuf_count_check(space, page_no); + ibuf_count_check(page_id); - return(ibuf_counts[space][page_no]); + return(ibuf_counts[page_id.space()][page_id.page_no()]); } -/******************************************************************//** -Sets the ibuf count for a given page. */ +/** Sets the ibuf count for a given page. +@param[in] page_id page id +@param[in] val value to set */ static void ibuf_count_set( -/*===========*/ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: page number */ - ulint val) /*!< in: value to set */ + const page_id_t& page_id, + ulint val) { - ibuf_count_check(space, page_no); + ibuf_count_check(page_id); ut_a(val < UNIV_PAGE_SIZE); - ibuf_counts[space][page_no] = val; + ibuf_counts[page_id.space()][page_id.page_no()] = val; } #endif /******************************************************************//** Closes insert buffer and frees the data structures. 
*/ -UNIV_INTERN void ibuf_close(void) /*============*/ { mutex_free(&ibuf_pessimistic_insert_mutex); - memset(&ibuf_pessimistic_insert_mutex, - 0x0, sizeof(ibuf_pessimistic_insert_mutex)); mutex_free(&ibuf_mutex); - memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex)); mutex_free(&ibuf_bitmap_mutex); - memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex)); - mem_free(ibuf); + dict_table_t* ibuf_table = ibuf->index->table; + rw_lock_free(&ibuf->index->lock); + dict_mem_index_free(ibuf->index); + dict_mem_table_free(ibuf_table); + + ut_free(ibuf); ibuf = NULL; } @@ -491,15 +482,14 @@ static void ibuf_size_update( /*=============*/ - const page_t* root, /*!< in: ibuf tree root */ - mtr_t* mtr) /*!< in: mtr */ + const page_t* root) /*!< in: ibuf tree root */ { ut_ad(mutex_own(&ibuf_mutex)); ibuf->free_list_len = flst_get_len(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, mtr); + + PAGE_BTR_IBUF_FREE_LIST); - ibuf->height = 1 + btr_page_get_level(root, mtr); + ibuf->height = 1 + btr_page_get_level_low(root); /* the '1 +' is the ibuf header page */ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len); @@ -509,21 +499,17 @@ ibuf_size_update( Creates the insert buffer data structure at a database startup and initializes the data structures for the insert buffer. 
@return DB_SUCCESS or failure */ -UNIV_INTERN dberr_t ibuf_init_at_db_start(void) /*=======================*/ { page_t* root; mtr_t mtr; - dict_table_t* table; - mem_heap_t* heap; - dict_index_t* index; ulint n_used; page_t* header_page; dberr_t error= DB_SUCCESS; - ibuf = static_cast(mem_zalloc(sizeof(ibuf_t))); + ibuf = static_cast(ut_zalloc_nokey(sizeof(ibuf_t))); /* At startup we intialize ibuf to have a maximum of CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the @@ -533,21 +519,18 @@ ibuf_init_at_db_start(void) ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE) * CHANGE_BUFFER_DEFAULT_SIZE) / 100; - mutex_create(ibuf_pessimistic_insert_mutex_key, - &ibuf_pessimistic_insert_mutex, - SYNC_IBUF_PESS_INSERT_MUTEX); + mutex_create(LATCH_ID_IBUF, &ibuf_mutex); - mutex_create(ibuf_mutex_key, - &ibuf_mutex, SYNC_IBUF_MUTEX); + mutex_create(LATCH_ID_IBUF_BITMAP, &ibuf_bitmap_mutex); - mutex_create(ibuf_bitmap_mutex_key, - &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX); + mutex_create(LATCH_ID_IBUF_PESSIMISTIC_INSERT, + &ibuf_pessimistic_insert_mutex); mtr_start(&mtr); - mutex_enter(&ibuf_mutex); + mtr_x_lock_space(IBUF_SPACE_ID, &mtr); - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr); + mutex_enter(&ibuf_mutex); header_page = ibuf_header_page_get(&mtr); @@ -567,50 +550,37 @@ ibuf_init_at_db_start(void) buf_block_t* block; block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, - RW_X_LATCH, &mtr); + page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO), + univ_page_size, RW_X_LATCH, &mtr); + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); root = buf_block_get_frame(block); } - ibuf_size_update(root, &mtr); + ibuf_size_update(root); mutex_exit(&ibuf_mutex); ibuf->empty = page_is_empty(root); ibuf_mtr_commit(&mtr); - heap = mem_heap_create(450); - - /* Use old-style record format for the insert buffer. 
*/ - table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0); - - dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0); - - table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID; - - dict_table_add_to_cache(table, FALSE, heap); - mem_heap_free(heap); - - index = dict_mem_index_create( - IBUF_TABLE_NAME, "CLUST_IND", + ibuf->index = dict_mem_index_create( + "innodb_change_buffer", "CLUST_IND", IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1); - - dict_mem_index_add_field(index, "DUMMY_COLUMN", 0); - - index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID; - - error = dict_index_add_to_cache(table, index, - FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE); - ut_a(error == DB_SUCCESS); - - ibuf->index = dict_table_get_first_index(table); + ibuf->index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID; + ibuf->index->table = dict_mem_table_create( + "innodb_change_buffer", IBUF_SPACE_ID, 1, 0, 0, 0); + ibuf->index->n_uniq = REC_MAX_N_FIELDS; + rw_lock_create(index_tree_rw_lock_key, &ibuf->index->lock, + SYNC_IBUF_INDEX_TREE); + ibuf->index->search_info = btr_search_info_create(ibuf->index->heap); + ibuf->index->page = FSP_IBUF_TREE_ROOT_PAGE_NO; + ut_d(ibuf->index->cached = TRUE); return (error); } /*********************************************************************//** Updates the max_size value for ibuf. */ -UNIV_INTERN void ibuf_max_size_update( /*=================*/ @@ -628,7 +598,6 @@ ibuf_max_size_update( #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Initializes an ibuf bitmap page. 
*/ -UNIV_INTERN void ibuf_bitmap_page_init( /*==================*/ @@ -637,21 +606,14 @@ ibuf_bitmap_page_init( { page_t* page; ulint byte_offset; - ulint zip_size = buf_block_get_zip_size(block); - - ut_a(ut_is_2pow(zip_size)); page = buf_block_get_frame(block); fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP); /* Write all zeros to the bitmap */ - if (!zip_size) { - byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE - * IBUF_BITS_PER_PAGE); - } else { - byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE); - } + byte_offset = UT_BITS_IN_BYTES(block->page.size.physical() + * IBUF_BITS_PER_PAGE); memset(page + IBUF_BITMAP, 0, byte_offset); @@ -664,8 +626,7 @@ ibuf_bitmap_page_init( /*********************************************************************//** Parses a redo log record of an ibuf bitmap page init. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* ibuf_parse_bitmap_init( /*===================*/ @@ -674,7 +635,8 @@ ibuf_parse_bitmap_init( buf_block_t* block, /*!< in: block or NULL */ mtr_t* mtr) /*!< in: mtr or NULL */ { - ut_ad(ptr && end_ptr); + ut_ad(ptr != NULL); + ut_ad(end_ptr != NULL); if (block) { ibuf_bitmap_page_init(block, mtr); @@ -685,47 +647,49 @@ ibuf_parse_bitmap_init( #ifndef UNIV_HOTBACKUP # ifdef UNIV_DEBUG /** Gets the desired bits for a given page from a bitmap page. -@param page in: bitmap page -@param offset in: page whose bits to get -@param zs in: compressed page size in bytes; 0 for uncompressed pages -@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... -@param mtr in: mini-transaction holding an x-latch on the bitmap page -@return value of bits */ -# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \ - ibuf_bitmap_page_get_bits_low(page, offset, zs, \ +@param[in] page bitmap page +@param[in] page_id page id whose bits to get +@param[in] page_size page id whose bits to get +@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... 
+@param[in,out] mtr mini-transaction holding an x-latch on the +bitmap page +@return value of bits */ +# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \ + ibuf_bitmap_page_get_bits_low(page, page_id, page_size, \ MTR_MEMO_PAGE_X_FIX, mtr, bit) # else /* UNIV_DEBUG */ /** Gets the desired bits for a given page from a bitmap page. -@param page in: bitmap page -@param offset in: page whose bits to get -@param zs in: compressed page size in bytes; 0 for uncompressed pages -@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... -@param mtr in: mini-transaction holding an x-latch on the bitmap page -@return value of bits */ -# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \ - ibuf_bitmap_page_get_bits_low(page, offset, zs, bit) +@param[in] page bitmap page +@param[in] page_id page id whose bits to get +@param[in] page_size page id whose bits to get +@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... +@param[in,out] mtr mini-transaction holding an x-latch on the +bitmap page +@return value of bits */ +# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \ + ibuf_bitmap_page_get_bits_low(page, page_id, page_size, bit) # endif /* UNIV_DEBUG */ -/********************************************************************//** -Gets the desired bits for a given page from a bitmap page. -@return value of bits */ +/** Gets the desired bits for a given page from a bitmap page. +@param[in] page bitmap page +@param[in] page_id page id whose bits to get +@param[in] page_size page size +@param[in] latch_type MTR_MEMO_PAGE_X_FIX, MTR_MEMO_BUF_FIX, ... +@param[in,out] mtr mini-transaction holding latch_type on the +bitmap page +@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... 
+@return value of bits */ UNIV_INLINE ulint ibuf_bitmap_page_get_bits_low( -/*==========================*/ - const page_t* page, /*!< in: bitmap page */ - ulint page_no,/*!< in: page whose bits to get */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ + const page_t* page, + const page_id_t& page_id, + const page_size_t& page_size, #ifdef UNIV_DEBUG - ulint latch_type, - /*!< in: MTR_MEMO_PAGE_X_FIX, - MTR_MEMO_BUF_FIX, ... */ - mtr_t* mtr, /*!< in: mini-transaction holding latch_type - on the bitmap page */ + ulint latch_type, + mtr_t* mtr, #endif /* UNIV_DEBUG */ - ulint bit) /*!< in: IBUF_BITMAP_FREE, - IBUF_BITMAP_BUFFERED, ... */ + ulint bit) { ulint byte_offset; ulint bit_offset; @@ -736,16 +700,10 @@ ibuf_bitmap_page_get_bits_low( #if IBUF_BITS_PER_PAGE % 2 # error "IBUF_BITS_PER_PAGE % 2 != 0" #endif - ut_ad(ut_is_2pow(zip_size)); ut_ad(mtr_memo_contains_page(mtr, page, latch_type)); - if (!zip_size) { - bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE - + bit; - } else { - bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE - + bit; - } + bit_offset = (page_id.page_no() % page_size.physical()) + * IBUF_BITS_PER_PAGE + bit; byte_offset = bit_offset / 8; bit_offset = bit_offset % 8; @@ -765,19 +723,22 @@ ibuf_bitmap_page_get_bits_low( return(value); } -/********************************************************************//** -Sets the desired bit for a given page in a bitmap page. */ +/** Sets the desired bit for a given page in a bitmap page. +@param[in,out] page bitmap page +@param[in] page_id page id whose bits to set +@param[in] page_size page size +@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... 
+@param[in] val value to set +@param[in,out] mtr mtr containing an x-latch to the bitmap page */ static void ibuf_bitmap_page_set_bits( -/*======================*/ - page_t* page, /*!< in: bitmap page */ - ulint page_no,/*!< in: page whose bits to set */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */ - ulint val, /*!< in: value to set */ - mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */ + page_t* page, + const page_id_t& page_id, + const page_size_t& page_size, + ulint bit, + ulint val, + mtr_t* mtr) { ulint byte_offset; ulint bit_offset; @@ -787,20 +748,15 @@ ibuf_bitmap_page_set_bits( #if IBUF_BITS_PER_PAGE % 2 # error "IBUF_BITS_PER_PAGE % 2 != 0" #endif - ut_ad(ut_is_2pow(zip_size)); ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr->is_named_space(page_id.space())); #ifdef UNIV_IBUF_COUNT_DEBUG ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE) - || (0 == ibuf_count_get(page_get_space_id(page), - page_no))); + || (0 == ibuf_count_get(page_id))); #endif - if (!zip_size) { - bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE - + bit; - } else { - bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE - + bit; - } + + bit_offset = (page_id.page_no() % page_size.physical()) + * IBUF_BITS_PER_PAGE + bit; byte_offset = bit_offset / 8; bit_offset = bit_offset % 8; @@ -824,75 +780,70 @@ ibuf_bitmap_page_set_bits( MLOG_1BYTE, mtr); } -/********************************************************************//** -Calculates the bitmap page number for a given page number. -@return the bitmap page number where the file page is mapped */ +/** Calculates the bitmap page number for a given page number. 
+@param[in] page_id page id +@param[in] page_size page size +@return the bitmap page id where the file page is mapped */ UNIV_INLINE -ulint +const page_id_t ibuf_bitmap_page_no_calc( -/*=====================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no) /*!< in: tablespace page number */ + const page_id_t& page_id, + const page_size_t& page_size) { - ut_ad(ut_is_2pow(zip_size)); + ulint bitmap_page_no; - if (!zip_size) { - return(FSP_IBUF_BITMAP_OFFSET - + (page_no & ~(UNIV_PAGE_SIZE - 1))); - } else { - return(FSP_IBUF_BITMAP_OFFSET - + (page_no & ~(zip_size - 1))); - } + bitmap_page_no = FSP_IBUF_BITMAP_OFFSET + + (page_id.page_no() & ~(page_size.physical() - 1)); + + return(page_id_t(page_id.space(), bitmap_page_no)); } -/********************************************************************//** -Gets the ibuf bitmap page where the bits describing a given file page are +/** Gets the ibuf bitmap page where the bits describing a given file page are stored. 
+@param[in] page_id page id of the file page +@param[in] page_size page size of the file page +@param[in] file file name +@param[in] line line where called +@param[in,out] mtr mini-transaction @return bitmap page where the file page is mapped, that is, the bitmap page containing the descriptor bits for the file page; the bitmap page is x-latched */ static page_t* ibuf_bitmap_get_map_page_func( -/*==========================*/ - ulint space, /*!< in: space id of the file page */ - ulint page_no,/*!< in: page number of the file page */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ + const page_id_t& page_id, + const page_size_t& page_size, + const char* file, + ulint line, + mtr_t* mtr) { buf_block_t* block = NULL; dberr_t err = DB_SUCCESS; - block = buf_page_get_gen(space, zip_size, - ibuf_bitmap_page_no_calc(zip_size, page_no), - RW_X_LATCH, NULL, BUF_GET, + block = buf_page_get_gen(ibuf_bitmap_page_no_calc(page_id, page_size), + page_size, RW_X_LATCH, NULL, BUF_GET, file, line, mtr, &err); if (err != DB_SUCCESS) { return NULL; } + buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP); return(buf_block_get_frame(block)); } -/********************************************************************//** -Gets the ibuf bitmap page where the bits describing a given file page are +/** Gets the ibuf bitmap page where the bits describing a given file page are stored. 
+@param[in] page_id page id of the file page +@param[in] page_size page size of the file page +@param[in,out] mtr mini-transaction @return bitmap page where the file page is mapped, that is, the bitmap page containing the descriptor bits for the file page; the bitmap page -is x-latched -@param space in: space id of the file page -@param page_no in: page number of the file page -@param zip_size in: compressed page size in bytes; 0 for uncompressed pages -@param mtr in: mini-transaction */ -#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \ - ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \ +is x-latched */ +#define ibuf_bitmap_get_map_page(page_id, page_size, mtr) \ + ibuf_bitmap_get_map_page_func(page_id, page_size, \ __FILE__, __LINE__, mtr) /************************************************************************//** @@ -904,8 +855,6 @@ UNIV_INLINE void ibuf_set_free_bits_low( /*===================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ const buf_block_t* block, /*!< in: index page; free bits are set if the index is non-clustered and page level is 0 */ @@ -913,29 +862,24 @@ ibuf_set_free_bits_low( mtr_t* mtr) /*!< in/out: mtr */ { page_t* bitmap_page; - ulint space; - ulint page_no; + + ut_ad(mtr->is_named_space(block->page.id.space())); if (!page_is_leaf(buf_block_get_frame(block))) { return; } - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); -#ifdef UNIV_IBUF_DEBUG -# if 0 - fprintf(stderr, - "Setting space %lu page %lu free bits to %lu should be %lu\n", - space, page_no, val, - ibuf_index_page_calc_free(zip_size, block)); -# endif + bitmap_page = ibuf_bitmap_get_map_page(block->page.id, + block->page.size, mtr); - ut_a(val <= ibuf_index_page_calc_free(zip_size, block)); +#ifdef UNIV_IBUF_DEBUG + ut_a(val <= ibuf_index_page_calc_free(block)); #endif /* UNIV_IBUF_DEBUG */ - 
ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, val, mtr); + + ibuf_bitmap_page_set_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_FREE, val, mtr); } /************************************************************************//** @@ -943,7 +887,6 @@ Sets the free bit of the page in the ibuf bitmap. This is done in a separate mini-transaction, hence this operation does not restrict further work to only ibuf bitmap operations, which would result if the latch to the bitmap page were kept. */ -UNIV_INTERN void ibuf_set_free_bits_func( /*====================*/ @@ -959,9 +902,6 @@ ibuf_set_free_bits_func( mtr_t mtr; page_t* page; page_t* bitmap_page; - ulint space; - ulint page_no; - ulint zip_size; page = buf_block_get_frame(block); @@ -971,18 +911,32 @@ ibuf_set_free_bits_func( } mtr_start(&mtr); + const fil_space_t* space = mtr.set_named_space(block->page.id.space()); - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - zip_size = buf_block_get_zip_size(block); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr); + bitmap_page = ibuf_bitmap_get_map_page(block->page.id, + block->page.size, &mtr); + + switch (space->purpose) { + case FIL_TYPE_LOG: + ut_ad(0); + break; + case FIL_TYPE_TABLESPACE: + /* Avoid logging while fixing up truncate of table. 
*/ + if (!srv_is_tablespace_truncated(block->page.id.space())) { + break; + } + /* fall through */ + case FIL_TYPE_TEMPORARY: + case FIL_TYPE_IMPORT: + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + } #ifdef UNIV_IBUF_DEBUG if (max_val != ULINT_UNDEFINED) { ulint old_val; old_val = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, + bitmap_page, block->page.id, IBUF_BITMAP_FREE, &mtr); # if 0 if (old_val != max_val) { @@ -998,13 +952,16 @@ ibuf_set_free_bits_func( # if 0 fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n", page_get_page_no(page), val, - ibuf_index_page_calc_free(zip_size, block)); + ibuf_index_page_calc_free(block)); # endif - ut_a(val <= ibuf_index_page_calc_free(zip_size, block)); + ut_a(val <= ibuf_index_page_calc_free(block)); #endif /* UNIV_IBUF_DEBUG */ - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, val, &mtr); + + ibuf_bitmap_page_set_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_FREE, val, &mtr); + mtr_commit(&mtr); } @@ -1017,7 +974,6 @@ buffer bitmap must never exceed the free space on a page. It is safe to decrement or reset the bits in the bitmap in a mini-transaction that is committed before the mini-transaction that affects the free space. */ -UNIV_INTERN void ibuf_reset_free_bits( /*=================*/ @@ -1036,7 +992,6 @@ thread until mtr is committed. NOTE: The free bits in the insert buffer bitmap must never exceed the free space on a page. It is safe to set the free bits in the same mini-transaction that updated the page. 
*/ -UNIV_INTERN void ibuf_update_free_bits_low( /*======================*/ @@ -1052,17 +1007,19 @@ ibuf_update_free_bits_low( ulint after; ut_a(!buf_block_get_page_zip(block)); + ut_ad(mtr->is_named_space(block->page.id.space())); - before = ibuf_index_page_calc_free_bits(0, max_ins_size); + before = ibuf_index_page_calc_free_bits(block->page.size.logical(), + max_ins_size); - after = ibuf_index_page_calc_free(0, block); + after = ibuf_index_page_calc_free(block); /* This approach cannot be used on compressed pages, since the computed value of "before" often does not match the current state of the bitmap. This is because the free space may increase or decrease when a compressed page is reorganized. */ if (before != after) { - ibuf_set_free_bits_low(0, block, after, mtr); + ibuf_set_free_bits_low(block, after, mtr); } } @@ -1074,7 +1031,6 @@ thread until mtr is committed. NOTE: The free bits in the insert buffer bitmap must never exceed the free space on a page. It is safe to set the free bits in the same mini-transaction that updated the page. 
*/ -UNIV_INTERN void ibuf_update_free_bits_zip( /*======================*/ @@ -1082,21 +1038,15 @@ ibuf_update_free_bits_zip( mtr_t* mtr) /*!< in/out: mtr */ { page_t* bitmap_page; - ulint space; - ulint page_no; - ulint zip_size; ulint after; - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - zip_size = buf_block_get_zip_size(block); - ut_a(page_is_leaf(buf_block_get_frame(block))); - ut_a(zip_size); + ut_a(block->page.size.is_compressed()); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); + bitmap_page = ibuf_bitmap_get_map_page(block->page.id, + block->page.size, mtr); - after = ibuf_index_page_calc_free_zip(zip_size, block); + after = ibuf_index_page_calc_free_zip(block); if (after == 0) { /* We move the page to the front of the buffer pool LRU list: @@ -1107,8 +1057,9 @@ ibuf_update_free_bits_zip( buf_page_make_young(&block->page); } - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, after, mtr); + ibuf_bitmap_page_set_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_FREE, after, mtr); } /**********************************************************************//** @@ -1118,73 +1069,72 @@ virtually prevent any further operations until mtr is committed. NOTE: The free bits in the insert buffer bitmap must never exceed the free space on a page. It is safe to set the free bits in the same mini-transaction that updated the pages. 
*/ -UNIV_INTERN void ibuf_update_free_bits_for_two_pages_low( /*====================================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ buf_block_t* block1, /*!< in: index page */ buf_block_t* block2, /*!< in: index page */ mtr_t* mtr) /*!< in: mtr */ { ulint state; + ut_ad(mtr->is_named_space(block1->page.id.space())); + ut_ad(block1->page.id.space() == block2->page.id.space()); + /* As we have to x-latch two random bitmap pages, we have to acquire the bitmap mutex to prevent a deadlock with a similar operation performed by another OS thread. */ mutex_enter(&ibuf_bitmap_mutex); - state = ibuf_index_page_calc_free(zip_size, block1); + state = ibuf_index_page_calc_free(block1); - ibuf_set_free_bits_low(zip_size, block1, state, mtr); + ibuf_set_free_bits_low(block1, state, mtr); - state = ibuf_index_page_calc_free(zip_size, block2); + state = ibuf_index_page_calc_free(block2); - ibuf_set_free_bits_low(zip_size, block2, state, mtr); + ibuf_set_free_bits_low(block2, state, mtr); mutex_exit(&ibuf_bitmap_mutex); } -/**********************************************************************//** -Returns TRUE if the page is one of the fixed address ibuf pages. -@return TRUE if a fixed address ibuf i/o page */ +/** Returns TRUE if the page is one of the fixed address ibuf pages. 
+@param[in] page_id page id +@param[in] page_size page size +@return TRUE if a fixed address ibuf i/o page */ UNIV_INLINE ibool ibuf_fixed_addr_page( -/*=================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no)/*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size) { - return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO) - || ibuf_bitmap_page(zip_size, page_no)); + return((page_id.space() == IBUF_SPACE_ID + && page_id.page_no() == IBUF_TREE_ROOT_PAGE_NO) + || ibuf_bitmap_page(page_id, page_size)); } -/***********************************************************************//** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. -Must not be called when recv_no_ibuf_operations==TRUE. -@return TRUE if level 2 or level 3 page */ -UNIV_INTERN +/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. +Must not be called when recv_no_ibuf_operations==true. +@param[in] page_id page id +@param[in] page_size page size +@param[in] x_latch FALSE if relaxed check (avoid latching the +bitmap page) +@param[in] file file name +@param[in] line line where called +@param[in,out] mtr mtr which will contain an x-latch to the +bitmap page if the page is not one of the fixed address ibuf pages, or NULL, +in which case a new transaction is created. 
+@return TRUE if level 2 or level 3 page */ ibool ibuf_page_low( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size, #ifdef UNIV_DEBUG - ibool x_latch,/*!< in: FALSE if relaxed check - (avoid latching the bitmap page) */ + ibool x_latch, #endif /* UNIV_DEBUG */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr which will contain an - x-latch to the bitmap page if the page - is not one of the fixed address ibuf - pages, or NULL, in which case a new - transaction is created. */ + const char* file, + ulint line, + mtr_t* mtr) { ibool ret; mtr_t local_mtr; @@ -1193,15 +1143,15 @@ ibuf_page_low( ut_ad(!recv_no_ibuf_operations); ut_ad(x_latch || mtr == NULL); - if (ibuf_fixed_addr_page(space, zip_size, page_no)) { + if (ibuf_fixed_addr_page(page_id, page_size)) { return(TRUE); - } else if (space != IBUF_SPACE_ID) { + } else if (page_id.space() != IBUF_SPACE_ID) { return(FALSE); } - ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE); + ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TYPE_TABLESPACE); #ifdef UNIV_DEBUG if (!x_latch) { @@ -1217,16 +1167,17 @@ ibuf_page_low( not be modified by any other thread. Nobody should be calling ibuf_add_free_page() or ibuf_remove_free_page() while the page is linked to the insert buffer b-tree. 
*/ + dberr_t err = DB_SUCCESS; - bitmap_page = buf_block_get_frame( - buf_page_get_gen( - space, zip_size, - ibuf_bitmap_page_no_calc(zip_size, page_no), - RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, - file, line, &local_mtr)); + buf_block_t* block = buf_page_get_gen( + ibuf_bitmap_page_no_calc(page_id, page_size), + page_size, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, + file, line, &local_mtr, &err); + + bitmap_page = buf_block_get_frame(block); ret = ibuf_bitmap_page_get_bits_low( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, page_size, MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF); mtr_commit(&local_mtr); @@ -1239,10 +1190,10 @@ ibuf_page_low( mtr_start(mtr); } - bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size, + bitmap_page = ibuf_bitmap_get_map_page_func(page_id, page_size, file, line, mtr); - ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size, + ret = ibuf_bitmap_page_get_bits(bitmap_page, page_id, page_size, IBUF_BITMAP_IBUF, mtr); if (mtr == &local_mtr) { @@ -1260,7 +1211,7 @@ ibuf_page_low( /********************************************************************//** Returns the page number field of an ibuf record. -@return page number */ +@return page number */ static ulint ibuf_rec_get_page_no_func( @@ -1298,7 +1249,7 @@ ibuf_rec_get_page_no_func( /********************************************************************//** Returns the space id field of an ibuf record. For < 4.1.x format records returns 0. -@return space id */ +@return space id */ static ulint ibuf_rec_get_space_func( @@ -1421,7 +1372,7 @@ ibuf_rec_get_info_func( /****************************************************************//** Returns the operation type field of an ibuf record. -@return operation type */ +@return operation type */ static ibuf_op_t ibuf_rec_get_op_type_func( @@ -1458,7 +1409,6 @@ Read the first two bytes from a record's fourth field (counter field in new records; something else in older records). 
@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */ -UNIV_INTERN ulint ibuf_rec_get_counter( /*=================*/ @@ -1496,16 +1446,8 @@ ibuf_add_ops( { ulint i; -#ifndef HAVE_ATOMIC_BUILTINS - ut_ad(mutex_own(&ibuf_mutex)); -#endif /* !HAVE_ATOMIC_BUILTINS */ - for (i = 0; i < IBUF_OP_COUNT; i++) { -#ifdef HAVE_ATOMIC_BUILTINS os_atomic_increment_ulint(&arr[i], ops[i]); -#else /* HAVE_ATOMIC_BUILTINS */ - arr[i] += ops[i]; -#endif /* HAVE_ATOMIC_BUILTINS */ } } @@ -1537,7 +1479,7 @@ ibuf_print_ops( /********************************************************************//** Creates a dummy index for inserting a record to a non-clustered index. -@return dummy index */ +@return dummy index */ static dict_index_t* ibuf_dummy_index_create( @@ -1549,7 +1491,7 @@ ibuf_dummy_index_create( dict_index_t* index; table = dict_mem_table_create("IBUF_DUMMY", - DICT_HDR_SPACE, n, + DICT_HDR_SPACE, n, 0, comp ? DICT_TF_COMPACT : 0, 0); index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", @@ -1695,7 +1637,7 @@ ibuf_build_entry_from_ibuf_rec_func( /******************************************************************//** Get the data size. -@return size of fields */ +@return size of fields */ UNIV_INLINE ulint ibuf_rec_get_size( @@ -1819,7 +1761,7 @@ non-clustered index. NOTE that the original entry must be kept because we copy pointers to its fields. -@return own: entry to insert into an ibuf index tree */ +@return own: entry to insert into an ibuf index tree */ static dtuple_t* ibuf_entry_build( @@ -1981,7 +1923,7 @@ ibuf_entry_build( /*********************************************************************//** Builds a search tuple used to search buffered inserts for an index page. This is for >= 4.1.x format records. 
-@return own: search tuple */ +@return own: search tuple */ static dtuple_t* ibuf_search_tuple_build( @@ -2034,7 +1976,7 @@ ibuf_search_tuple_build( /*********************************************************************//** Checks if there are enough pages in the free list of the ibuf tree that we dare to start a pessimistic insert to the insert buffer. -@return TRUE if enough free pages in list */ +@return TRUE if enough free pages in list */ UNIV_INLINE ibool ibuf_data_enough_free_for_insert(void) @@ -2054,7 +1996,7 @@ ibuf_data_enough_free_for_insert(void) /*********************************************************************//** Checks if there are enough pages in the free list of the ibuf tree that we should remove them and free to the file space management. -@return TRUE if enough free pages in list */ +@return TRUE if enough free pages in list */ UNIV_INLINE ibool ibuf_data_too_much_free(void) @@ -2068,7 +2010,7 @@ ibuf_data_too_much_free(void) /*********************************************************************//** Allocates a new page from the ibuf file segment and adds it to the free list. 
-@return TRUE on success, FALSE if no space left */ +@return TRUE on success, FALSE if no space left */ static ibool ibuf_add_free_page(void) @@ -2076,20 +2018,17 @@ ibuf_add_free_page(void) { mtr_t mtr; page_t* header_page; - ulint flags; - ulint zip_size; buf_block_t* block; page_t* page; page_t* root; page_t* bitmap_page; mtr_start(&mtr); + fil_space_t* space = mtr.set_sys_modified(); /* Acquire the fsp latch before the ibuf header, obeying the latching order */ - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - + mtr_x_lock(&space->latch, &mtr); header_page = ibuf_header_page_get(&mtr); /* Allocate a new page: NOTE that if the page has been a part of a @@ -2134,14 +2073,15 @@ ibuf_add_free_page(void) /* Set the bit indicating that this page is now an ibuf tree page (level 2 page) */ - bitmap_page = ibuf_bitmap_get_map_page( - IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr); + const page_id_t page_id(IBUF_SPACE_ID, block->page.id.page_no()); + const page_size_t page_size(space->flags); + + bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr); mutex_exit(&ibuf_mutex); - ibuf_bitmap_page_set_bits( - bitmap_page, buf_block_get_page_no(block), zip_size, - IBUF_BITMAP_IBUF, TRUE, &mtr); + ibuf_bitmap_page_set_bits(bitmap_page, page_id, page_size, + IBUF_BITMAP_IBUF, TRUE, &mtr); ibuf_mtr_commit(&mtr); @@ -2158,20 +2098,19 @@ ibuf_remove_free_page(void) mtr_t mtr; mtr_t mtr2; page_t* header_page; - ulint flags; - ulint zip_size; ulint page_no; page_t* page; page_t* root; page_t* bitmap_page; mtr_start(&mtr); + fil_space_t* space = mtr.set_sys_modified(); + const page_size_t page_size(space->flags); /* Acquire the fsp latch before the ibuf header, obeying the latching order */ - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); + mtr_x_lock(&space->latch, &mtr); header_page = ibuf_header_page_get(&mtr); /* Prevent pessimistic 
inserts to insert buffer trees for a while */ @@ -2212,11 +2151,11 @@ ibuf_remove_free_page(void) page from it. */ fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - IBUF_SPACE_ID, page_no, &mtr); + IBUF_SPACE_ID, page_no, false, &mtr); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + const page_id_t page_id(IBUF_SPACE_ID, page_no); + + ut_d(buf_page_reset_file_page_was_freed(page_id)); ibuf_enter(&mtr); @@ -2230,8 +2169,7 @@ ibuf_remove_free_page(void) { buf_block_t* block; - block = buf_page_get( - IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr); + block = buf_page_get(page_id, univ_page_size, RW_X_LATCH, &mtr); buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); @@ -2251,17 +2189,16 @@ ibuf_remove_free_page(void) /* Set the bit indicating that this page is no more an ibuf tree page (level 2 page) */ - bitmap_page = ibuf_bitmap_get_map_page( - IBUF_SPACE_ID, page_no, zip_size, &mtr); + bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, &mtr); mutex_exit(&ibuf_mutex); ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); + bitmap_page, page_id, page_size, IBUF_BITMAP_IBUF, FALSE, + &mtr); + + ut_d(buf_page_set_file_page_was_freed(page_id)); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ ibuf_mtr_commit(&mtr); } @@ -2269,17 +2206,11 @@ ibuf_remove_free_page(void) Frees excess pages from the ibuf free list. This function is called when an OS thread calls fsp services to allocate a new file segment, or a new page to a file segment, and the thread did not own the fsp latch before this call. 
*/ -UNIV_INTERN void ibuf_free_excess_pages(void) /*========================*/ { - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL), RW_LOCK_X)); ut_ad(rw_lock_get_x_lock_count( fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1); @@ -2298,7 +2229,7 @@ ibuf_free_excess_pages(void) /* Free at most a few pages at a time, so that we do not delay the requested service too much */ - for (i = 0; i < 4; i++) { + for (ulint i = 0; i < 4; i++) { ibool too_much_free; @@ -2315,11 +2246,11 @@ ibuf_free_excess_pages(void) } #ifdef UNIV_DEBUG -# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ - ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored) +# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \ + ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,pages,n_stored) #else /* UNIV_DEBUG */ -# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ - ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored) +# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,pages,n_stored) \ + ibuf_get_merge_page_nos_func(contract,rec,ids,pages,n_stored) #endif /* UNIV_DEBUG */ /*********************************************************************//** @@ -2339,9 +2270,6 @@ ibuf_get_merge_page_nos_func( mtr_t* mtr, /*!< in: mini-transaction holding rec */ #endif /* UNIV_DEBUG */ ulint* space_ids,/*!< in/out: space id's of the pages */ - ib_int64_t* space_versions,/*!< in/out: tablespace version - timestamps; used to prevent reading in old - pages after DISCARD + IMPORT tablespace */ ulint* page_nos,/*!< in/out: buffer for at least IBUF_MAX_N_PAGES_MERGED many page numbers; the page numbers are in an ascending order */ @@ -2366,7 +2294,8 @@ ibuf_get_merge_page_nos_func( *n_stored = 0; - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4); 
+ limit = ut_min(IBUF_MAX_N_PAGES_MERGED, + buf_pool_get_curr_size() / 4); if (page_rec_is_supremum(rec)) { @@ -2436,16 +2365,23 @@ ibuf_get_merge_page_nos_func( } else { rec_page_no = ibuf_rec_get_page_no(mtr, rec); rec_space_id = ibuf_rec_get_space(mtr, rec); - /* In the system tablespace, the smallest + /* In the system tablespace the smallest possible secondary index leaf page number is - bigger than IBUF_TREE_ROOT_PAGE_NO (4). In - other tablespaces, the clustered index tree is - created at page 3, which makes page 4 the - smallest possible secondary index leaf page - (and that only after DROP INDEX). */ - ut_ad(rec_page_no - > (ulint) IBUF_TREE_ROOT_PAGE_NO - - (rec_space_id != 0)); + bigger than FSP_DICT_HDR_PAGE_NO (7). + In all tablespaces, pages 0 and 1 are reserved + for the allocation bitmap and the change + buffer bitmap. In file-per-table tablespaces, + a file segment inode page will be created at + page 2 and the clustered index tree is created + at page 3. So for file-per-table tablespaces, + page 4 is the smallest possible secondary + index leaf page. CREATE TABLESPACE also initially + uses pages 2 and 3 for the first created table, + but that table may be dropped, allowing page 2 + to be reused for a secondary index leaf page. + To keep this assertion simple, just + make sure the page is >= 2. */ + ut_ad(rec_page_no >= FSP_FIRST_INODE_PAGE_NO); } #ifdef UNIV_IBUF_DEBUG @@ -2465,8 +2401,6 @@ ibuf_get_merge_page_nos_func( / IBUF_MERGE_THRESHOLD)) { space_ids[*n_stored] = prev_space_id; - space_versions[*n_stored] - = fil_space_get_version(prev_space_id); page_nos[*n_stored] = prev_page_no; (*n_stored)++; @@ -2512,7 +2446,7 @@ ibuf_get_merge_page_nos_func( /*******************************************************************//** Get the matching records for space id. 
-@return current rec or NULL */ +@return current rec or NULL */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) const rec_t* ibuf_get_user_rec( @@ -2544,13 +2478,11 @@ ibuf_get_merge_pages( ulint limit, /*!< in: max page numbers to read */ ulint* pages, /*!< out: pages read */ ulint* spaces, /*!< out: spaces read */ - ib_int64_t* versions,/*!< out: space versions read */ ulint* n_pages,/*!< out: number of pages read */ mtr_t* mtr) /*!< in: mini transaction */ { const rec_t* rec; ulint volume = 0; - ib_int64_t version = fil_space_get_version(space); ut_a(space != ULINT_UNDEFINED); @@ -2565,7 +2497,6 @@ ibuf_get_merge_pages( if (*n_pages == 0 || pages[*n_pages - 1] != page_no) { spaces[*n_pages] = space; pages[*n_pages] = page_no; - versions[*n_pages] = version; ++*n_pages; } @@ -2596,7 +2527,6 @@ ibuf_merge_pages( ulint sum_sizes; ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; *n_pages = 0; @@ -2604,8 +2534,12 @@ ibuf_merge_pages( /* Open a cursor to a randomly chosen leaf of the tree, at a random position within the leaf */ + bool available; - btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); + available = btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, + &pcur, &mtr); + /* No one should make this index unavailable when server is running */ + ut_a(available); ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); @@ -2627,7 +2561,7 @@ ibuf_merge_pages( sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), &mtr, - space_ids, space_versions, + space_ids, page_nos, n_pages); #if 0 /* defined UNIV_IBUF_DEBUG */ fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", @@ -2637,7 +2571,7 @@ ibuf_merge_pages( btr_pcur_close(&pcur); buf_read_ibuf_merge_pages( - sync, space_ids, space_versions, page_nos, *n_pages); + sync, space_ids, page_nos, *n_pages); return(sum_sizes + 1); } @@ -2646,7 +2580,6 @@ ibuf_merge_pages( Contracts 
insert buffer trees by reading pages referring to space_id to the buffer pool. @returns number of pages merged.*/ -UNIV_INTERN ulint ibuf_merge_space( /*=============*/ @@ -2660,6 +2593,8 @@ ibuf_merge_space( ut_ad(space < SRV_LOG_SPACE_FIRST_ID); + ut_ad(space < SRV_LOG_SPACE_FIRST_ID); + ibuf_mtr_start(&mtr); /* Position the cursor on the first matching record. */ @@ -2675,7 +2610,6 @@ ibuf_merge_space( ulint sum_sizes = 0; ulint pages[IBUF_MAX_N_PAGES_MERGED]; ulint spaces[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED]; if (page_is_empty(btr_pcur_get_page(&pcur))) { /* If a B-tree page is empty, it must be the root page @@ -2690,12 +2624,10 @@ ibuf_merge_space( } else { sum_sizes = ibuf_get_merge_pages( - &pcur, space, IBUF_MAX_N_PAGES_MERGED, - &pages[0], &spaces[0], &versions[0], &n_pages, - &mtr); - ib_logf(IB_LOG_LEVEL_INFO,"\n Size of pages merged %lu" - ,sum_sizes); - + &pcur, space, IBUF_MAX_N_PAGES_MERGED, + &pages[0], &spaces[0], &n_pages, + &mtr); + ib::info() << "Size of pages merged " << sum_sizes; } ibuf_mtr_commit(&mtr); @@ -2703,18 +2635,16 @@ ibuf_merge_space( btr_pcur_close(&pcur); if (n_pages > 0) { - -#ifdef UNIV_DEBUG ut_ad(n_pages <= UT_ARR_SIZE(pages)); +#ifdef UNIV_DEBUG for (ulint i = 0; i < n_pages; ++i) { ut_ad(spaces[i] == space); - ut_ad(i == 0 || versions[i] == versions[i - 1]); } #endif /* UNIV_DEBUG */ buf_read_ibuf_merge_pages( - true, spaces, versions, pages, n_pages); + true, spaces, pages, n_pages); } return(n_pages); @@ -2727,16 +2657,11 @@ the issued reads to complete @return a lower limit for the combined size in bytes of entries which will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) ulint ibuf_merge( -/*=======*/ - ulint* n_pages, /*!< out: number of pages to - which merged */ - bool sync) /*!< in: TRUE if the caller - wants to wait for the issued - read with the highest - 
tablespace address to complete */ + ulint* n_pages, + bool sync) { *n_pages = 0; @@ -2764,10 +2689,7 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ static ulint ibuf_contract( -/*==========*/ - bool sync) /*!< in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ + bool sync) { ulint n_pages; @@ -2781,15 +2703,9 @@ based on the current size of the change buffer. @return a lower limit for the combined size in bytes of entries which will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ -UNIV_INTERN ulint ibuf_merge_in_background( -/*=====================*/ - bool full) /*!< in: TRUE if the caller wants to - do a full contract based on PCT_IO(100). - If FALSE then the size of contract - batch is determined based on the - current size of the ibuf tree. */ + bool full) { ulint sum_bytes = 0; ulint sum_pages = 0; @@ -2890,7 +2806,7 @@ ibuf_contract_after_insert( /*********************************************************************//** Determine if an insert buffer record has been encountered already. -@return TRUE if a new record, FALSE if possible duplicate */ +@return TRUE if a new record, FALSE if possible duplicate */ static ibool ibuf_get_volume_buffered_hash( @@ -2933,7 +2849,8 @@ ibuf_get_volume_buffered_hash( #else /* UNIV_DEBUG */ # define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \ ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs) -#endif +#endif /* UNIV_DEBUG */ + /*********************************************************************//** Update the estimate of the number of records on a page, and get the space taken by merging the buffered record to the index page. 
@@ -3144,12 +3061,11 @@ ibuf_get_volume_buffered( buf_block_t* block; block = buf_page_get( - IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, - mtr); + page_id_t(IBUF_SPACE_ID, prev_page_no), + univ_page_size, RW_X_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - prev_page = buf_block_get_frame(block); ut_ad(page_validate(prev_page, ibuf->index)); } @@ -3217,12 +3133,11 @@ count_later: buf_block_t* block; block = buf_page_get( - IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, - mtr); + page_id_t(IBUF_SPACE_ID, next_page_no), + univ_page_size, RW_X_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - next_page = buf_block_get_frame(block); ut_ad(page_validate(next_page, ibuf->index)); } @@ -3259,7 +3174,6 @@ count_later: /*********************************************************************//** Reads the biggest tablespace id from the high end of the insert buffer tree and updates the counter in fil_system. */ -UNIV_INTERN void ibuf_update_max_tablespace_id(void) /*===============================*/ @@ -3383,12 +3297,12 @@ ibuf_get_entry_counter_low_func( #else /* UNIV_DEBUG */ # define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf) -#endif +#endif /* UNIV_DEBUG */ /****************************************************************//** Calculate the counter field for an entry based on the current last record in ibuf for (space, page_no). -@return the counter field, or ULINT_UNDEFINED +@return the counter field, or ULINT_UNDEFINED if we should abort this insertion to ibuf */ static ulint @@ -3434,28 +3348,32 @@ ibuf_get_entry_counter_func( } } -/*********************************************************************//** -Buffer an operation in the insert/delete buffer, instead of doing it +/** Buffer an operation in the insert/delete buffer, instead of doing it directly to the disk page, if this is possible. 
-@return DB_SUCCESS, DB_STRONG_FAIL or other error */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) +@param[in] mode BTR_MODIFY_PREV or BTR_MODIFY_TREE +@param[in] op operation type +@param[in] no_counter TRUE=use 5.0.3 format; FALSE=allow delete +buffering +@param[in] entry index entry to insert +@param[in] entry_size rec_get_converted_size(index, entry) +@param[in,out] index index where to insert; must not be unique +or clustered +@param[in] page_id page id where to insert +@param[in] page_size page size +@param[in,out] thr query thread +@return DB_SUCCESS, DB_STRONG_FAIL or other error */ +static MY_ATTRIBUTE((warn_unused_result)) dberr_t ibuf_insert_low( -/*============*/ - ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */ - ibuf_op_t op, /*!< in: operation type */ - ibool no_counter, - /*!< in: TRUE=use 5.0.3 format; - FALSE=allow delete buffering */ - const dtuple_t* entry, /*!< in: index entry to insert */ - ulint entry_size, - /*!< in: rec_get_converted_size(index, entry) */ - dict_index_t* index, /*!< in: index where to insert; must not be - unique or clustered */ - ulint space, /*!< in: space id where to insert */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number where to insert */ - que_thr_t* thr) /*!< in: query thread */ + ulint mode, + ibuf_op_t op, + ibool no_counter, + const dtuple_t* entry, + ulint entry_size, + dict_index_t* index, + const page_id_t& page_id, + const page_size_t& page_size, + que_thr_t* thr) { big_rec_t* dummy_big_rec; btr_pcur_t pcur; @@ -3474,15 +3392,14 @@ ibuf_insert_low( dberr_t err; ibool do_merge; ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; ulint n_stored; mtr_t mtr; mtr_t bitmap_mtr; ut_a(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_spatial(index)); ut_ad(dtuple_check_typed(entry)); - ut_ad(ut_is_2pow(zip_size)); ut_ad(!no_counter || op == IBUF_OP_INSERT); 
ut_a(op < IBUF_OP_COUNT); @@ -3521,14 +3438,14 @@ ibuf_insert_low( value just before actually inserting the entry.) */ ibuf_entry = ibuf_entry_build( - op, index, entry, space, page_no, + op, index, entry, page_id.space(), page_id.page_no(), no_counter ? ULINT_UNDEFINED : 0xFFFF, heap); /* Open a cursor to the insert buffer tree to calculate if we can add the new entry to it without exceeding the free space limit for the page. */ - if (mode == BTR_MODIFY_TREE) { + if (BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) { for (;;) { mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); @@ -3541,7 +3458,7 @@ ibuf_insert_low( mutex_exit(&ibuf_mutex); mutex_exit(&ibuf_pessimistic_insert_mutex); - if (UNIV_UNLIKELY(!ibuf_add_free_page())) { + if (!ibuf_add_free_page()) { mem_heap_free(heap); return(DB_STRONG_FAIL); @@ -3557,14 +3474,15 @@ ibuf_insert_low( /* Find out the volume of already buffered inserts for the same index page */ min_n_recs = 0; - buffered = ibuf_get_volume_buffered(&pcur, space, page_no, + buffered = ibuf_get_volume_buffered(&pcur, + page_id.space(), + page_id.page_no(), op == IBUF_OP_DELETE ? &min_n_recs : NULL, &mtr); if (op == IBUF_OP_DELETE - && (min_n_recs < 2 - || buf_pool_watch_occurred(space, page_no))) { + && (min_n_recs < 2 || buf_pool_watch_occurred(page_id))) { /* The page could become empty after the record is deleted, or the page has been read in to the buffer pool. Refuse to buffer the operation. */ @@ -3583,7 +3501,7 @@ ibuf_insert_low( until after the IBUF_OP_DELETE has been buffered. */ fail_exit: - if (mode == BTR_MODIFY_TREE) { + if (BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) { mutex_exit(&ibuf_mutex); mutex_exit(&ibuf_pessimistic_insert_mutex); } @@ -3602,17 +3520,19 @@ fail_exit: and done mtr_commit(&mtr) to release the latch. 
*/ #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a((buffered == 0) || ibuf_count_get(space, page_no)); + ut_a((buffered == 0) || ibuf_count_get(page_id)); #endif ibuf_mtr_start(&bitmap_mtr); + bitmap_mtr.set_named_space(page_id.space()); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, - zip_size, &bitmap_mtr); + bitmap_page = ibuf_bitmap_get_map_page(page_id, page_size, + &bitmap_mtr); /* We check if the index page is suitable for buffered entries */ - if (buf_page_peek(space, page_no) - || lock_rec_expl_exist_on_page(space, page_no)) { + if (buf_page_peek(page_id) + || lock_rec_expl_exist_on_page(page_id.space(), + page_id.page_no())) { ibuf_mtr_commit(&bitmap_mtr); goto fail_exit; @@ -3620,11 +3540,11 @@ fail_exit: if (op == IBUF_OP_INSERT) { ulint bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE, + bitmap_page, page_id, page_size, IBUF_BITMAP_FREE, &bitmap_mtr); if (buffered + entry_size + page_dir_calc_reserved_space(1) - > ibuf_index_page_calc_free_from_bits(zip_size, bits)) { + > ibuf_index_page_calc_free_from_bits(page_size, bits)) { /* Release the bitmap page latch early. */ ibuf_mtr_commit(&bitmap_mtr); @@ -3633,7 +3553,7 @@ fail_exit: ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur), &mtr, - space_ids, space_versions, + space_ids, page_nos, &n_stored); goto fail_exit; @@ -3645,7 +3565,8 @@ fail_exit: insert. This can change the insert position, which can result in the need to abort in some cases. 
*/ ulint counter = ibuf_get_entry_counter( - space, page_no, btr_pcur_get_rec(&pcur), &mtr, + page_id.space(), page_id.page_no(), + btr_pcur_get_rec(&pcur), &mtr, btr_pcur_get_btr_cur(&pcur)->low_match < IBUF_REC_FIELD_METADATA); dfield_t* field; @@ -3666,11 +3587,11 @@ fail_exit: buffered entries for this index page, if the bit is not set yet */ old_bit_value = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, page_size, IBUF_BITMAP_BUFFERED, &bitmap_mtr); if (!old_bit_value) { - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, + ibuf_bitmap_page_set_bits(bitmap_page, page_id, page_size, IBUF_BITMAP_BUFFERED, TRUE, &bitmap_mtr); } @@ -3686,11 +3607,10 @@ fail_exit: ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); block = btr_cur_get_block(cursor); - ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + ut_ad(block->page.id.space() == IBUF_SPACE_ID); /* If this is the root page, update ibuf->empty. */ - if (UNIV_UNLIKELY(buf_block_get_page_no(block) - == FSP_IBUF_TREE_ROOT_PAGE_NO)) { + if (block->page.id.page_no() == FSP_IBUF_TREE_ROOT_PAGE_NO) { const page_t* root = buf_block_get_frame(block); ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); @@ -3700,11 +3620,12 @@ fail_exit: ibuf->empty = page_is_empty(root); } } else { - ut_ad(mode == BTR_MODIFY_TREE); + ut_ad(BTR_LATCH_MODE_WITHOUT_INTENTION(mode) + == BTR_MODIFY_TREE); - /* We acquire an x-latch to the root page before the insert, + /* We acquire an sx-latch to the root page before the insert, because a pessimistic insert releases the tree x-latch, - which would cause the x-latching of the root after that to + which would cause the sx-latching of the root after that to break the latching order. 
*/ root = ibuf_tree_root_get(&mtr); @@ -3724,12 +3645,12 @@ fail_exit: } mutex_exit(&ibuf_pessimistic_insert_mutex); - ibuf_size_update(root, &mtr); + ibuf_size_update(root); mutex_exit(&ibuf_mutex); ibuf->empty = page_is_empty(root); block = btr_cur_get_block(cursor); - ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + ut_ad(block->page.id.space() == IBUF_SPACE_ID); } if (offsets_heap) { @@ -3745,13 +3666,12 @@ fail_exit: func_exit: #ifdef UNIV_IBUF_COUNT_DEBUG if (err == DB_SUCCESS) { - fprintf(stderr, - "Incrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) + 1); + ib::info() << "Incrementing ibuf count of page " << page_id + << " from " << ibuf_count_get(space, page_no) + << " by 1"; + + ibuf_count_set(page_id, ibuf_count_get(page_id) + 1); } #endif @@ -3760,7 +3680,8 @@ func_exit: mem_heap_free(heap); - if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { + if (err == DB_SUCCESS + && BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE) { ibuf_contract_after_insert(entry_size); } @@ -3768,29 +3689,31 @@ func_exit: #ifdef UNIV_IBUF_DEBUG ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); #endif - buf_read_ibuf_merge_pages(false, space_ids, space_versions, + buf_read_ibuf_merge_pages(false, space_ids, page_nos, n_stored); } return(err); } -/*********************************************************************//** -Buffer an operation in the insert/delete buffer, instead of doing it +/** Buffer an operation in the insert/delete buffer, instead of doing it directly to the disk page, if this is possible. Does not do it if the index is clustered or unique. 
-@return TRUE if success */ -UNIV_INTERN +@param[in] op operation type +@param[in] entry index entry to insert +@param[in,out] index index where to insert +@param[in] page_id page id where to insert +@param[in] page_size page size +@param[in,out] thr query thread +@return TRUE if success */ ibool ibuf_insert( -/*========*/ - ibuf_op_t op, /*!< in: operation type */ - const dtuple_t* entry, /*!< in: index entry to insert */ - dict_index_t* index, /*!< in: index where to insert */ - ulint space, /*!< in: space id where to insert */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number where to insert */ - que_thr_t* thr) /*!< in: query thread */ + ibuf_op_t op, + const dtuple_t* entry, + dict_index_t* index, + const page_id_t& page_id, + const page_size_t& page_size, + que_thr_t* thr) { dberr_t err; ulint entry_size; @@ -3800,11 +3723,11 @@ ibuf_insert( ibuf_use_t use = ibuf_use; DBUG_ENTER("ibuf_insert"); - DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld", - op, space, page_no)); + DBUG_PRINT("ibuf", ("op: %d, space: " UINT32PF ", page_no: " UINT32PF, + op, page_id.space(), page_id.page_no())); ut_ad(dtuple_check_typed(entry)); - ut_ad(ut_is_2pow(zip_size)); + ut_ad(page_id.space() != srv_tmp_space.space_id()); ut_a(!dict_index_is_clust(index)); @@ -3876,11 +3799,11 @@ check_watch: buf_pool_watch_set(space, page_no). */ { - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, page_no); - bpage = buf_page_get_also_watch(buf_pool, space, page_no); + buf_pool_t* buf_pool = buf_pool_get(page_id); + buf_page_t* bpage + = buf_page_get_also_watch(buf_pool, page_id); - if (UNIV_LIKELY_NULL(bpage)) { + if (bpage != NULL) { /* A buffer pool watch has been set or the page has been read into the buffer pool. Do not buffer the request. 
If a purge operation @@ -3903,11 +3826,11 @@ skip_watch: err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter, entry, entry_size, - index, space, zip_size, page_no, thr); + index, page_id, page_size, thr); if (err == DB_FAIL) { - err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter, - entry, entry_size, - index, space, zip_size, page_no, thr); + err = ibuf_insert_low(BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT, + op, no_counter, entry, entry_size, + index, page_id, page_size, thr); } if (err == DB_SUCCESS) { @@ -3943,9 +3866,6 @@ ibuf_insert_to_index_page_low( after which to insert the buffered entry */ { const page_t* page; - ulint space; - ulint page_no; - ulint zip_size; const page_t* bitmap_page; ulint old_bits; rec_t* rec; @@ -3977,34 +3897,27 @@ ibuf_insert_to_index_page_low( page = buf_block_get_frame(block); - ut_print_timestamp(stderr); + ib::error() << "Insert buffer insert fails; page free " + << page_get_max_insert_size(page, 1) << ", dtuple size " + << rec_get_converted_size(index, entry, 0); - fprintf(stderr, - " InnoDB: Error: Insert buffer insert fails;" - " page free %lu, dtuple size %lu\n", - (ulong) page_get_max_insert_size(page, 1), - (ulong) rec_get_converted_size(index, entry, 0)); fputs("InnoDB: Cannot insert index record ", stderr); dtuple_print(stderr, entry); fputs("\nInnoDB: The table where this index record belongs\n" "InnoDB: is now probably corrupt. 
Please run CHECK TABLE on\n" "InnoDB: that table.\n", stderr); - space = page_get_space_id(page); - zip_size = buf_block_get_zip_size(block); - page_no = page_get_page_no(page); + bitmap_page = ibuf_bitmap_get_map_page(block->page.id, + block->page.size, mtr); + old_bits = ibuf_bitmap_page_get_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_FREE, mtr); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); - old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, mtr); + ib::error() << "page " << block->page.id << ", size " + << block->page.size.physical() << ", bitmap bits " << old_bits; - fprintf(stderr, - "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n", - (ulong) space, (ulong) page_no, - (ulong) zip_size, (ulong) old_bits); + ib::error() << BUG_REPORT_MSG; - fputs("InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); ut_ad(0); DBUG_RETURN(NULL); } @@ -4031,59 +3944,50 @@ ibuf_insert_to_index_page( DBUG_ENTER("ibuf_insert_to_index_page"); - DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block))); - DBUG_PRINT("ibuf", ("index name: %s", index->name)); - DBUG_PRINT("ibuf", ("online status: %d", - dict_index_get_online_status(index))); + DBUG_PRINT("ibuf", ("page " UINT32PF ":" UINT32PF, + block->page.id.space(), + block->page.id.page_no())); + ut_ad(!dict_index_is_online_ddl(index));// this is an ibuf_dummy index ut_ad(ibuf_inside(mtr)); ut_ad(dtuple_check_typed(entry)); - ut_ad(!buf_block_align(page)->index); + ut_ad(!block->index); + ut_ad(mtr->is_named_space(block->page.id.space())); if (UNIV_UNLIKELY(dict_table_is_comp(index->table) != (ibool)!!page_is_comp(page))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the 'compact' flag does not match!\n", - stderr); + ib::warn() << "Trying to insert a record from the insert" + " buffer to an index page but the 'compact' flag does" + 
" not match!"; goto dump; } rec = page_rec_get_next(page_get_infimum_rec(page)); if (page_rec_is_supremum(rec)) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the index page is empty!\n", - stderr); + ib::warn() << "Trying to insert a record from the insert" + " buffer to an index page but the index page" + " is empty!"; goto dump; } - if (UNIV_UNLIKELY(rec_get_n_fields(rec, index) - != dtuple_get_n_fields(entry))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the number of fields does not match!\n", - stderr); + if (!rec_n_fields_is_sane(index, rec, entry)) { + ib::warn() << "Trying to insert a record from the insert" + " buffer to an index page but the number of fields" + " does not match!"; + rec_print(stderr, rec, index); dump: - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - dtuple_print(stderr, entry); ut_ad(0); - fputs("InnoDB: The table where where" - " this index record belongs\n" - "InnoDB: is now probably corrupt." - " Please run CHECK TABLE on\n" - "InnoDB: your tables.\n" - "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com!\n", stderr); + ib::warn() << "The table where this index record belongs" + " is now probably corrupt. Please run CHECK TABLE on" + " your tables. 
" << BUG_REPORT_MSG; DBUG_VOID_RETURN; } - low_match = page_cur_search(block, index, entry, - PAGE_CUR_LE, &page_cur); + low_match = page_cur_search(block, index, entry, &page_cur); heap = mem_heap_create( sizeof(upd_t) @@ -4141,12 +4045,12 @@ dump: just write dummy trx_id(0), roll_ptr(0) */ btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, index, update, 0, 0, mtr); + DBUG_EXECUTE_IF( "crash_after_log_ibuf_upd_inplace", log_buffer_flush_to_disk(); - ib_logf(IB_LOG_LEVEL_INFO, - "Wrote log record for ibuf update in " - "place operation"); + ib::info() << "Wrote log record for ibuf" + " update in place operation"; DBUG_SUICIDE(); ); @@ -4214,8 +4118,7 @@ ibuf_set_del_mark( ut_ad(ibuf_inside(mtr)); ut_ad(dtuple_check_typed(entry)); - low_match = page_cur_search( - block, index, entry, PAGE_CUR_LE, &page_cur); + low_match = page_cur_search(block, index, entry, &page_cur); if (low_match == dtuple_get_n_fields(entry)) { rec_t* rec; @@ -4242,22 +4145,18 @@ ibuf_set_del_mark( const buf_block_t* block = page_cur_get_block(&page_cur); - ut_print_timestamp(stderr); - fputs(" InnoDB: unable to find a record to delete-mark\n", - stderr); + ib::error() << "Unable to find a record to delete-mark"; fputs("InnoDB: tuple ", stderr); dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); rec_print(stderr, page_cur_get_rec(&page_cur), index); - fprintf(stderr, "\nspace %u offset %u" - " (%u records, index id %llu)\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), - (unsigned) page_get_n_recs(page), - (ulonglong) btr_page_get_index_id(page)); + + ib::error() << "page " << block->page.id << " (" + << page_get_n_recs(page) << " records, index id " + << btr_page_get_index_id(page) << ")."; + + ib::error() << BUG_REPORT_MSG; ut_ad(0); } } @@ -4279,9 +4178,9 @@ ibuf_delete( ut_ad(ibuf_inside(mtr)); ut_ad(dtuple_check_typed(entry)); + 
ut_ad(!dict_index_is_spatial(index)); - low_match = page_cur_search( - block, index, entry, PAGE_CUR_LE, &page_cur); + low_match = page_cur_search(block, index, entry, &page_cur); if (low_match == dtuple_get_n_fields(entry)) { page_zip_des_t* page_zip= buf_block_get_page_zip(block); @@ -4306,20 +4205,18 @@ ibuf_delete( & rec_get_info_bits(rec, page_is_comp(page)))) { /* Refuse to purge the last record or a record that has not been marked for deletion. */ - ut_print_timestamp(stderr); - fputs(" InnoDB: unable to purge a record\n", - stderr); + ib::error() << "Unable to purge a record"; fputs("InnoDB: tuple ", stderr); dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); rec_print_new(stderr, rec, offsets); - fprintf(stderr, "\nspace %u offset %u" + fprintf(stderr, "\nspace " UINT32PF " offset " UINT32PF " (%u records, index id %llu)\n" "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), + block->page.id.space(), + block->page.id.page_no(), (unsigned) page_get_n_recs(page), (ulonglong) btr_page_get_index_id(page)); @@ -4358,7 +4255,7 @@ ibuf_delete( /*********************************************************************//** Restores insert buffer tree cursor position -@return TRUE if the position was restored; FALSE if not */ +@return TRUE if the position was restored; FALSE if not */ static MY_ATTRIBUTE((nonnull)) ibool ibuf_restore_pos( @@ -4373,7 +4270,8 @@ ibuf_restore_pos( position is to be restored */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE); + ut_ad(mode == BTR_MODIFY_LEAF + || BTR_LATCH_MODE_WITHOUT_INTENTION(mode) == BTR_MODIFY_TREE); if (btr_pcur_restore_position(mode, pcur, mtr)) { @@ -4386,13 +4284,11 @@ ibuf_restore_pos( entry. Do not complain. 
*/ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); } else { - fprintf(stderr, - "InnoDB: ERROR: Submit the output to" - " http://bugs.mysql.com\n" - "InnoDB: ibuf cursor restoration fails!\n" - "InnoDB: ibuf record inserted to page %lu:%lu\n", - (ulong) space, (ulong) page_no); - fflush(stderr); + ib::error() << "ibuf cursor restoration fails!." + " ibuf record inserted to page " + << space << ":" << page_no; + + ib::error() << BUG_REPORT_MSG; rec_print_old(stderr, btr_pcur_get_rec(pcur)); rec_print_old(stderr, pcur->old_rec); @@ -4400,10 +4296,8 @@ ibuf_restore_pos( rec_print_old(stderr, page_rec_get_next(btr_pcur_get_rec(pcur))); - fflush(stderr); - ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); - ut_ad(0); + ib::fatal() << "Failed to restore ibuf position."; } return(FALSE); @@ -4413,7 +4307,7 @@ ibuf_restore_pos( Deletes from ibuf the record on which pcur is positioned. If we have to resort to a pessimistic delete, this function commits mtr and closes the cursor. -@return TRUE if mtr was committed and pcur closed in this operation */ +@return TRUE if mtr was committed and pcur closed in this operation */ static MY_ATTRIBUTE((warn_unused_result)) ibool ibuf_delete_rec( @@ -4446,8 +4340,9 @@ ibuf_delete_rec( an assertion failure after crash recovery. 
*/ btr_cur_set_deleted_flag_for_ibuf( btr_pcur_get_rec(pcur), NULL, TRUE, mtr); + ibuf_mtr_commit(mtr); - log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE); + log_write_up_to(LSN_MAX, true); DBUG_SUICIDE(); } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ @@ -4455,6 +4350,8 @@ ibuf_delete_rec( success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), 0, mtr); + const page_id_t page_id(space, page_no); + if (success) { if (page_is_empty(btr_pcur_get_page(pcur))) { /* If a B-tree page is empty, it must be the root page @@ -4473,13 +4370,13 @@ ibuf_delete_rec( } #ifdef UNIV_IBUF_COUNT_DEBUG - fprintf(stderr, - "Decrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) - 1); -#endif + ib::info() << "Decrementing ibuf count of space " << space + << " page " << page_no << " from " + << ibuf_count_get(page_id) << " by 1"; + + ibuf_count_set(page_id, ibuf_count_get(page_id) - 1); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + return(FALSE); } @@ -4501,115 +4398,106 @@ ibuf_delete_rec( mutex_enter(&ibuf_mutex); if (!ibuf_restore_pos(space, page_no, search_tuple, - BTR_MODIFY_TREE, pcur, mtr)) { + BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + pcur, mtr)) { mutex_exit(&ibuf_mutex); - ut_ad(mtr->state == MTR_COMMITTED); + ut_ad(mtr->has_committed()); goto func_exit; } root = ibuf_tree_root_get(mtr); btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0, - RB_NONE, mtr); + false, mtr); ut_a(err == DB_SUCCESS); #ifdef UNIV_IBUF_COUNT_DEBUG - ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); -#endif - ibuf_size_update(root, mtr); + ibuf_count_set(page_id, ibuf_count_get(page_id) - 1); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + + ibuf_size_update(root); mutex_exit(&ibuf_mutex); ibuf->empty = page_is_empty(root); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: - ut_ad(mtr->state == MTR_COMMITTED); + ut_ad(mtr->has_committed()); 
btr_pcur_close(pcur); return(TRUE); } -/*********************************************************************//** -When an index page is read from a disk to the buffer pool, this function +/** When an index page is read from a disk to the buffer pool, this function applies any buffered operations to the page and deletes the entries from the insert buffer. If the page is not read, but created in the buffer pool, this function deletes its buffered entries from the insert buffer; there can exist entries for such a page if the page belonged to an index which -subsequently was dropped. */ -UNIV_INTERN +subsequently was dropped. +@param[in,out] block if page has been read from disk, +pointer to the page x-latched, else NULL +@param[in] page_id page id of the index page +@param[in] update_ibuf_bitmap normally this is set to TRUE, but +if we have deleted or are deleting the tablespace, then we naturally do not +want to update a non-existent bitmap page */ void ibuf_merge_or_delete_for_page( -/*==========================*/ - buf_block_t* block, /*!< in: if page has been read from - disk, pointer to the page x-latched, - else NULL */ - ulint space, /*!< in: space id of the index page */ - ulint page_no,/*!< in: page number of the index page */ - ulint zip_size,/*!< in: compressed page size in bytes, - or 0 */ - ibool update_ibuf_bitmap)/*!< in: normally this is set - to TRUE, but if we have deleted or are - deleting the tablespace, then we - naturally do not want to update a - non-existent bitmap page */ + buf_block_t* block, + const page_id_t& page_id, + const page_size_t* page_size, + ibool update_ibuf_bitmap) { mem_heap_t* heap; btr_pcur_t pcur; dtuple_t* search_tuple; #ifdef UNIV_IBUF_DEBUG ulint volume = 0; -#endif +#endif /* UNIV_IBUF_DEBUG */ page_zip_des_t* page_zip = NULL; - ibool tablespace_being_deleted = FALSE; - ibool corruption_noticed = FALSE; + fil_space_t* space = NULL; + bool corruption_noticed = false; mtr_t mtr; /* Counts for merged & discarded 
operations. */ ulint mops[IBUF_OP_COUNT]; ulint dops[IBUF_OP_COUNT]; - ut_ad(!block || buf_block_get_space(block) == space); - ut_ad(!block || buf_block_get_page_no(block) == page_no); - ut_ad(!block || buf_block_get_zip_size(block) == zip_size); - ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ); + ut_ad(block == NULL || page_id.equals_to(block->page.id)); + ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ); if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE - || trx_sys_hdr_page(space, page_no)) { + || trx_sys_hdr_page(page_id) + || fsp_is_system_temporary(page_id.space())) { return; } - /* We cannot refer to zip_size in the following, because - zip_size is passed as ULINT_UNDEFINED (it is unknown) when - buf_read_ibuf_merge_pages() is merging (discarding) changes - for a dropped tablespace. When block != NULL or - update_ibuf_bitmap is specified, the zip_size must be known. - That is why we will repeat the check below, with zip_size in - place of 0. Passing zip_size as 0 assumes that the + /* We cannot refer to page_size in the following, because it is passed + as NULL (it is unknown) when buf_read_ibuf_merge_pages() is merging + (discarding) changes for a dropped tablespace. When block != NULL or + update_ibuf_bitmap is specified, then page_size must be known. + That is why we will repeat the check below, with page_size in + place of univ_page_size. Passing univ_page_size assumes that the uncompressed page size always is a power-of-2 multiple of the compressed page size. 
*/ - if (ibuf_fixed_addr_page(space, 0, page_no) - || fsp_descr_page(0, page_no)) { + if (ibuf_fixed_addr_page(page_id, univ_page_size) + || fsp_descr_page(page_id, univ_page_size)) { return; } - if (UNIV_LIKELY(update_ibuf_bitmap)) { - ut_a(ut_is_2pow(zip_size)); + if (update_ibuf_bitmap) { - if (ibuf_fixed_addr_page(space, zip_size, page_no) - || fsp_descr_page(zip_size, page_no)) { + ut_ad(page_size != NULL); + + if (ibuf_fixed_addr_page(page_id, *page_size) + || fsp_descr_page(page_id, *page_size)) { return; } - /* If the following returns FALSE, we get the counter - incremented, and must decrement it when we leave this - function. When the counter is > 0, that prevents tablespace - from being dropped. */ + space = fil_space_acquire(page_id.space()); - tablespace_being_deleted = fil_inc_pending_ops(space, true); - - if (UNIV_UNLIKELY(tablespace_being_deleted)) { + if (space == NULL) { /* Do not try to read the bitmap page from space; just delete the ibuf records for the page */ @@ -4622,12 +4510,12 @@ ibuf_merge_or_delete_for_page( ibuf_mtr_start(&mtr); bitmap_page = ibuf_bitmap_get_map_page( - space, page_no, zip_size, &mtr); + page_id, *page_size, &mtr); if (bitmap_page && fil_page_get_type(bitmap_page) != FIL_PAGE_TYPE_ALLOCATED) { bitmap_bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, *page_size, IBUF_BITMAP_BUFFERED, &mtr); } @@ -4636,25 +4524,23 @@ ibuf_merge_or_delete_for_page( if (!bitmap_bits) { /* No inserts buffered for this page */ - if (!tablespace_being_deleted) { - fil_decr_pending_ops(space); - } - + fil_space_release(space); return; } } - } else if (block - && (ibuf_fixed_addr_page(space, zip_size, page_no) - || fsp_descr_page(zip_size, page_no))) { + } else if (block != NULL + && (ibuf_fixed_addr_page(page_id, *page_size) + || fsp_descr_page(page_id, *page_size))) { return; } heap = mem_heap_create(512); - search_tuple = ibuf_search_tuple_build(space, page_no, heap); + search_tuple = 
ibuf_search_tuple_build( + page_id.space(), page_id.page_no(), heap); - if (block) { + if (block != NULL) { /* Move the ownership of the x-latch on the page to this OS thread, so that we can acquire a second x-latch on it. This is needed for the insert operations to the index page to pass @@ -4663,50 +4549,23 @@ ibuf_merge_or_delete_for_page( rw_lock_x_lock_move_ownership(&(block->lock)); page_zip = buf_block_get_page_zip(block); - if (UNIV_UNLIKELY(fil_page_get_type(block->frame) - != FIL_PAGE_INDEX) - || UNIV_UNLIKELY(!page_is_leaf(block->frame))) { + if (!fil_page_index_page_check(block->frame) + || !page_is_leaf(block->frame)) { - page_t* bitmap_page; + corruption_noticed = true; - corruption_noticed = TRUE; + ib::error() << "Corruption in the tablespace. Bitmap" + " shows insert buffer records to page " + << page_id << " though the page type is " + << fil_page_get_type(block->frame) + << ", which is not an index leaf page. We try" + " to resolve the problem by skipping the" + " insert buffer merge for this page. Please" + " run CHECK TABLE on your tables to determine" + " if they are corrupt after this."; - ut_print_timestamp(stderr); - - ibuf_mtr_start(&mtr); - - fputs(" InnoDB: Dump of the ibuf bitmap page:\n", - stderr); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, - zip_size, &mtr); - buf_page_print(bitmap_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - ibuf_mtr_commit(&mtr); - - fputs("\nInnoDB: Dump of the page:\n", stderr); - - buf_page_print(block->frame, 0, - BUF_PAGE_PRINT_NO_CRASH); - - fprintf(stderr, - "InnoDB: Error: corruption in the tablespace." - " Bitmap shows insert\n" - "InnoDB: buffer records to page n:o %lu" - " though the page\n" - "InnoDB: type is %lu, which is" - " not an index leaf page!\n" - "InnoDB: We try to resolve the problem" - " by skipping the insert buffer\n" - "InnoDB: merge for this page." 
- " Please run CHECK TABLE on your tables\n" - "InnoDB: to determine if they are corrupt" - " after this.\n\n" - "InnoDB: Please submit a detailed bug report" - " to http://bugs.mysql.com\n\n", - (ulong) page_no, - (ulong) - fil_page_get_type(block->frame)); + ib::error() << "Please submit a detailed bug" + " report to http://bugs.mysql.com"; ut_ad(0); } } @@ -4723,9 +4582,11 @@ loop: ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr); - if (block) { + if (block != NULL) { ibool success; + mtr.set_named_space(page_id.space()); + success = buf_page_get_known_nowait( RW_X_LATCH, block, BUF_KEEP_OLD, __FILE__, __LINE__, &mtr); @@ -4739,6 +4600,8 @@ loop: the block is io-fixed. Other threads must not try to latch an io-fixed block. */ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); + } else if (update_ibuf_bitmap) { + mtr.set_named_space(page_id.space()); } if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -4755,10 +4618,10 @@ loop: rec = btr_pcur_get_rec(&pcur); /* Check if the entry is for this index page */ - if (ibuf_rec_get_page_no(&mtr, rec) != page_no - || ibuf_rec_get_space(&mtr, rec) != space) { + if (ibuf_rec_get_page_no(&mtr, rec) != page_id.page_no() + || ibuf_rec_get_space(&mtr, rec) != page_id.space()) { - if (block) { + if (block != NULL) { page_header_reset_last_insert( block->frame, page_zip, &mtr); } @@ -4766,11 +4629,11 @@ loop: goto reset_bit; } - if (UNIV_UNLIKELY(corruption_noticed)) { + if (corruption_noticed) { fputs("InnoDB: Discarding record\n ", stderr); rec_print_old(stderr, rec); fputs("\nInnoDB: from the insert buffer!\n\n", stderr); - } else if (block && !rec_get_deleted_flag(rec, 0)) { + } else if (block != NULL && !rec_get_deleted_flag(rec, 0)) { /* Now we have at pcur a record which should be applied on the index page; NOTE that the call below copies pointers to fields in rec, and we must @@ -4822,8 +4685,9 @@ loop: ut_ad(rec == btr_pcur_get_rec(&pcur)); ut_ad(page_rec_is_user_rec(rec)); ut_ad(ibuf_rec_get_page_no(&mtr, 
rec) - == page_no); - ut_ad(ibuf_rec_get_space(&mtr, rec) == space); + == page_id.page_no()); + ut_ad(ibuf_rec_get_space(&mtr, rec) + == page_id.space()); /* Mark the change buffer record processed, so that it will not be merged again in case @@ -4839,6 +4703,7 @@ loop: ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr); ibuf_mtr_start(&mtr); + mtr.set_named_space(page_id.space()); success = buf_page_get_known_nowait( RW_X_LATCH, block, @@ -4853,12 +4718,13 @@ loop: buf_block_dbg_add_level( block, SYNC_IBUF_TREE_NODE); - if (!ibuf_restore_pos(space, page_no, + if (!ibuf_restore_pos(page_id.space(), + page_id.page_no(), search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr)) { - ut_ad(mtr.state == MTR_COMMITTED); + ut_ad(mtr.has_committed()); mops[op]++; ibuf_dummy_index_free(dummy_index); goto loop; @@ -4877,12 +4743,12 @@ loop: } /* Delete the record from ibuf */ - if (ibuf_delete_rec(space, page_no, &pcur, search_tuple, - &mtr)) { + if (ibuf_delete_rec(page_id.space(), page_id.page_no(), + &pcur, search_tuple, &mtr)) { /* Deletion was pessimistic and mtr was committed: we start from the beginning again */ - ut_ad(mtr.state == MTR_COMMITTED); + ut_ad(mtr.has_committed()); goto loop; } else if (btr_pcur_is_after_last_on_page(&pcur)) { ibuf_mtr_commit(&mtr); @@ -4893,27 +4759,26 @@ loop: } reset_bit: - if (UNIV_LIKELY(update_ibuf_bitmap)) { + if (update_ibuf_bitmap) { page_t* bitmap_page; - bitmap_page = ibuf_bitmap_get_map_page( - space, page_no, zip_size, &mtr); + bitmap_page = ibuf_bitmap_get_map_page(page_id, *page_size, + &mtr); ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, *page_size, IBUF_BITMAP_BUFFERED, FALSE, &mtr); - if (block) { + if (block != NULL) { ulint old_bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, *page_size, IBUF_BITMAP_FREE, &mtr); - ulint new_bits = ibuf_index_page_calc_free( - zip_size, block); + ulint new_bits = ibuf_index_page_calc_free(block); if (old_bits != new_bits) { 
ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, + bitmap_page, page_id, *page_size, IBUF_BITMAP_FREE, new_bits, &mtr); } } @@ -4923,37 +4788,23 @@ reset_bit: btr_pcur_close(&pcur); mem_heap_free(heap); -#ifdef HAVE_ATOMIC_BUILTINS os_atomic_increment_ulint(&ibuf->n_merges, 1); ibuf_add_ops(ibuf->n_merged_ops, mops); ibuf_add_ops(ibuf->n_discarded_ops, dops); -#else /* HAVE_ATOMIC_BUILTINS */ - /* Protect our statistics keeping from race conditions */ - mutex_enter(&ibuf_mutex); - - ibuf->n_merges++; - ibuf_add_ops(ibuf->n_merged_ops, mops); - ibuf_add_ops(ibuf->n_discarded_ops, dops); - - mutex_exit(&ibuf_mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ - - if (update_ibuf_bitmap && !tablespace_being_deleted) { - - fil_decr_pending_ops(space); + if (space != NULL) { + fil_space_release(space); } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space, page_no) == 0); + ut_a(ibuf_count_get(page_id) == 0); #endif } /*********************************************************************//** Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE and IMPORT TABLESPACE. +in DISCARD TABLESPACE, IMPORT TABLESPACE and TRUNCATE TABLESPACE. NOTE: this does not update the page free bitmaps in the space. The space will become CORRUPT when you call this function! 
*/ -UNIV_INTERN void ibuf_delete_for_discarded_space( /*============================*/ @@ -5013,7 +4864,7 @@ loop: /* Deletion was pessimistic and mtr was committed: we start from the beginning again */ - ut_ad(mtr.state == MTR_COMMITTED); + ut_ad(mtr.has_committed()); goto loop; } @@ -5029,22 +4880,14 @@ leave_loop: ibuf_mtr_commit(&mtr); btr_pcur_close(&pcur); -#ifdef HAVE_ATOMIC_BUILTINS ibuf_add_ops(ibuf->n_discarded_ops, dops); -#else /* HAVE_ATOMIC_BUILTINS */ - /* Protect our statistics keeping from race conditions */ - mutex_enter(&ibuf_mutex); - ibuf_add_ops(ibuf->n_discarded_ops, dops); - mutex_exit(&ibuf_mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ mem_heap_free(heap); } /******************************************************************//** Looks if the insert buffer is empty. -@return true if empty */ -UNIV_INTERN +@return true if empty */ bool ibuf_is_empty(void) /*===============*/ @@ -5068,7 +4911,6 @@ ibuf_is_empty(void) /******************************************************************//** Prints info of ibuf. */ -UNIV_INTERN void ibuf_print( /*=======*/ @@ -5098,7 +4940,7 @@ ibuf_print( #ifdef UNIV_IBUF_COUNT_DEBUG for (i = 0; i < IBUF_COUNT_N_SPACES; i++) { for (j = 0; j < IBUF_COUNT_N_PAGES; j++) { - ulint count = ibuf_count_get(i, j); + ulint count = ibuf_count_get(page_id_t(i, j, 0)); if (count > 0) { fprintf(stderr, @@ -5116,24 +4958,23 @@ ibuf_print( /******************************************************************//** Checks the insert buffer bitmaps on IMPORT TABLESPACE. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t ibuf_check_bitmap_on_import( /*========================*/ const trx_t* trx, /*!< in: transaction */ ulint space_id) /*!< in: tablespace identifier */ { - ulint zip_size; - ulint page_size; ulint size; ulint page_no; ut_ad(space_id); ut_ad(trx->mysql_thd); - zip_size = fil_space_get_zip_size(space_id); + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space_id, &found); - if (zip_size == ULINT_UNDEFINED) { + if (!found) { return(DB_TABLE_NOT_FOUND); } @@ -5145,9 +4986,13 @@ ibuf_check_bitmap_on_import( mutex_enter(&ibuf_mutex); - page_size = zip_size ? zip_size : UNIV_PAGE_SIZE; + /* The two bitmap pages (allocation bitmap and ibuf bitmap) repeat + every page_size pages. For example if page_size is 16 KiB, then the + two bitmap pages repeat every 16 KiB * 16384 = 256 MiB. In the loop + below page_no is measured in number of pages since the beginning of + the space, as usual. */ - for (page_no = 0; page_no < size; page_no += page_size) { + for (page_no = 0; page_no < size; page_no += page_size.physical()) { mtr_t mtr; page_t* bitmap_page; ulint i; @@ -5164,14 +5009,40 @@ ibuf_check_bitmap_on_import( ibuf_enter(&mtr); bitmap_page = ibuf_bitmap_get_map_page( - space_id, page_no, zip_size, &mtr); + page_id_t(space_id, page_no), page_size, &mtr); + + if (buf_page_is_zeroes(bitmap_page, page_size)) { + /* This means we got all-zero page instead of + ibuf bitmap page. The subsequent page should be + all-zero pages. 
*/ +#ifdef UNIV_DEBUG + for (ulint curr_page = page_no + 1; + curr_page < page_size.physical(); curr_page++) { + + buf_block_t* block = buf_page_get( + page_id_t(space_id, curr_page), + page_size, + RW_S_LATCH, &mtr); + page_t* page = buf_block_get_frame(block); + ut_ad(buf_page_is_zeroes(page, page_size)); + } +#endif /* UNIV_DEBUG */ + ibuf_exit(&mtr); + mtr_commit(&mtr); + continue; + } + + for (i = FSP_IBUF_BITMAP_OFFSET + 1; + i < page_size.physical(); + i++) { - for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) { const ulint offset = page_no + i; + const page_id_t cur_page_id(space_id, offset); + if (ibuf_bitmap_page_get_bits( - bitmap_page, offset, zip_size, - IBUF_BITMAP_IBUF, &mtr)) { + bitmap_page, cur_page_id, page_size, + IBUF_BITMAP_IBUF, &mtr)) { mutex_exit(&ibuf_mutex); ibuf_exit(&mtr); @@ -5190,7 +5061,7 @@ ibuf_check_bitmap_on_import( } if (ibuf_bitmap_page_get_bits( - bitmap_page, offset, zip_size, + bitmap_page, cur_page_id, page_size, IBUF_BITMAP_BUFFERED, &mtr)) { ib_errf(trx->mysql_thd, @@ -5205,7 +5076,7 @@ ibuf_check_bitmap_on_import( slightly corrupted tables can be imported and dumped. Clear the bit. */ ibuf_bitmap_page_set_bits( - bitmap_page, offset, zip_size, + bitmap_page, cur_page_id, page_size, IBUF_BITMAP_BUFFERED, FALSE, &mtr); } } @@ -5217,4 +5088,39 @@ ibuf_check_bitmap_on_import( mutex_exit(&ibuf_mutex); return(DB_SUCCESS); } + +/** Updates free bits and buffered bits for bulk loaded page. +@param[in] block index page +@param[in] reset flag if reset free val */ +void +ibuf_set_bitmap_for_bulk_load( + buf_block_t* block, + bool reset) +{ + page_t* bitmap_page; + mtr_t mtr; + ulint free_val; + + ut_a(page_is_leaf(buf_block_get_frame(block))); + + free_val = ibuf_index_page_calc_free(block); + + mtr_start(&mtr); + mtr.set_named_space(block->page.id.space()); + + bitmap_page = ibuf_bitmap_get_map_page(block->page.id, + block->page.size, &mtr); + + free_val = reset ? 
0 : ibuf_index_page_calc_free(block); + ibuf_bitmap_page_set_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_FREE, free_val, &mtr); + + ibuf_bitmap_page_set_bits( + bitmap_page, block->page.id, block->page.size, + IBUF_BITMAP_BUFFERED, FALSE, &mtr); + + mtr_commit(&mtr); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h index 500bf4fe3b2..ec02febee74 100644 --- a/storage/innobase/include/api0api.h +++ b/storage/innobase/include/api0api.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,16 +30,11 @@ InnoDB Native API #include "db0err.h" #include -#ifdef _MSC_VER -#define strncasecmp _strnicmp -#define strcasecmp _stricmp -#endif - -#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) +#if defined(__GNUC__) #define UNIV_NO_IGNORE MY_ATTRIBUTE ((warn_unused_result)) #else #define UNIV_NO_IGNORE -#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */ +#endif /* __GNUC__ */ /* See comment about ib_bool_t as to why the two macros are unsigned long. */ /** The boolean value of "true" used internally within InnoDB */ @@ -53,7 +48,11 @@ typedef enum dberr_t ib_err_t; /** Representation of a byte within InnoDB */ typedef unsigned char ib_byte_t; /** Representation of an unsigned long int within InnoDB */ +#ifdef _WIN64 +typedef unsigned __int64 ib_ulint_t; +#else typedef unsigned long int ib_ulint_t; +#endif /* _WIN64 */ /* We assume C99 support except when using VisualStudio. 
*/ #if !defined(_MSC_VER) @@ -324,25 +323,6 @@ typedef struct { ib_charset_t* charset; /*!< Column charset */ } ib_col_meta_t; -/* Note: Must be in sync with trx0trx.h */ -/** @enum ib_trx_state_t The transaction state can be queried using the -ib_trx_state() function. The InnoDB deadlock monitor can roll back a -transaction and users should be prepared for this, especially where there -is high contention. The way to determine the state of the transaction is to -query it's state and check. */ -typedef enum { - IB_TRX_NOT_STARTED, /*!< Has not started yet, the - transaction has not ben started yet.*/ - - IB_TRX_ACTIVE, /*!< The transaction is currently - active and needs to be either - committed or rolled back. */ - - IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */ - - IB_TRX_PREPARED /*!< Support for 2PC/XA */ -} ib_trx_state_t; - /* Note: Must be in sync with trx0trx.h */ /** @enum ib_trx_level_t Transaction isolation levels */ typedef enum { @@ -416,11 +396,11 @@ typedef struct ib_cursor_t* ib_crsr_t; This function is used to compare two data fields for which the data type is such that we must use the client code to compare them. -@param col_meta column meta data -@param p1 key +@param col_meta column meta data +@param p1 key @oaram p1_len key length -@param p2 second key -@param p2_len second key length +@param p2 second key +@param p2_len second key length @return 1, 0, -1, if a is greater, equal, less than b, respectively */ typedef int (*ib_client_cmp_t)( @@ -433,18 +413,6 @@ typedef int (*ib_client_cmp_t)( /* This should be the same as univ.i */ /** Represents SQL_NULL length */ #define IB_SQL_NULL 0xFFFFFFFF -/** The number of system columns in a row. */ -#define IB_N_SYS_COLS 3 - -/** The maximum length of a text column. */ -#define MAX_TEXT_LEN 4096 - -/* MySQL uses 3 byte UTF-8 encoding. */ -/** The maximum length of a column name in a table schema. 
*/ -#define IB_MAX_COL_NAME_LEN (64 * 3) - -/** The maximum length of a table name (plus database name). */ -#define IB_MAX_TABLE_NAME_LEN (64 * 3) * 2 /*****************************************************************//** Start a transaction that's been rolled back. This special function @@ -453,8 +421,7 @@ a transaction. While the transaction has been rolled back the handle is still valid and can be reused by calling this function. If you don't want to reuse the transaction handle then you can free the handle by calling ib_trx_release(). -@return innobase txn handle */ - +@return innobase txn handle */ ib_err_t ib_trx_start( /*=========*/ @@ -469,8 +436,7 @@ ib_trx_start( /*****************************************************************//** Begin a transaction. This will allocate a new transaction handle and put the transaction in the active state. -@return innobase txn handle */ - +@return innobase txn handle */ ib_trx_t ib_trx_begin( /*=========*/ @@ -480,21 +446,6 @@ ib_trx_begin( ib_bool_t auto_commit); /*!< in: auto commit after each single DML */ -/*****************************************************************//** -Query the transaction's state. This function can be used to check for -the state of the transaction in case it has been rolled back by the -InnoDB deadlock detector. Note that when a transaction is selected as -a victim for rollback, InnoDB will always return an appropriate error -code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and -@see DB_LOCK_WAIT_TIMEOUT -@return transaction state */ - -ib_trx_state_t -ib_trx_state( -/*=========*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - - /*****************************************************************//** Check if the transaction is read_only */ ib_u32_t @@ -506,8 +457,7 @@ ib_trx_read_only( Release the resources of the transaction. If the transaction was selected as a victim by InnoDB and rolled back then use this function to free the transaction handle. 
-@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_trx_release( /*===========*/ @@ -516,8 +466,7 @@ ib_trx_release( /*****************************************************************//** Commit a transaction. This function will release the schema latches too. It will also free the transaction handle. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_trx_commit( /*==========*/ @@ -526,8 +475,7 @@ ib_trx_commit( /*****************************************************************//** Rollback a transaction. This function will release the schema latches too. It will also free the transaction handle. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_trx_rollback( /*============*/ @@ -535,8 +483,7 @@ ib_trx_rollback( /*****************************************************************//** Open an InnoDB table and return a cursor handle to it. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_open_table_using_id( /*==========================*/ @@ -545,22 +492,9 @@ ib_cursor_open_table_using_id( can be NULL */ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */ -/*****************************************************************//** -Open an InnoDB index and return a cursor handle to it. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_open_index_using_id( -/*==========================*/ - ib_id_u64_t index_id, /*!< in: index id of index to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */ - /*****************************************************************//** Open an InnoDB secondary index cursor and return a cursor handle to it. 
@return DB_SUCCESS or err code */ - ib_err_t ib_cursor_open_index_using_name( /*============================*/ @@ -572,8 +506,7 @@ ib_cursor_open_index_using_name( /*****************************************************************//** Open an InnoDB table by name and return a cursor handle to it. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_open_table( /*=================*/ @@ -584,26 +517,15 @@ ib_cursor_open_table( /*****************************************************************//** Reset the cursor. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_reset( /*============*/ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - -/*****************************************************************//** -set a cursor trx to NULL*/ - -void -ib_cursor_clear_trx( -/*================*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - /*****************************************************************//** Close an InnoDB table and free the cursor. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_close( /*============*/ @@ -612,7 +534,6 @@ ib_cursor_close( /*****************************************************************//** Close the table, decrement n_ref_count count. 
@return DB_SUCCESS or err code */ - ib_err_t ib_cursor_close_table( /*==================*/ @@ -621,7 +542,6 @@ ib_cursor_close_table( /*****************************************************************//** update the cursor with new transactions and also reset the cursor @return DB_SUCCESS or err code */ - ib_err_t ib_cursor_new_trx( /*==============*/ @@ -631,26 +551,15 @@ ib_cursor_new_trx( /*****************************************************************//** Commit the transaction in a cursor @return DB_SUCCESS or err code */ - ib_err_t ib_cursor_commit_trx( /*=================*/ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ ib_trx_t ib_trx); /*!< in: transaction */ -/********************************************************************//** -Open a table using the table name, if found then increment table ref count. -@return table instance if found */ - -void* -ib_open_table_by_name( -/*==================*/ - const char* name); /*!< in: table name to lookup */ - /*****************************************************************//** Insert a row to a table. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_insert_row( /*=================*/ @@ -659,8 +568,7 @@ ib_cursor_insert_row( /*****************************************************************//** Update a row in a table. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_update_row( /*=================*/ @@ -670,8 +578,7 @@ ib_cursor_update_row( /*****************************************************************//** Delete a row in a table. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_delete_row( /*=================*/ @@ -679,8 +586,7 @@ ib_cursor_delete_row( /*****************************************************************//** Read current row. 
-@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_read_row( /*===============*/ @@ -691,26 +597,15 @@ ib_cursor_read_row( /*****************************************************************//** Move cursor to the first record in the table. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_first( /*============*/ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ -/*****************************************************************//** -Move cursor to the last record in the table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_last( -/*===========*/ - ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ - /*****************************************************************//** Move cursor to the next record in the table. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_next( /*===========*/ @@ -718,8 +613,7 @@ ib_cursor_next( /*****************************************************************//** Search for key. -@return DB_SUCCESS or err code */ - +@return DB_SUCCESS or err code */ ib_err_t ib_cursor_moveto( /*=============*/ @@ -729,7 +623,6 @@ ib_cursor_moveto( /*****************************************************************//** Set the match mode for ib_cursor_move(). */ - void ib_cursor_set_match_mode( /*=====================*/ @@ -738,8 +631,7 @@ ib_cursor_set_match_mode( /*****************************************************************//** Set a column of the tuple. Make a copy using the tuple's heap. -@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_col_set_value( /*=============*/ @@ -752,8 +644,7 @@ ib_col_set_value( /*****************************************************************//** Get the size of the data available in the column the tuple. 
-@return bytes avail or IB_SQL_NULL */ - +@return bytes avail or IB_SQL_NULL */ ib_ulint_t ib_col_get_len( /*===========*/ @@ -762,8 +653,7 @@ ib_col_get_len( /*****************************************************************//** Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ - +@return bytes copied or IB_SQL_NULL */ ib_ulint_t ib_col_copy_value( /*==============*/ @@ -774,8 +664,7 @@ ib_col_copy_value( /*************************************************************//** Read a signed int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i8( /*=============*/ @@ -785,8 +674,7 @@ ib_tuple_read_i8( /*************************************************************//** Read an unsigned int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u8( /*=============*/ @@ -796,8 +684,7 @@ ib_tuple_read_u8( /*************************************************************//** Read a signed int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i16( /*==============*/ @@ -807,8 +694,7 @@ ib_tuple_read_i16( /*************************************************************//** Read an unsigned int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u16( /*==============*/ @@ -818,8 +704,7 @@ ib_tuple_read_u16( /*************************************************************//** Read a signed int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i32( /*==============*/ @@ -829,8 +714,7 @@ ib_tuple_read_i32( /*************************************************************//** Read an unsigned int 32 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u32( /*==============*/ @@ -840,8 +724,7 @@ ib_tuple_read_u32( /*************************************************************//** Read a signed int 64 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_i64( /*==============*/ @@ -851,8 +734,7 @@ ib_tuple_read_i64( /*************************************************************//** Read an unsigned int 64 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_u64( /*==============*/ @@ -862,8 +744,7 @@ ib_tuple_read_u64( /*****************************************************************//** Get a column value pointer from the tuple. -@return NULL or pointer to buffer */ - +@return NULL or pointer to buffer */ const void* ib_col_get_value( /*=============*/ @@ -872,8 +753,7 @@ ib_col_get_value( /*****************************************************************//** Get a column type, length and attributes from the tuple. -@return len of column data */ - +@return len of column data */ ib_ulint_t ib_col_get_meta( /*============*/ @@ -883,8 +763,7 @@ ib_col_get_meta( /*****************************************************************//** "Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple. -@return new tuple, or NULL */ - +@return new tuple, or NULL */ ib_tpl_t ib_tuple_clear( /*============*/ @@ -894,8 +773,7 @@ ib_tuple_clear( Create a new cluster key search tuple and copy the contents of the secondary index key tuple columns that refer to the cluster index record to the cluster key. It does a deep copy of the column data. 
-@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_tuple_get_cluster_key( /*=====================*/ @@ -903,21 +781,9 @@ ib_tuple_get_cluster_key( ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */ -/*****************************************************************//** -Copy the contents of source tuple to destination tuple. The tuples -must be of the same type and belong to the same table/index. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_tuple_copy( -/*==========*/ - ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */ - const ib_tpl_t ib_src_tpl); /*!< in: source tuple */ - /*****************************************************************//** Create an InnoDB tuple used for index/table search. @return tuple for current index */ - ib_tpl_t ib_sec_search_tuple_create( /*=======================*/ @@ -925,8 +791,7 @@ ib_sec_search_tuple_create( /*****************************************************************//** Create an InnoDB tuple used for index/table search. -@return tuple for current index */ - +@return tuple for current index */ ib_tpl_t ib_sec_read_tuple_create( /*=====================*/ @@ -934,8 +799,7 @@ ib_sec_read_tuple_create( /*****************************************************************//** Create an InnoDB tuple used for table key operations. -@return tuple for current table */ - +@return tuple for current table */ ib_tpl_t ib_clust_search_tuple_create( /*=========================*/ @@ -943,8 +807,7 @@ ib_clust_search_tuple_create( /*****************************************************************//** Create an InnoDB tuple for table row operations. 
-@return tuple for current table */ - +@return tuple for current table */ ib_tpl_t ib_clust_read_tuple_create( /*=======================*/ @@ -952,8 +815,7 @@ ib_clust_read_tuple_create( /*****************************************************************//** Return the number of user columns in the tuple definition. -@return number of user columns */ - +@return number of user columns */ ib_ulint_t ib_tuple_get_n_user_cols( /*=====================*/ @@ -961,8 +823,7 @@ ib_tuple_get_n_user_cols( /*****************************************************************//** Return the number of columns in the tuple definition. -@return number of columns */ - +@return number of columns */ ib_ulint_t ib_tuple_get_n_cols( /*================*/ @@ -970,7 +831,6 @@ ib_tuple_get_n_cols( /*****************************************************************//** Destroy an InnoDB tuple. */ - void ib_tuple_delete( /*============*/ @@ -979,8 +839,7 @@ ib_tuple_delete( /*****************************************************************//** Truncate a table. The cursor handle will be closed and set to NULL on success. -@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_truncate( /*===============*/ @@ -990,29 +849,16 @@ ib_cursor_truncate( /*****************************************************************//** Get a table id. -@return DB_SUCCESS if found */ - +@return DB_SUCCESS if found */ ib_err_t ib_table_get_id( /*============*/ const char* table_name, /*!< in: table to find */ ib_id_u64_t* table_id); /*!< out: table id if found */ -/*****************************************************************//** -Get an index id. 
-@return DB_SUCCESS if found */ - -ib_err_t -ib_index_get_id( -/*============*/ - const char* table_name, /*!< in: find index for this table */ - const char* index_name, /*!< in: index to find */ - ib_id_u64_t* index_id); /*!< out: index id if found */ - /*****************************************************************//** Check if cursor is positioned. -@return IB_TRUE if positioned */ - +@return IB_TRUE if positioned */ ib_bool_t ib_cursor_is_positioned( /*====================*/ @@ -1022,7 +868,6 @@ ib_cursor_is_positioned( Checks if the data dictionary is latched in exclusive mode by a user transaction. @return TRUE if exclusive latch */ - ib_bool_t ib_schema_lock_is_exclusive( /*========================*/ @@ -1030,8 +875,7 @@ ib_schema_lock_is_exclusive( /*****************************************************************//** Lock an InnoDB cursor/table. -@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_lock( /*===========*/ @@ -1040,8 +884,7 @@ ib_cursor_lock( /*****************************************************************//** Set the Lock an InnoDB table using the table id. -@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_table_lock( /*===========*/ @@ -1051,8 +894,7 @@ ib_table_lock( /*****************************************************************//** Set the Lock mode of the cursor. -@return DB_SUCCESS or error code */ - +@return DB_SUCCESS or error code */ ib_err_t ib_cursor_set_lock_mode( /*====================*/ @@ -1061,111 +903,13 @@ ib_cursor_set_lock_mode( /*****************************************************************//** Set need to access clustered index record flag. */ - void ib_cursor_set_cluster_access( /*=========================*/ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ -/*****************************************************************//** -Write an integer value to a column. 
Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_i8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i8_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_i16( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i16_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_i32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i32_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_i64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i64_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. 
-@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_u8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u8_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_u16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u16_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_u32( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u32_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ - -ib_err_t -ib_tuple_write_u64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u64_t val); /*!< in: value to write */ - /*****************************************************************//** Inform the cursor that it's the start of an SQL statement. */ - void ib_cursor_stmt_begin( /*=================*/ @@ -1173,8 +917,7 @@ ib_cursor_stmt_begin( /*****************************************************************//** Write a double value to a column. 
-@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_write_double( /*==================*/ @@ -1184,8 +927,7 @@ ib_tuple_write_double( /*************************************************************//** Read a double column value from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_double( /*=================*/ @@ -1195,8 +937,7 @@ ib_tuple_read_double( /*****************************************************************//** Write a float value to a column. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_write_float( /*=================*/ @@ -1206,8 +947,7 @@ ib_tuple_write_float( /*************************************************************//** Read a float value from an InnoDB tuple. -@return DB_SUCCESS or error */ - +@return DB_SUCCESS or error */ ib_err_t ib_tuple_read_float( /*================*/ @@ -1218,7 +958,6 @@ ib_tuple_read_float( /*****************************************************************//** Get a column type, length and attributes from the tuple. @return len of column data */ - const char* ib_col_get_name( /*============*/ @@ -1228,7 +967,6 @@ ib_col_get_name( /*****************************************************************//** Get an index field name from the cursor. @return name of the field */ - const char* ib_get_idx_field_name( /*==================*/ @@ -1238,28 +976,15 @@ ib_get_idx_field_name( /*****************************************************************//** Truncate a table. @return DB_SUCCESS or error code */ - ib_err_t ib_table_truncate( /*==============*/ const char* table_name, /*!< in: table name */ ib_id_u64_t* table_id); /*!< out: new table id */ -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return DB_SUCCESS or error number */ - -ib_err_t -ib_close_thd( -/*=========*/ - void* thd); /*!< in: handle to the MySQL - thread of the user whose resources - should be free'd */ - /*****************************************************************//** Get generic configure status @return configure status*/ - int ib_cfg_get_cfg(); /*============*/ @@ -1274,28 +999,16 @@ ib_cursor_set_memcached_sync( ib_crsr_t ib_crsr, /*!< in: cursor */ ib_bool_t flag); /*!< in: true for increasing */ -/*****************************************************************//** -Check whether the table name conforms to our requirements. Currently -we only do a simple check for the presence of a '/'. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_table_name_check( -/*================*/ - const char* name); /*!< in: table name to check */ - /*****************************************************************//** Return isolation configuration set by "innodb_api_trx_level" @return trx isolation level*/ - -ib_trx_state_t +ib_trx_level_t ib_cfg_trx_level(); /*==============*/ /*****************************************************************//** Return configure value for background commit interval (in seconds) @return background commit interval (in seconds) */ - ib_ulint_t ib_cfg_bk_commit_interval(); /*=======================*/ @@ -1303,10 +1016,25 @@ ib_cfg_bk_commit_interval(); /*****************************************************************//** Get a trx start time. @return trx start_time */ - ib_u64_t ib_trx_get_start_time( /*==================*/ ib_trx_t ib_trx); /*!< in: transaction */ +/*****************************************************************//** +Wrapper of ut_strerr() which converts an InnoDB error number to a +human readable text message. +@return string, describing the error */ +const char* +ib_ut_strerr( +/*=========*/ + ib_err_t num); /*!< in: error number */ + +/** Check the table whether it contains virtual columns. 
+@param[in] crsr InnoDB Cursor +@return true if table contains virtual column else false. */ +ib_bool_t +ib_is_virtual_table( + ib_crsr_t crsr); + #endif /* api0api_h */ diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h index fcd748390d1..84ac3d622a9 100644 --- a/storage/innobase/include/api0misc.h +++ b/storage/innobase/include/api0misc.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -49,9 +49,8 @@ extern ulong ib_bk_commit_interval; /******************************************************************** Handles user errors and lock waits detected by the database engine. -@return TRUE if it was a lock wait and we should continue running +@return TRUE if it was a lock wait and we should continue running the query thread */ -UNIV_INTERN ibool ib_handle_errors( /*=============*/ @@ -66,8 +65,7 @@ ib_handle_errors( /************************************************************************* Sets a lock on a table. -@return error code or DB_SUCCESS */ -UNIV_INTERN +@return error code or DB_SUCCESS */ dberr_t ib_trx_lock_table_with_retry( /*=========================*/ diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index bf3f4a76301..c177f23824f 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -35,6 +35,7 @@ Created 6/2/1994 Heikki Tuuri #include "page0cur.h" #include "mtr0mtr.h" #include "btr0types.h" +#include "gis0type.h" #ifndef UNIV_HOTBACKUP /** Maximum record size which can be stored on a page, without using the @@ -67,7 +68,11 @@ enum btr_latch_mode { /** Search the previous record. 
*/ BTR_SEARCH_PREV = 35, /** Modify the previous record. */ - BTR_MODIFY_PREV = 36 + BTR_MODIFY_PREV = 36, + /** Start searching the entire B-tree. */ + BTR_SEARCH_TREE = 37, + /** Continue searching the entire B-tree. */ + BTR_CONT_SEARCH_TREE = 38 }; /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */ @@ -98,18 +103,47 @@ buffer when the record is not in the buffer pool. */ already holding an S latch on the index tree */ #define BTR_ALREADY_S_LATCHED 16384 +/** In the case of BTR_MODIFY_TREE, the caller specifies the intention +to insert record only. It is used to optimize block->lock range.*/ +#define BTR_LATCH_FOR_INSERT 32768 + +/** In the case of BTR_MODIFY_TREE, the caller specifies the intention +to delete record only. It is used to optimize block->lock range.*/ +#define BTR_LATCH_FOR_DELETE 65536 + +/** This flag is for undo insert of rtree. For rtree, we need this flag +to find proper rec to undo insert.*/ +#define BTR_RTREE_UNDO_INS 131072 + +/** In the case of BTR_MODIFY_LEAF, the caller intends to allocate or +free the pages of externally stored fields. */ +#define BTR_MODIFY_EXTERNAL 262144 + +/** Try to delete mark the record at the searched position when the +record is in spatial index */ +#define BTR_RTREE_DELETE_MARK 524288 + #define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \ ((latch_mode) & ~(BTR_INSERT \ | BTR_DELETE_MARK \ + | BTR_RTREE_UNDO_INS \ + | BTR_RTREE_DELETE_MARK \ | BTR_DELETE \ | BTR_ESTIMATE \ | BTR_IGNORE_SEC_UNIQUE \ - | BTR_ALREADY_S_LATCHED)) + | BTR_ALREADY_S_LATCHED \ + | BTR_LATCH_FOR_INSERT \ + | BTR_LATCH_FOR_DELETE \ + | BTR_MODIFY_EXTERNAL)) + +#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \ + ((latch_mode) & ~(BTR_LATCH_FOR_INSERT \ + | BTR_LATCH_FOR_DELETE \ + | BTR_MODIFY_EXTERNAL)) #endif /* UNIV_HOTBACKUP */ /**************************************************************//** Report that an index page is corrupted. 
*/ -UNIV_INTERN void btr_corruption_report( /*==================*/ @@ -128,95 +162,9 @@ btr_corruption_report( } #ifndef UNIV_HOTBACKUP -#ifdef UNIV_BLOB_DEBUG -# include "ut0rbt.h" -/** An index->blobs entry for keeping track of off-page column references */ -struct btr_blob_dbg_t -{ - unsigned blob_page_no:32; /*!< first BLOB page number */ - unsigned ref_page_no:32; /*!< referring page number */ - unsigned ref_heap_no:16; /*!< referring heap number */ - unsigned ref_field_no:10; /*!< referring field number */ - unsigned owner:1; /*!< TRUE if BLOB owner */ - unsigned always_owner:1; /*!< TRUE if always - has been the BLOB owner; - reset to TRUE on B-tree - page splits and merges */ - unsigned del:1; /*!< TRUE if currently - delete-marked */ -}; - /**************************************************************//** -Add a reference to an off-page column to the index->blobs map. */ -UNIV_INTERN -void -btr_blob_dbg_add_blob( -/*==================*/ - const rec_t* rec, /*!< in: clustered index record */ - ulint field_no, /*!< in: number of off-page column */ - ulint page_no, /*!< in: start page of the column */ - dict_index_t* index, /*!< in/out: index tree */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Display the references to off-page columns. -This function is to be called from a debugger, -for example when a breakpoint on ut_dbg_assertion_failed is hit. */ -UNIV_INTERN -void -btr_blob_dbg_print( -/*===============*/ - const dict_index_t* index) /*!< in: index tree */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Check that there are no references to off-page columns from or to -the given page. Invoked when freeing or clearing a page. 
-@return TRUE when no orphan references exist */ -UNIV_INTERN -ibool -btr_blob_dbg_is_empty( -/*==================*/ - dict_index_t* index, /*!< in: index */ - ulint page_no) /*!< in: page number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/**************************************************************//** -Modify the 'deleted' flag of a record. */ -UNIV_INTERN -void -btr_blob_dbg_set_deleted_flag( -/*==========================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ibool del) /*!< in: TRUE=deleted, FALSE=exists */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Change the ownership of an off-page column. */ -UNIV_INTERN -void -btr_blob_dbg_owner( -/*===============*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ulint i, /*!< in: ith field in rec */ - ibool own) /*!< in: TRUE=owned, FALSE=disowned */ - MY_ATTRIBUTE((nonnull)); -/** Assert that there are no BLOB references to or from the given page. */ -# define btr_blob_dbg_assert_empty(index, page_no) \ - ut_a(btr_blob_dbg_is_empty(index, page_no)) -#else /* UNIV_BLOB_DEBUG */ -# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0) -# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0) -# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0) -# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0) -#endif /* UNIV_BLOB_DEBUG */ - -/**************************************************************//** -Gets the root node of a tree and x-latches it. -@return root page, x-latched */ -UNIV_INTERN +Gets the root node of a tree and sx-latches it for segment access. 
+@return root page, sx-latched */ page_t* btr_root_get( /*=========*/ @@ -227,142 +175,117 @@ btr_root_get( /**************************************************************//** Checks and adjusts the root node of a tree during IMPORT TABLESPACE. @return error code, or DB_SUCCESS */ -UNIV_INTERN dberr_t btr_root_adjust_on_import( /*======================*/ const dict_index_t* index) /*!< in: index tree */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Gets the height of the B-tree (the level of the root, when the leaf level is assumed to be 0). The caller must hold an S or X latch on the index. -@return tree height (level of the root) */ -UNIV_INTERN +@return tree height (level of the root) */ ulint btr_height_get( /*===========*/ dict_index_t* index, /*!< in: index tree */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Gets a buffer page and declares its latching order level. */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Gets a buffer page and declares its latching order level. 
+@param[in] page_id page id +@param[in] mode latch mode +@param[in] file file name +@param[in] line line where called +@param[in] index index tree, may be NULL if it is not an insert buffer +tree +@param[in,out] mtr mini-transaction +@return block */ UNIV_INLINE buf_block_t* btr_block_get_func( -/*===============*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - dict_index_t* index, /*!< in: index tree, may be NULL - if it is not an insert buffer tree */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -# ifdef UNIV_SYNC_DEBUG + const page_id_t& page_id, + const page_size_t& page_size, + ulint mode, + const char* file, + ulint line, + dict_index_t* index, + mtr_t* mtr); + +# ifdef UNIV_DEBUG /** Gets a buffer page and declares its latching order level. -@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param index index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle +@param page_id tablespace/page identifier +@param page_size page size +@param mode latch mode +@param index index tree, may be NULL if not the insert buffer tree +@param mtr mini-transaction handle @return the block descriptor */ -# define btr_block_get(space,zip_size,page_no,mode,index,mtr) \ - btr_block_get_func(space,zip_size,page_no,mode, \ - __FILE__,__LINE__,index,mtr) -# else /* UNIV_SYNC_DEBUG */ +# define btr_block_get(page_id, page_size, mode, index, mtr) \ + btr_block_get_func(page_id, page_size, mode, \ + __FILE__, __LINE__, (dict_index_t*)index, mtr) +# else /* UNIV_DEBUG */ /** Gets a buffer page and declares its latching order level. 
-@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param idx index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle +@param page_id tablespace/page identifier +@param page_size page size +@param mode latch mode +@param index index tree, may be NULL if not the insert buffer tree +@param mtr mini-transaction handle @return the block descriptor */ -# define btr_block_get(space,zip_size,page_no,mode,idx,mtr) \ - btr_block_get_func(space,zip_size,page_no,mode, \ - __FILE__,__LINE__,idx,mtr) -# endif /* UNIV_SYNC_DEBUG */ +# define btr_block_get(page_id, page_size, mode, index, mtr) \ + btr_block_get_func(page_id, page_size, mode, __FILE__, __LINE__, (dict_index_t*)index, mtr) +# endif /* UNIV_DEBUG */ /** Gets a buffer page and declares its latching order level. -@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param idx index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle +@param page_id tablespace/page identifier +@param page_size page size +@param mode latch mode +@param index index tree, may be NULL if not the insert buffer tree +@param mtr mini-transaction handle @return the uncompressed page frame */ -# define btr_page_get(space,zip_size,page_no,mode,idx,mtr) \ - buf_block_get_frame(btr_block_get(space,zip_size,page_no, \ - mode,idx,mtr)) +# define btr_page_get(page_id, page_size, mode, index, mtr) \ + buf_block_get_frame(btr_block_get(page_id, page_size, \ + mode, index, mtr)) #endif /* !UNIV_HOTBACKUP */ /**************************************************************//** Gets the index id field of a page. 
-@return index id */ +@return index id */ UNIV_INLINE index_id_t btr_page_get_index_id( /*==================*/ const page_t* page) /*!< in: index page */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_HOTBACKUP /********************************************************//** Gets the node level field in an index page. -@return level, leaf level == 0 */ +@return level, leaf level == 0 */ UNIV_INLINE ulint btr_page_get_level_low( /*===================*/ const page_t* page) /*!< in: index page */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); #define btr_page_get_level(page, mtr) btr_page_get_level_low(page) /********************************************************//** Gets the next index page number. -@return next page number */ +@return next page number */ UNIV_INLINE ulint btr_page_get_next( /*==============*/ const page_t* page, /*!< in: index page */ mtr_t* mtr) /*!< in: mini-transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************//** Gets the previous index page number. -@return prev page number */ +@return prev page number */ UNIV_INLINE ulint btr_page_get_prev( /*==============*/ const page_t* page, /*!< in: index page */ mtr_t* mtr) /*!< in: mini-transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Gets pointer to the previous user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. 
-@return previous user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_prev_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the previous page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Gets pointer to the next user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. -@return next user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_next_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the next page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Releases the latch on a leaf page and bufferunfixes it. */ UNIV_INLINE @@ -380,60 +303,63 @@ NOTE: the offsets array must contain all offsets for the record since we read the last field according to offsets and assume that it contains the child page number. In other words offsets must have been retrieved with rec_get_offsets(n_fields=ULINT_UNDEFINED). -@return child node address */ +@return child node address */ UNIV_INLINE ulint btr_node_ptr_get_child_page_no( /*===========================*/ const rec_t* rec, /*!< in: node pointer record */ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/************************************************************//** -Creates the root node for a new index tree. -@return page number of the created root, FIL_NULL if did not succeed */ -UNIV_INTERN + MY_ATTRIBUTE((warn_unused_result)); + +/** Create the root node for a new index tree. 
+@param[in] type type of the index
+@param[in] space space where created
+@param[in] page_size page size
+@param[in] index_id index id
+@param[in] index index, or NULL when applying TRUNCATE
+log record during recovery
+@param[in] btr_redo_create_info used for applying TRUNCATE log
+record during recovery
+@param[in] mtr mini-transaction handle
+@return page number of the created root, FIL_NULL if did not succeed */
 ulint
 btr_create(
-/*=======*/
-	ulint		type,	/*!< in: type of the index */
-	ulint		space,	/*!< in: space where created */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	index_id_t	index_id,/*!< in: index id */
-	dict_index_t*	index,	/*!< in: index */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle */
-	MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
+	ulint			type,
+	ulint			space,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	dict_index_t*		index,
+	const btr_create_t*	btr_redo_create_info,
+	mtr_t*			mtr);
+
+/** Free a persistent index tree if it exists.
+@param[in] page_id root page id
+@param[in] page_size page size
+@param[in] index_id PAGE_INDEX_ID contents
+@param[in,out] mtr mini-transaction */
 void
-btr_free_but_not_root(
-/*==================*/
-	ulint	space,		/*!< in: space where created */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	root_page_no);	/*!< in: root page number */
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
+btr_free_if_exists(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	mtr_t*			mtr);
+
+/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE.
+@param[in] page_id root page id +@param[in] page_size page size */ void -btr_free_root( -/*==========*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no, /*!< in: root page number */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); +btr_free( + const page_id_t& page_id, + const page_size_t& page_size); + /*************************************************************//** Makes tree one level higher by splitting the root, and inserts the tuple. It is assumed that mtr contains an x-latch on the tree. NOTE that the operation of this function must always succeed, we cannot reverse it: therefore enough free disk space must be guaranteed to be available before this function is called. -@return inserted record */ -UNIV_INTERN +@return inserted record */ rec_t* btr_root_raise_and_insert( /*======================*/ @@ -448,7 +374,7 @@ btr_root_raise_and_insert( const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull(2,3,4,7), warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Reorganizes an index page. @@ -460,7 +386,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. @retval true if the operation was successful @retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN bool btr_page_reorganize_low( /*====================*/ @@ -474,7 +399,7 @@ btr_page_reorganize_low( page_cur_t* cursor, /*!< in/out: page cursor */ dict_index_t* index, /*!< in: the index tree of the page */ mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Reorganizes an index page. 
@@ -486,7 +411,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. @retval true if the operation was successful @retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN bool btr_page_reorganize( /*================*/ @@ -497,8 +421,7 @@ btr_page_reorganize( /*************************************************************//** Decides if the page should be split at the convergence point of inserts converging to left. -@return TRUE if split recommended */ -UNIV_INTERN +@return TRUE if split recommended */ ibool btr_page_get_split_rec_to_left( /*===========================*/ @@ -506,12 +429,11 @@ btr_page_get_split_rec_to_left( rec_t** split_rec)/*!< out: if split recommended, the first record on upper half page, or NULL if tuple should be first */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Decides if the page should be split at the convergence point of inserts converging to right. -@return TRUE if split recommended */ -UNIV_INTERN +@return TRUE if split recommended */ ibool btr_page_get_split_rec_to_right( /*============================*/ @@ -519,7 +441,8 @@ btr_page_get_split_rec_to_right( rec_t** split_rec)/*!< out: if split recommended, the first record on upper half page, or NULL if tuple should be first */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /*************************************************************//** Splits an index page to halves and inserts the tuple. It is assumed that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is @@ -529,7 +452,6 @@ free disk space (2 pages) must be guaranteed to be available before this function is called. 
@return inserted record */ -UNIV_INTERN rec_t* btr_page_split_and_insert( /*======================*/ @@ -543,11 +465,10 @@ btr_page_split_and_insert( const dtuple_t* tuple, /*!< in: tuple to insert */ ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull(2,3,4,7), warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. */ -UNIV_INTERN void btr_insert_on_non_leaf_level_func( /*==============================*/ @@ -557,14 +478,12 @@ btr_insert_on_non_leaf_level_func( dtuple_t* tuple, /*!< in: the record to be inserted */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + mtr_t* mtr); /*!< in: mtr */ # define btr_insert_on_non_leaf_level(f,i,l,t,m) \ btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m) #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** Sets a record as the predefined minimum record. */ -UNIV_INTERN void btr_set_min_rec_mark( /*=================*/ @@ -574,7 +493,6 @@ btr_set_min_rec_mark( #ifndef UNIV_HOTBACKUP /*************************************************************//** Deletes on the upper level the node pointer to a page. */ -UNIV_INTERN void btr_node_ptr_delete( /*================*/ @@ -585,15 +503,14 @@ btr_node_ptr_delete( #ifdef UNIV_DEBUG /************************************************************//** Checks that the node pointer to a page is appropriate. 
-@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool btr_check_node_ptr( /*===============*/ dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: index page */ mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); #endif /* UNIV_DEBUG */ /*************************************************************//** Tries to merge the page first to the left immediate brother if such a @@ -604,8 +521,7 @@ level lifts the records of the page to the father page, thus reducing the tree height. It is assumed that mtr holds an x-latch on the tree and on the page. If cursor is on the leaf level, mtr must also hold x-latches to the brothers, if they exist. -@return TRUE on success */ -UNIV_INTERN +@return TRUE on success */ ibool btr_compress( /*=========*/ @@ -621,20 +537,17 @@ btr_compress( Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot be used for the root page, which is allowed to be empty. */ -UNIV_INTERN void btr_discard_page( /*=============*/ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on the root page */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + mtr_t* mtr); /*!< in: mtr */ #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** Parses the redo log record for setting an index record as the predefined minimum record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_parse_set_min_rec_mark( /*=======================*/ @@ -646,8 +559,7 @@ btr_parse_set_min_rec_mark( MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); /***********************************************************//** Parses a redo log record of reorganizing a page. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_parse_page_reorganize( /*======================*/ @@ -657,12 +569,11 @@ btr_parse_page_reorganize( bool compressed,/*!< in: true if compressed page */ buf_block_t* block, /*!< in: page to be reorganized, or NULL */ mtr_t* mtr) /*!< in: mtr or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_HOTBACKUP /**************************************************************//** Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -UNIV_INTERN +@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ ulint btr_get_size( /*=========*/ @@ -670,7 +581,7 @@ btr_get_size( ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ mtr_t* mtr) /*!< in/out: mini-transaction where index is s-latched */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Gets the number of reserved and used pages in a B-tree. @return number of pages reserved, or ULINT_UNDEFINED if the index @@ -693,7 +604,6 @@ that the caller has made the reservation for free extents! @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) @retval block (not allocated or initialized) otherwise */ -UNIV_INTERN buf_block_t* btr_page_alloc( /*===========*/ @@ -708,11 +618,10 @@ btr_page_alloc( mtr_t* init_mtr) /*!< in/out: mini-transaction for x-latching and initializing the page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. 
*/ -UNIV_INTERN void btr_page_free( /*==========*/ @@ -721,19 +630,39 @@ btr_page_free( mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((nonnull)); /**************************************************************//** +Creates a new index page (not the root, and also not +used in page reorganization). @see btr_page_empty(). */ +void +btr_page_create( +/*============*/ + buf_block_t* block, /*!< in/out: page to be created */ + page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: the B-tree level of the page */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** Frees a file page used in an index tree. Can be used also to BLOB -external storage pages, because the page level 0 can be given as an -argument. */ -UNIV_INTERN +external storage pages. */ void btr_page_free_low( /*==============*/ dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ - ulint level, /*!< in: page level */ + ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */ bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull)); + MY_ATTRIBUTE((nonnull(1,2))); +/**************************************************************//** +Gets the root node of a tree and x- or s-latches it. +@return root page, x- or s-latched */ +buf_block_t* +btr_root_block_get( +/*===============*/ + const dict_index_t* index, /*!< in: index tree */ + ulint mode, /*!< in: either RW_S_LATCH + or RW_X_LATCH */ + mtr_t* mtr); /*!< in: mtr */ + /*************************************************************//** Reorganizes an index page. @@ -764,7 +693,6 @@ btr_page_reorganize_block( #ifdef UNIV_BTR_PRINT /*************************************************************//** Prints size info of a B-tree. 
*/ -UNIV_INTERN void btr_print_size( /*===========*/ @@ -772,7 +700,6 @@ btr_print_size( MY_ATTRIBUTE((nonnull)); /**************************************************************//** Prints directories and other info of all nodes in the index. */ -UNIV_INTERN void btr_print_index( /*============*/ @@ -784,8 +711,7 @@ btr_print_index( /************************************************************//** Checks the size and number of fields in a record based on the definition of the index. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool btr_index_rec_validate( /*===================*/ @@ -794,39 +720,17 @@ btr_index_rec_validate( ibool dump_on_error) /*!< in: TRUE if the function should print hex dump of record and page on error */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**************************************************************//** Checks the consistency of an index tree. @return DB_SUCCESS if ok, error code if not */ -UNIV_INTERN dberr_t btr_validate_index( /*===============*/ - dict_index_t* index, /*!< in: index */ - const trx_t* trx) /*!< in: transaction or 0 */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); - -#ifdef UNIV_SYNC_DEBUG -/*************************************************************//** -Removes a page from the level list of pages. -@param space in: space where removed -@param zip_size in: compressed page size in bytes, or 0 for uncompressed -@param page in/out: page to remove -@param index in: index tree -@param mtr in/out: mini-transaction */ -# define btr_level_list_remove(space,zip_size,page,index,mtr) \ - btr_level_list_remove_func(space,zip_size,page,index,mtr) -#else /* UNIV_SYNC_DEBUG */ -/*************************************************************//** -Removes a page from the level list of pages. 
-@param space in: space where removed -@param zip_size in: compressed page size in bytes, or 0 for uncompressed -@param page in/out: page to remove -@param index in: index tree -@param mtr in/out: mini-transaction */ -# define btr_level_list_remove(space,zip_size,page,index,mtr) \ - btr_level_list_remove_func(space,zip_size,page,index,mtr) -#endif /* UNIV_SYNC_DEBUG */ + dict_index_t* index, /*!< in: index */ + const trx_t* trx, /*!< in: transaction or 0 */ + bool lockout)/*!< in: true if X-latch index is intended */ + MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Removes a page from the level list of pages. */ @@ -835,11 +739,19 @@ void btr_level_list_remove_func( /*=======================*/ ulint space, /*!< in: space where removed */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ + const page_size_t& page_size,/*!< in: page size */ page_t* page, /*!< in/out: page to remove */ dict_index_t* index, /*!< in: index tree */ mtr_t* mtr); /*!< in/out: mini-transaction */ +/*************************************************************//** +Removes a page from the level list of pages. 
+@param space in: space where removed
+@param page_size in: page size
+@param page in/out: page to remove
+@param index in: index tree
+@param mtr in/out: mini-transaction */
+# define btr_level_list_remove(space,page_size,page,index,mtr)	\
+	btr_level_list_remove_func(space,page_size,page,index,mtr)
 
 /*************************************************************//**
 If page is the only on its level, this function moves its records to the
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 64b3d5a0975..58a0c6755b1 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -36,28 +36,31 @@ Created 6/2/1994 Heikki Tuuri
 in btr_page_set_level and
 btr_page_get_level_low */
 
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+/** Gets a buffer page and declares its latching order level.
+@param[in] page_id page id
+@param[in] mode latch mode
+@param[in] file file name
+@param[in] line line where called
+@param[in] index index tree, may be NULL if it is not an insert buffer
+tree
+@param[in,out] mtr mini-transaction
+@return block */
 UNIV_INLINE
 buf_block_t*
 btr_block_get_func(
-/*===============*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint		page_no,	/*!< in: page number */
-	ulint		mode,	/*!< in: latch mode */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	dict_index_t*	index,	/*!< in: index tree, may be NULL
-				if it is not an insert buffer tree */
-	mtr_t*		mtr)	/*!< in/out: mtr */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			mode,
+	const char*		file,
+	ulint			line,
+	dict_index_t*		index,
+	mtr_t*			mtr)
 {
 	buf_block_t*	block;
-	dberr_t		err;
+	dberr_t		err=DB_SUCCESS;
 
-	block = buf_page_get_gen(space, zip_size, page_no, mode,
-				 NULL,
BUF_GET, file, line, mtr, &err); + block = buf_page_get_gen( + page_id, page_size, mode, NULL, BUF_GET, file, line, mtr, &err); if (err == DB_DECRYPTION_FAILED) { index->table->is_encrypted = true; @@ -100,7 +103,7 @@ btr_page_set_index_id( /**************************************************************//** Gets the index id field of a page. -@return index id */ +@return index id */ UNIV_INLINE index_id_t btr_page_get_index_id( @@ -113,7 +116,7 @@ btr_page_get_index_id( #ifndef UNIV_HOTBACKUP /********************************************************//** Gets the node level field in an index page. -@return level, leaf level == 0 */ +@return level, leaf level == 0 */ UNIV_INLINE ulint btr_page_get_level_low( @@ -143,7 +146,8 @@ btr_page_set_level( ulint level, /*!< in: level, leaf level == 0 */ mtr_t* mtr) /*!< in: mini-transaction handle */ { - ut_ad(page && mtr); + ut_ad(page != NULL); + ut_ad(mtr != NULL); ut_ad(level <= BTR_MAX_NODE_LEVEL); if (page_zip) { @@ -159,7 +163,7 @@ btr_page_set_level( /********************************************************//** Gets the next index page number. -@return next page number */ +@return next page number */ UNIV_INLINE ulint btr_page_get_next( @@ -170,10 +174,7 @@ btr_page_get_next( { ut_ad(page != NULL); ut_ad(mtr != NULL); -#ifndef UNIV_INNOCHECKSUM - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX)); -#endif /* UNIV_INNOCHECKSUM */ + return(mach_read_from_4(page + FIL_PAGE_NEXT)); } @@ -202,7 +203,7 @@ btr_page_set_next( /********************************************************//** Gets the previous index page number. -@return prev page number */ +@return prev page number */ UNIV_INLINE ulint btr_page_get_prev( @@ -245,7 +246,7 @@ NOTE: the offsets array must contain all offsets for the record since we read the last field according to offsets and assume that it contains the child page number. 
In other words offsets must have been retrieved with rec_get_offsets(n_fields=ULINT_UNDEFINED). -@return child node address */ +@return child node address */ UNIV_INLINE ulint btr_node_ptr_get_child_page_no( @@ -266,15 +267,7 @@ btr_node_ptr_get_child_page_no( ut_ad(len == 4); page_no = mach_read_from_4(field); - - if (page_no == 0) { - fprintf(stderr, - "InnoDB: a nonsensical page number 0" - " in a node ptr record at offset %lu\n", - (ulong) page_offset(rec)); - buf_page_print(page_align(rec), 0, 0); - ut_ad(0); - } + ut_ad(page_no > 1); return(page_no); } @@ -290,12 +283,27 @@ btr_leaf_page_release( BTR_MODIFY_LEAF */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); + ut_ad(latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF + || latch_mode == BTR_NO_LATCHES); + ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)); - mtr_memo_release(mtr, block, - latch_mode == BTR_SEARCH_LEAF - ? MTR_MEMO_PAGE_S_FIX - : MTR_MEMO_PAGE_X_FIX); + ulint mode; + switch (latch_mode) { + case BTR_SEARCH_LEAF: + mode = MTR_MEMO_PAGE_S_FIX; + break; + case BTR_MODIFY_LEAF: + mode = MTR_MEMO_PAGE_X_FIX; + break; + case BTR_NO_LATCHES: + mode = MTR_MEMO_BUF_FIX; + break; + default: + ut_a(0); + } + + mtr->memo_release(block, mode); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h new file mode 100644 index 00000000000..a1887c3df2b --- /dev/null +++ b/storage/innobase/include/btr0bulk.h @@ -0,0 +1,392 @@ +/***************************************************************************** + +Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0bulk.h
+The B-tree bulk load
+
+Created 03/11/2014 Shaohua Wang
+*************************************************************************/
+
+#ifndef btr0bulk_h
+#define btr0bulk_h
+
+#include "dict0dict.h"
+#include "page0cur.h"
+#include "ut0new.h"
+
+#include <vector>
+
+/** Innodb B-tree index fill factor for bulk load. */
+extern long innobase_fill_factor;
+
+/*
+The proper function call sequence of PageBulk is as below:
+-- PageBulk::init
+-- PageBulk::insert
+-- PageBulk::finish
+-- PageBulk::compress(COMPRESSED table only)
+-- PageBulk::pageSplit(COMPRESSED table only)
+-- PageBulk::commit
+*/
+
+class PageBulk
+{
+public:
+	/** Constructor
+	@param[in]	index	B-tree index
+	@param[in]	trx_id	transaction id
+	@param[in]	page_no	page number
+	@param[in]	level	page level
+	@param[in]	observer flush observer */
+	PageBulk(
+		dict_index_t*	index,
+		trx_id_t	trx_id,
+		ulint		page_no,
+		ulint		level,
+		FlushObserver*	observer)
+		:
+		m_heap(NULL),
+		m_index(index),
+		m_mtr(NULL),
+		m_trx_id(trx_id),
+		m_block(NULL),
+		m_page(NULL),
+		m_page_zip(NULL),
+		m_cur_rec(NULL),
+		m_page_no(page_no),
+		m_level(level),
+		m_is_comp(dict_table_is_comp(index->table)),
+		m_heap_top(NULL),
+		m_rec_no(0),
+		m_free_space(0),
+		m_reserved_space(0),
+#ifdef UNIV_DEBUG
+		m_total_data(0),
+#endif /* UNIV_DEBUG */
+		m_modify_clock(0),
+		m_flush_observer(observer),
+		m_err(DB_SUCCESS)
+
{ + ut_ad(!dict_index_is_spatial(m_index)); + } + + /** Deconstructor */ + ~PageBulk() + { + mem_heap_free(m_heap); + } + + /** Initialize members and allocate page if needed and start mtr. + Note: must be called and only once right after constructor. + @return error code */ + dberr_t init(); + + /** Insert a record in the page. + @param[in] rec record + @param[in] offsets record offsets */ + void insert(const rec_t* rec, ulint* offsets); + + /** Mark end of insertion to the page. Scan all records to set page + dirs, and set page header members. */ + void finish(); + + /** Commit mtr for a page + @param[in] success Flag whether all inserts succeed. */ + void commit(bool success); + + /** Compress if it is compressed table + @return true compress successfully or no need to compress + @return false compress failed. */ + bool compress(); + + /** Check whether the record needs to be stored externally. + @return true + @return false */ + bool needExt(const dtuple_t* tuple, ulint rec_size); + + /** Store external record + @param[in] big_rec external recrod + @param[in] offsets record offsets + @return error code */ + dberr_t storeExt(const big_rec_t* big_rec, ulint* offsets); + + /** Get node pointer + @return node pointer */ + dtuple_t* getNodePtr(); + + /** Get split rec in the page. We split a page in half when compresssion + fails, and the split rec should be copied to the new page. + @return split rec */ + rec_t* getSplitRec(); + + /** Copy all records after split rec including itself. + @param[in] rec split rec */ + void copyIn(rec_t* split_rec); + + /** Remove all records after split rec including itself. 
+ @param[in] rec split rec */ + void copyOut(rec_t* split_rec); + + /** Set next page + @param[in] next_page_no next page no */ + void setNext(ulint next_page_no); + + /** Set previous page + @param[in] prev_page_no previous page no */ + void setPrev(ulint prev_page_no); + + /** Release block by commiting mtr */ + inline void release(); + + /** Start mtr and latch block */ + inline dberr_t latch(); + + /** Check if required space is available in the page for the rec + to be inserted. We check fill factor & padding here. + @param[in] length required length + @return true if space is available */ + inline bool isSpaceAvailable(ulint rec_size); + + /** Get page no */ + ulint getPageNo() + { + return(m_page_no); + } + + /** Get page level */ + ulint getLevel() + { + return(m_level); + } + + /** Get record no */ + ulint getRecNo() + { + return(m_rec_no); + } + + /** Get page */ + page_t* getPage() + { + return(m_page); + } + + /** Get page zip */ + page_zip_des_t* getPageZip() + { + return(m_page_zip); + } + + dberr_t getError() + { + return(m_err); + } + + /* Memory heap for internal allocation */ + mem_heap_t* m_heap; + +private: + /** The index B-tree */ + dict_index_t* m_index; + + /** The min-transaction */ + mtr_t* m_mtr; + + /** The transaction id */ + trx_id_t m_trx_id; + + /** The buffer block */ + buf_block_t* m_block; + + /** The page */ + page_t* m_page; + + /** The page zip descriptor */ + page_zip_des_t* m_page_zip; + + /** The current rec, just before the next insert rec */ + rec_t* m_cur_rec; + + /** The page no */ + ulint m_page_no; + + /** The page level in B-tree */ + ulint m_level; + + /** Flag: is page in compact format */ + const bool m_is_comp; + + /** The heap top in page for next insert */ + byte* m_heap_top; + + /** User record no */ + ulint m_rec_no; + + /** The free space left in the page */ + ulint m_free_space; + + /** The reserved space for fill factor */ + ulint m_reserved_space; + + /** The padding space for compressed page */ + ulint 
m_padding_space;
+
+#ifdef UNIV_DEBUG
+	/** Total data in the page */
+	ulint		m_total_data;
+#endif /* UNIV_DEBUG */
+
+	/** The modify clock value of the buffer block
+	when the block is re-pinned */
+	ib_uint64_t	m_modify_clock;
+
+	/** Flush observer */
+	FlushObserver*	m_flush_observer;
+
+	/** Operation result DB_SUCCESS or error code */
+	dberr_t		m_err;
+};
+
+typedef std::vector<PageBulk*, ut_allocator<PageBulk*> >
+	page_bulk_vector;
+
+class BtrBulk
+{
+public:
+	/** Constructor
+	@param[in]	index	B-tree index
+	@param[in]	trx_id	transaction id
+	@param[in]	observer flush observer */
+	BtrBulk(
+		dict_index_t*	index,
+		trx_id_t	trx_id,
+		FlushObserver*	observer)
+		:
+		m_heap(NULL),
+		m_index(index),
+		m_trx_id(trx_id),
+		m_flush_observer(observer)
+	{
+		ut_ad(m_flush_observer != NULL);
+#ifdef UNIV_DEBUG
+		fil_space_inc_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Destructor */
+	~BtrBulk()
+	{
+		mem_heap_free(m_heap);
+		UT_DELETE(m_page_bulks);
+
+#ifdef UNIV_DEBUG
+		fil_space_dec_redo_skipped_count(m_index->space);
+#endif /* UNIV_DEBUG */
+	}
+
+	/** Initialization
+	Note: must be called right after constructor. */
+	void init()
+	{
+		ut_ad(m_heap == NULL);
+		m_heap = mem_heap_create(1000);
+
+		m_page_bulks = UT_NEW_NOKEY(page_bulk_vector());
+	}
+
+	/** Insert a tuple
+	@param[in]	tuple	tuple to insert.
+	@return error code */
+	dberr_t insert(dtuple_t*	tuple)
+	{
+		return(insert(tuple, 0));
+	}
+
+	/** Btree bulk load finish. We commit the last page in each level
+	and copy the last page in top level to the root page of the index
+	if no error occurs.
+ @param[in] err whether bulk load was successful until now + @return error code */ + dberr_t finish(dberr_t err); + + /** Release all latches */ + void release(); + + /** Re-latch all latches */ + void latch(); + +private: + /** Insert a tuple to a page in a level + @param[in] tuple tuple to insert + @param[in] level B-tree level + @return error code */ + dberr_t insert(dtuple_t* tuple, ulint level); + + /** Split a page + @param[in] page_bulk page to split + @param[in] next_page_bulk next page + @return error code */ + dberr_t pageSplit(PageBulk* page_bulk, + PageBulk* next_page_bulk); + + /** Commit(finish) a page. We set next/prev page no, compress a page of + compressed table and split the page if compression fails, insert a node + pointer to father page if needed, and commit mini-transaction. + @param[in] page_bulk page to commit + @param[in] next_page_bulk next page + @param[in] insert_father flag whether need to insert node ptr + @return error code */ + dberr_t pageCommit(PageBulk* page_bulk, + PageBulk* next_page_bulk, + bool insert_father); + + /** Abort a page when an error occurs + @param[in] page_bulk page bulk object + Note: we should call pageAbort for a PageBulk object, which is not in + m_page_bulks after pageCommit, and we will commit or abort PageBulk + objects in function "finish". 
*/ + void pageAbort(PageBulk* page_bulk) + { + page_bulk->commit(false); + } + + /** Log free check */ + void logFreeCheck(); + +private: + /** Memory heap for allocation */ + mem_heap_t* m_heap; + + /** B-tree index */ + dict_index_t* m_index; + + /** Transaction id */ + trx_id_t m_trx_id; + + /** Root page level */ + ulint m_root_level; + + /** Flush observer */ + FlushObserver* m_flush_observer; + + /** Page cursor vector for all level */ + page_bulk_vector* m_page_bulks; +}; + +#endif diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index aa799e0fc00..f582f04733c 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -30,6 +30,7 @@ Created 10/16/1994 Heikki Tuuri #include "dict0dict.h" #include "page0cur.h" #include "btr0types.h" +#include "gis0type.h" /** Mode flags for btr_cur operations; these can be ORed */ enum { @@ -52,6 +53,13 @@ enum { BTR_KEEP_IBUF_BITMAP = 32 }; +/* btr_cur_latch_leaves() returns latched blocks and savepoints. */ +struct btr_latch_leaves_t { + /* left block, target block and right block */ + buf_block_t* blocks[3]; + ulint savepoints[3]; +}; + #ifndef UNIV_HOTBACKUP #include "que0types.h" #include "row0types.h" @@ -63,7 +71,7 @@ enum { #ifdef UNIV_DEBUG /*********************************************************//** Returns the page cursor component of a tree cursor. -@return pointer to page cursor component */ +@return pointer to page cursor component */ UNIV_INLINE page_cur_t* btr_cur_get_page_cur( @@ -71,7 +79,7 @@ btr_cur_get_page_cur( const btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** Returns the buffer block on which the tree cursor is positioned. 
-@return pointer to buffer block */ +@return pointer to buffer block */ UNIV_INLINE buf_block_t* btr_cur_get_block( @@ -79,7 +87,7 @@ btr_cur_get_block( const btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** Returns the record pointer of a tree cursor. -@return pointer to record */ +@return pointer to record */ UNIV_INLINE rec_t* btr_cur_get_rec( @@ -92,22 +100,15 @@ btr_cur_get_rec( #endif /* UNIV_DEBUG */ /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. -@return pointer to compressed page, or NULL if the page is not compressed */ +@return pointer to compressed page, or NULL if the page is not compressed */ UNIV_INLINE page_zip_des_t* btr_cur_get_page_zip( /*=================*/ btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** Returns the page of a tree cursor. -@return pointer to page */ +@return pointer to page */ UNIV_INLINE page_t* btr_cur_get_page( @@ -115,8 +116,8 @@ btr_cur_get_page( btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** Returns the index of a cursor. -@param cursor b-tree cursor -@return index */ +@param cursor b-tree cursor +@return index */ #define btr_cur_get_index(cursor) ((cursor)->index) /*********************************************************//** Positions a tree cursor at a given record. */ @@ -128,6 +129,26 @@ btr_cur_position( rec_t* rec, /*!< in: record in tree */ buf_block_t* block, /*!< in: buffer block of rec */ btr_cur_t* cursor);/*!< in: cursor */ + +/** Optimistically latches the leaf page or pages requested. 
+@param[in] block guessed buffer block +@param[in] modify_clock modify clock value +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] cursor cursor +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@return true if success */ +bool +btr_cur_optimistic_latch_leaves( + buf_block_t* block, + ib_uint64_t modify_clock, + ulint* latch_mode, + btr_cur_t* cursor, + const char* file, + ulint line, + mtr_t* mtr); + /********************************************************************//** Searches an index tree and positions a tree cursor on a given level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared @@ -135,7 +156,6 @@ to node pointer page number fields on the upper levels of the tree! Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */ -UNIV_INTERN dberr_t btr_cur_search_to_nth_level( /*========================*/ @@ -144,7 +164,7 @@ btr_cur_search_to_nth_level( const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter may end up on @@ -164,15 +184,48 @@ btr_cur_search_to_nth_level( to protect the record! */ btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is s- or x-latched, but see also above! 
*/ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: + ulint has_search_latch, + /*!< in: latch mode the caller + currently has on search system: RW_S_LATCH, or 0 */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ + +/** Searches an index tree and positions a tree cursor on a given level. +This function will avoid placing latches on the traversal path and so +should be used only for cases wherein latching is not needed. + +@param[in] index index +@param[in] level the tree level of search +@param[in] tuple data tuple; Note: n_fields_cmp is compared + to the node ptr page node field +@param[in] mode PAGE_CUR_L, .... + Insert should always be made using PAGE_CUR_LE + to search the position. +@param[in,out] cursor tree cursor; points to record of interest. +@param[in] file file name +@param[in] line line where called from +@param[in,out] mtr mtr +@param[in] mark_dirty + if true then mark the block as dirty +@return DB_SUCCESS or error code */ +dberr_t +btr_cur_search_to_nth_level_with_no_latch( + dict_index_t* index, + ulint level, + const dtuple_t* tuple, + page_cur_mode_t mode, + btr_cur_t* cursor, + const char* file, + ulint line, + mtr_t* mtr, + bool mark_dirty = true) + __attribute__((warn_unused_result)); + /*****************************************************************//** -Opens a cursor at either end of an index. */ -UNIV_INTERN +Opens a cursor at either end of an index. +@return DB_SUCCESS or error code */ dberr_t btr_cur_open_at_index_side_func( /*============================*/ @@ -187,12 +240,44 @@ btr_cur_open_at_index_side_func( ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull)); + #define btr_cur_open_at_index_side(f,i,l,c,lv,m) \ btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m) + +/** Opens a cursor at either end of an index.
+Avoid taking latches on buffer, just pin (by incrementing fix_count) +to keep them in buffer pool. This mode is used by intrinsic table +as they are not shared and so there is no need of latching. +@param[in] from_left true if open to low end, false if open + to high end. +@param[in] index index +@param[in] latch_mode latch mode +@param[in,out] cursor cursor +@param[in] file file name +@param[in] line line where called +@param[in,out] mtr mini transaction +@return DB_SUCCESS or error code +*/ +dberr_t +btr_cur_open_at_index_side_with_no_latch_func( + bool from_left, + dict_index_t* index, + btr_cur_t* cursor, + ulint level, + const char* file, + ulint line, + mtr_t* mtr) + __attribute__((warn_unused_result)); + +#define btr_cur_open_at_index_side_with_no_latch(f,i,c,lv,m) \ + btr_cur_open_at_index_side_with_no_latch_func( \ + f,i,c,lv,__FILE__,__LINE__,m) + /**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ -UNIV_INTERN -void +Positions a cursor at a randomly chosen position within a B-tree. +@return true if the index is available and we have put the cursor, false +if the index is unavailable */ +bool btr_cur_open_at_rnd_pos_func( /*=========================*/ dict_index_t* index, /*!< in: index */ @@ -209,8 +294,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page. If there is just one record on the page, the insert will always succeed; this is to prevent trying to split a page with just one record. -@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ dberr_t btr_cur_optimistic_insert( /*======================*/ @@ -241,8 +325,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr holds an x-latch on the tree and on the cursor page. 
If the insert is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. -@return DB_SUCCESS or error number */ -UNIV_INTERN +@return DB_SUCCESS or error number */ dberr_t btr_cur_pessimistic_insert( /*=======================*/ @@ -273,13 +356,12 @@ an update-in-place. @retval false if out of space; IBUF_BITMAP_FREE will be reset outside mtr if the page was recompressed -@retval true if enough place; +@retval true if enough place; IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a secondary index leaf page. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(mtr). */ -UNIV_INTERN bool btr_cur_update_alloc_zip_func( /*==========================*/ @@ -307,7 +389,6 @@ Updates a record when the update causes no size changes in its fields. @retval DB_SUCCESS on success @retval DB_ZIP_OVERFLOW if there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN dberr_t btr_cur_update_in_place( /*====================*/ @@ -328,7 +409,6 @@ btr_cur_update_in_place( MY_ATTRIBUTE((warn_unused_result, nonnull)); /***********************************************************//** Writes a redo log record of updating a record in-place. */ -UNIV_INTERN void btr_cur_update_in_place_log( /*========================*/ @@ -351,7 +431,6 @@ so that tree compression is recommended. @retval DB_UNDERFLOW if the page would become too empty @retval DB_ZIP_OVERFLOW if there is not enough space left on the compressed page */ -UNIV_INTERN dberr_t btr_cur_optimistic_update( /*======================*/ @@ -377,8 +456,7 @@ Performs an update of a record on a page of a tree. It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the update is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t btr_cur_pessimistic_update( /*=======================*/ @@ -396,9 +474,10 @@ btr_cur_pessimistic_update( big_rec and the index tuple */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ - const upd_t* update, /*!< in: update vector; this is allowed also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ + upd_t* update, /*!< in/out: update vector; this is allowed to + also contain trx id and roll ptr fields. + Non-updated columns that are moved offpage will + be appended to this. */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ que_thr_t* thr, /*!< in: query thread */ @@ -411,22 +490,22 @@ Marks a clustered index record deleted. Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id of the deleting transaction, and in the roll ptr field pointer to the undo log record created. -@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ dberr_t btr_cur_del_mark_set_clust_rec( /*===========================*/ + ulint flags, /*!< in: undo logging and locking flags */ buf_block_t* block, /*!< in/out: buffer block of the record */ rec_t* rec, /*!< in/out: record */ dict_index_t* index, /*!< in: clustered index of the record */ const ulint* offsets,/*!< in: rec_get_offsets(rec) */ que_thr_t* thr, /*!< in: query thread */ + const dtuple_t* entry, /*!< in: dtuple for the deleting record */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************//** Sets a secondary index record delete mark to TRUE or FALSE. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN +@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ dberr_t btr_cur_del_mark_set_sec_rec( /*=========================*/ @@ -442,8 +521,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done! -@return TRUE if compression occurred */ -UNIV_INTERN +@return TRUE if compression occurred */ ibool btr_cur_compress_if_useful( /*=======================*/ @@ -458,8 +536,7 @@ btr_cur_compress_if_useful( Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -UNIV_INTERN +@return TRUE if success, i.e., the page did not become too empty */ ibool btr_cur_optimistic_delete_func( /*===========================*/ @@ -489,8 +566,7 @@ or if it is the only page on the level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. -@return TRUE if compression occurred */ -UNIV_INTERN +@return TRUE if compression occurred */ ibool btr_cur_pessimistic_delete( /*=======================*/ @@ -508,14 +584,13 @@ btr_cur_pessimistic_delete( stays valid: it points to successor of deleted record on function exit */ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ + bool rollback,/*!< in: performing rollback? */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((nonnull)); #endif /* !UNIV_HOTBACKUP */ /***********************************************************//** Parses a redo log record of updating a record in-place. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_update_in_place( /*==========================*/ @@ -527,8 +602,7 @@ btr_cur_parse_update_in_place( /****************************************************************//** Parses the redo log record for delete marking or unmarking of a clustered index record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ @@ -540,8 +614,7 @@ btr_cur_parse_del_mark_set_clust_rec( /****************************************************************//** Parses the redo log record for delete marking or unmarking of a secondary index record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ @@ -550,19 +623,22 @@ btr_cur_parse_del_mark_set_sec_rec( page_t* page, /*!< in/out: page or NULL */ page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */ #ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Estimates the number of rows in a given index range. -@return estimated number of rows */ -UNIV_INTERN -ib_int64_t + +/** Estimates the number of rows in a given index range. 
+@param[in] index index +@param[in] tuple1 range start, may also be empty tuple +@param[in] mode1 search mode for range start +@param[in] tuple2 range end, may also be empty tuple +@param[in] mode2 search mode for range end +@return estimated number of rows */ +int64_t btr_estimate_n_rows_in_range( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */ - ulint mode1, /*!< in: search mode for range start */ - const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */ - ulint mode2, /*!< in: search mode for range end */ - trx_t* trx); /*!< in: trx */ + dict_index_t* index, + const dtuple_t* tuple1, + page_cur_mode_t mode1, + const dtuple_t* tuple2, + page_cur_mode_t mode2); + /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). @@ -571,9 +647,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed index->stat_n_sample_sizes[]. If innodb_stats_method is nulls_ignored, we also record the number of non-null values for each prefix and stored the estimates in -array index->stat_n_non_null_key_vals. */ -UNIV_INTERN -void +array index->stat_n_non_null_key_vals. +@return true if the index is available and we get the estimated numbers, +false if the index is unavailable. */ +bool btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ @@ -582,7 +659,6 @@ btr_estimate_number_of_different_key_vals( @param[in] rec record @param[in] offsets array returned by rec_get_offsets() @return externally stored part, in units of a database page */ - ulint btr_rec_get_externally_stored_len( const rec_t* rec, @@ -593,7 +669,6 @@ Marks non-updated off-page fields as disowned by this record. 
The ownership must be transferred to the updated record which is inserted elsewhere in the index tree. In purge only the owner of externally stored field is allowed to free the field. */ -UNIV_INTERN void btr_cur_disown_inherited_fields( /*============================*/ @@ -613,7 +688,9 @@ enum blob_op { /** Store off-page columns for an insert by update */ BTR_STORE_INSERT_UPDATE, /** Store off-page columns for an update */ - BTR_STORE_UPDATE + BTR_STORE_UPDATE, + /** Store off-page columns for a freshly inserted record by bulk */ + BTR_STORE_INSERT_BULK }; /*******************************************************************//** @@ -631,32 +708,31 @@ Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. The fields are stored on pages allocated from leaf node file segment of the index tree. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -UNIV_INTERN +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ dberr_t btr_store_big_rec_extern_fields( /*============================*/ - dict_index_t* index, /*!< in: index of rec; the index tree - MUST be X-latched */ - buf_block_t* rec_block, /*!< in/out: block containing rec */ - rec_t* rec, /*!< in/out: record */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index); - the "external storage" flags in offsets - will not correspond to rec when - this function returns */ + btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if + btr_mtr is restarted, then this can + be repositioned. */ + const upd_t* upd, /*!< in: update vector */ + ulint* offsets, /*!< in/out: rec_get_offsets() on + pcur. 
the "external storage" flags + in offsets will correctly correspond + to rec when this function returns */ const big_rec_t*big_rec_vec, /*!< in: vector containing fields to be stored externally */ - mtr_t* btr_mtr, /*!< in: mtr containing the - latches to the clustered index */ + mtr_t* btr_mtr, /*!< in/out: mtr containing the + latches to the clustered index. can be + committed and restarted. */ enum blob_op op) /*! in: operation code */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*******************************************************************//** Frees the space in an externally stored field to the file space management if the field in data is owned the externally stored field, in a rollback we may have the additional condition that the field must not be inherited. */ -UNIV_INTERN void btr_free_externally_stored_field( /*=============================*/ @@ -677,69 +753,68 @@ btr_free_externally_stored_field( to rec, or NULL if rec == NULL */ ulint i, /*!< in: field number of field_ref; ignored if rec == NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* local_mtr); /*!< in: mtr containing the latch to - data an an X-latch to the index - tree */ -/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record must be protected by a lock or a page latch. + bool rollback, /*!< in: performing rollback? */ + mtr_t* local_mtr); /*!< in: mtr containing the latch */ +/** Copies the prefix of an externally stored field of a record. +The clustered index record must be protected by a lock or a page latch. 
+@param[out] buf the field, or a prefix of it +@param[in] len length of buf, in bytes +@param[in] page_size BLOB page size +@param[in] data 'internally' stored part of the field +containing also the reference to the external part; must be protected by +a lock or a page latch +@param[in] local_len length of data, in bytes @return the length of the copied field, or 0 if the column was being or has been deleted */ -UNIV_INTERN ulint btr_copy_externally_stored_field_prefix( -/*====================================*/ - byte* buf, /*!< out: the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint local_len,/*!< in: length of data, in bytes */ - trx_t* trx); /*!< in: transaction handle */ -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. The -clustered index record must be protected by a lock or a page latch. + byte* buf, + ulint len, + const page_size_t& page_size, + const byte* data, + ulint local_len); + +/** Copies an externally stored field of a record to mem heap. +The clustered index record must be protected by a lock or a page latch. 
+@param[out] len length of the whole field +@param[in] data 'internally' stored part of the field +containing also the reference to the external part; must be protected by +a lock or a page latch +@param[in] page_size BLOB page size +@param[in] local_len length of data +@param[in,out] heap mem heap @return the whole field copied to heap */ -UNIV_INTERN byte* btr_copy_externally_stored_field( -/*=============================*/ - ulint* len, /*!< out: length of the whole field */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint local_len,/*!< in: length of data */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx); /*!< in: transaction handle */ -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. -@return the field copied to heap, or NULL if the field is incomplete */ -UNIV_INTERN + ulint* len, + const byte* data, + const page_size_t& page_size, + ulint local_len, + mem_heap_t* heap); + +/** Copies an externally stored field of a record to mem heap. 
+@param[in] rec record in a clustered index; must be +protected by a lock or a page latch +@param[in] offset array returned by rec_get_offsets() +@param[in] page_size BLOB page size +@param[in] no field number +@param[out] len length of the field +@param[in,out] heap mem heap +@return the field copied to heap, or NULL if the field is incomplete */ byte* btr_rec_copy_externally_stored_field( -/*=================================*/ - const rec_t* rec, /*!< in: record in a clustered index; - must be protected by a lock or a page latch */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint no, /*!< in: field number */ - ulint* len, /*!< out: length of the field */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx); /*!< in: transaction handle */ + const rec_t* rec, + const ulint* offsets, + const page_size_t& page_size, + ulint no, + ulint* len, + mem_heap_t* heap); + /*******************************************************************//** Flags the data tuple fields that are marked as extern storage in the update vector. We use this function to remember which fields we must mark as extern storage in a record inserted for an update. -@return number of flagged external columns */ -UNIV_INTERN +@return number of flagged external columns */ ulint btr_push_update_extern_fields( /*==========================*/ @@ -750,38 +825,74 @@ btr_push_update_extern_fields( /***********************************************************//** Sets a secondary index record's delete mark to the given value. This function is only used by the insert buffer merge mechanism. 
*/ -UNIV_INTERN void btr_cur_set_deleted_flag_for_ibuf( /*==============================*/ rec_t* rec, /*!< in/out: record */ page_zip_des_t* page_zip, /*!< in/out: compressed page corresponding to rec, or NULL - when the tablespace is - uncompressed */ + when the tablespace is uncompressed */ ibool val, /*!< in: value to set */ mtr_t* mtr); /*!< in/out: mini-transaction */ + +/******************************************************//** +The following function is used to set the deleted bit of a record. */ +UNIV_INLINE +void +btr_rec_set_deleted_flag( +/*=====================*/ + rec_t* rec, /*!< in/out: physical record */ + page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */ + ulint flag); /*!< in: nonzero if delete marked */ + +/** Latches the leaf page or pages requested. +@param[in] block leaf page where the search converged +@param[in] page_id page id of the leaf +@param[in] latch_mode BTR_SEARCH_LEAF, ... +@param[in] cursor cursor +@param[in] mtr mini-transaction +@return blocks and savepoints which actually latched. */ +btr_latch_leaves_t +btr_cur_latch_leaves( + buf_block_t* block, + const page_id_t& page_id, + const page_size_t& page_size, + ulint latch_mode, + btr_cur_t* cursor, + mtr_t* mtr); + /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this limit, merging it to a neighbor is tried */ -#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2) +#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \ + ((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100) /** A slot in the path array. We store here info on a search path down the tree. Each slot contains data on a single level of the tree. 
*/ +struct btr_path_t { + /* Assume a page like: + records: (inf, a, b, c, d, sup) + index of the record: 0, 1, 2, 3, 4, 5 + */ -struct btr_path_t{ - ulint nth_rec; /*!< index of the record - where the page cursor stopped on - this level (index in alphabetical - order); value ULINT_UNDEFINED - denotes array end */ - ulint n_recs; /*!< number of records on the page */ - ulint page_no; /*!< no of the page containing the record */ - ulint page_level; /*!< level of the page, if later we fetch - the page under page_no and it is no different - level then we know that the tree has been - reorganized */ + /** Index of the record where the page cursor stopped on this level + (index in alphabetical order). Value ULINT_UNDEFINED denotes array + end. In the above example, if the search stopped on record 'c', then + nth_rec will be 3. */ + ulint nth_rec; + + /** Number of the records on the page, not counting inf and sup. + In the above example n_recs will be 4. */ + ulint n_recs; + + /** Number of the page containing the record. */ + ulint page_no; + + /** Level of the page. If later we fetch the page under page_no + and it is no different level then we know that the tree has been + reorganized. */ + ulint page_level; }; #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */ @@ -858,7 +969,7 @@ struct btr_cur_t { other search modes; see also the NOTE in up_match! 
*/ ulint low_bytes; /*!< number of matched bytes to the - right at the time cursor positioned; + left at the time cursor positioned; only used internally in searches: not defined after the search */ ulint n_fields; /*!< prefix length used in a hash @@ -872,8 +983,22 @@ struct btr_cur_t { rows in range, we store in this array information of the path through the tree */ + rtr_info_t* rtr_info; /*!< rtree search info */ + btr_cur_t():thr(NULL), rtr_info(NULL) {} + /* default values */ }; +/******************************************************//** +The following function is used to set the deleted bit of a record. */ +UNIV_INLINE +void +btr_rec_set_deleted_flag( +/*=====================*/ + rec_t* rec, /*!< in/out: physical record */ + page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */ + ulint flag); /*!< in: nonzero if delete marked */ + + /** If pessimistic delete fails because of lack of file space, there is still a good change of success a little later. Try this many times. */ diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic index 43ee3304c0e..45c0d59a8aa 100644 --- a/storage/innobase/include/btr0cur.ic +++ b/storage/innobase/include/btr0cur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,7 +39,7 @@ if (btr_cur_limit_optimistic_insert_debug > 1\ #ifdef UNIV_DEBUG /*********************************************************//** Returns the page cursor component of a tree cursor. 
-@return pointer to page cursor component */ +@return pointer to page cursor component */ UNIV_INLINE page_cur_t* btr_cur_get_page_cur( @@ -51,7 +51,7 @@ btr_cur_get_page_cur( /*********************************************************//** Returns the buffer block on which the tree cursor is positioned. -@return pointer to buffer block */ +@return pointer to buffer block */ UNIV_INLINE buf_block_t* btr_cur_get_block( @@ -63,7 +63,7 @@ btr_cur_get_block( /*********************************************************//** Returns the record pointer of a tree cursor. -@return pointer to record */ +@return pointer to record */ UNIV_INLINE rec_t* btr_cur_get_rec( @@ -76,7 +76,7 @@ btr_cur_get_rec( /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. -@return pointer to compressed page, or NULL if the page is not compressed */ +@return pointer to compressed page, or NULL if the page is not compressed */ UNIV_INLINE page_zip_des_t* btr_cur_get_page_zip( @@ -86,20 +86,9 @@ btr_cur_get_page_zip( return(buf_block_get_page_zip(btr_cur_get_block(cursor))); } -/*********************************************************//** -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor) /*!< in: tree cursor */ -{ - page_cur_invalidate(&(cursor->page_cur)); -} - /*********************************************************//** Returns the page of a tree cursor. -@return pointer to page */ +@return pointer to page */ UNIV_INLINE page_t* btr_cur_get_page( @@ -130,7 +119,7 @@ btr_cur_position( /*********************************************************************//** Checks if compressing an index page where a btr cursor is placed makes sense. 
-@return TRUE if compression is recommended */ +@return TRUE if compression is recommended */ UNIV_INLINE ibool btr_cur_compress_recommendation( @@ -140,15 +129,17 @@ btr_cur_compress_recommendation( { const page_t* page; - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_is_block_fix( + mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX, cursor->index->table)); page = btr_cur_get_page(cursor); LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2, return(FALSE)); - if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) + if ((page_get_data_size(page) + < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)) || ((btr_page_get_next(page, mtr) == FIL_NULL) && (btr_page_get_prev(page, mtr) == FIL_NULL))) { @@ -167,7 +158,7 @@ btr_cur_compress_recommendation( /*********************************************************************//** Checks if the record on which the cursor is placed can be deleted without making tree compression necessary (or, recommended). -@return TRUE if can be deleted without recommended compression */ +@return TRUE if can be deleted without recommended compression */ UNIV_INLINE ibool btr_cur_can_delete_without_compress( @@ -183,7 +174,8 @@ btr_cur_can_delete_without_compress( page = btr_cur_get_page(cursor); - if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) + if ((page_get_data_size(page) - rec_size + < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)) || ((btr_page_get_next(page, mtr) == FIL_NULL) && (btr_page_get_prev(page, mtr) == FIL_NULL)) || (page_get_n_recs(page) < 2)) { @@ -211,6 +203,7 @@ btr_blob_op_is_update( { switch (op) { case BTR_STORE_INSERT: + case BTR_STORE_INSERT_BULK: return(FALSE); case BTR_STORE_INSERT_UPDATE: case BTR_STORE_UPDATE: @@ -220,4 +213,23 @@ btr_blob_op_is_update( ut_ad(0); return(FALSE); } + +/******************************************************//** +The following function is used to set the deleted bit of a record. 
*/ +UNIV_INLINE +void +btr_rec_set_deleted_flag( +/*=====================*/ + rec_t* rec, /*!< in/out: physical record */ + page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */ + ulint flag) /*!< in: nonzero if delete marked */ +{ + if (page_rec_is_comp(rec)) { + rec_set_deleted_flag_new(rec, page_zip, flag); + } else { + ut_ad(!page_zip); + rec_set_deleted_flag_old(rec, flag); + } +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index dafe14ce556..02f4faf24a5 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -34,22 +34,24 @@ Created 2/23/1996 Heikki Tuuri #include "btr0cur.h" #include "btr0btr.h" #include "btr0types.h" +#include "gis0rtree.h" /* Relative positions for a stored cursor position */ -#define BTR_PCUR_ON 1 -#define BTR_PCUR_BEFORE 2 -#define BTR_PCUR_AFTER 3 +enum btr_pcur_pos_t { + BTR_PCUR_ON = 1, + BTR_PCUR_BEFORE = 2, + BTR_PCUR_AFTER = 3, /* Note that if the tree is not empty, btr_pcur_store_position does not use the following, but only uses the above three alternatives, where the position is stored relative to a specific record: this makes implementation of a scroll cursor easier */ -#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ -#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ + BTR_PCUR_BEFORE_FIRST_IN_TREE = 4, /* in an empty tree */ + BTR_PCUR_AFTER_LAST_IN_TREE = 5 /* in an empty tree */ +}; /**************************************************************//** Allocates memory for a persistent cursor object and initializes the cursor. 
-@return own: persistent cursor */ -UNIV_INTERN +@return own: persistent cursor */ btr_pcur_t* btr_pcur_create_for_mysql(void); /*============================*/ @@ -57,7 +59,6 @@ btr_pcur_create_for_mysql(void); /**************************************************************//** Resets a persistent cursor object, freeing ::old_rec_buf if it is allocated and resetting the other members to their initial values. */ -UNIV_INTERN void btr_pcur_reset( /*===========*/ @@ -65,14 +66,12 @@ btr_pcur_reset( /**************************************************************//** Frees the memory for a persistent cursor object. */ -UNIV_INTERN void btr_pcur_free_for_mysql( /*====================*/ btr_pcur_t* cursor); /*!< in, own: persistent cursor */ /**************************************************************//** Copies the stored position of a pcur to another pcur. */ -UNIV_INTERN void btr_pcur_copy_stored_position( /*==========================*/ @@ -87,6 +86,14 @@ void btr_pcur_init( /*==========*/ btr_pcur_t* pcur); /*!< in: persistent cursor */ + +/** Free old_rec_buf. +@param[in] pcur Persistent cursor holding old_rec to be freed. */ +UNIV_INLINE +void +btr_pcur_free( + btr_pcur_t* pcur); + /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. 
*/ @@ -97,7 +104,7 @@ btr_pcur_open_low( dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level in the btree */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter @@ -119,7 +126,7 @@ btr_pcur_open_with_no_init_func( /*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter @@ -131,8 +138,9 @@ btr_pcur_open_with_no_init_func( page, but assume that the caller uses his btr search latch to protect the record! */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: + ulint has_search_latch, + /*!< in: latch mode the caller + currently has on search system: RW_S_LATCH, or 0 */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ @@ -181,13 +189,12 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then in the first case sets the cursor after last in tree, and in the latter case before first in tree. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */ -UNIV_INTERN void btr_pcur_open_on_user_rec_func( /*===========================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... 
*/ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent @@ -198,9 +205,11 @@ btr_pcur_open_on_user_rec_func( #define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \ btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m) /**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ +Positions a cursor at a randomly chosen position within a B-tree. +@return true if the index is available and we have put the cursor, false +if the index is unavailable */ UNIV_INLINE -void +bool btr_pcur_open_at_rnd_pos_func( /*==========================*/ dict_index_t* index, /*!< in: index */ @@ -235,7 +244,6 @@ cursor data structure, or just setting a flag if the cursor id before the first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the page where the cursor is positioned must not be empty if the index tree is not totally empty! */ -UNIV_INTERN void btr_pcur_store_position( /*====================*/ @@ -256,7 +264,6 @@ restores to before first or after the last in the tree. @return TRUE if the cursor position was stored when it was on a user record and it can be restored on a user record whose ordering fields are identical to the ones of the original user record */ -UNIV_INTERN ibool btr_pcur_restore_position_func( /*===========================*/ @@ -269,7 +276,7 @@ btr_pcur_restore_position_func( btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr) /*********************************************************//** Gets the rel_pos field for a cursor whose position has been stored. -@return BTR_PCUR_ON, ... */ +@return BTR_PCUR_ON, ... */ UNIV_INLINE ulint btr_pcur_get_rel_pos( @@ -289,7 +296,7 @@ btr_pcur_commit_specify_mtr( /*********************************************************//** Moves the persistent cursor to the next record in the tree. If no records are left, the cursor stays 'after last in tree'. 
-@return TRUE if the cursor was not after last in tree */ +@return TRUE if the cursor was not after last in tree */ UNIV_INLINE ibool btr_pcur_move_to_next( @@ -300,8 +307,7 @@ btr_pcur_move_to_next( /*********************************************************//** Moves the persistent cursor to the previous record in the tree. If no records are left, the cursor stays 'before first in tree'. -@return TRUE if the cursor was not before first in tree */ -UNIV_INTERN +@return TRUE if the cursor was not before first in tree */ ibool btr_pcur_move_to_prev( /*==================*/ @@ -319,7 +325,7 @@ btr_pcur_move_to_last_on_page( /*********************************************************//** Moves the persistent cursor to the next user record in the tree. If no user records are left, the cursor ends up 'after last in tree'. -@return TRUE if the cursor moved forward, ending on a user record */ +@return TRUE if the cursor moved forward, ending on a user record */ UNIV_INLINE ibool btr_pcur_move_to_next_user_rec( @@ -332,7 +338,6 @@ Moves the persistent cursor to the first record on the next page. Releases the latch on the current page, and bufferunfixes it. Note that there must not be modifications on the current page, as then the x-latch can be released only in mtr_commit. */ -UNIV_INTERN void btr_pcur_move_to_next_page( /*=======================*/ @@ -349,7 +354,6 @@ The alphabetical position of the cursor is guaranteed to be sensible on return, but it may happen that the cursor is not positioned on the last record of any page, because the structure of the tree may have changed while the cursor had no latches. */ -UNIV_INTERN void btr_pcur_move_backward_from_page( /*=============================*/ @@ -359,7 +363,7 @@ btr_pcur_move_backward_from_page( #ifdef UNIV_DEBUG /*********************************************************//** Returns the btr cursor component of a persistent cursor. 
-@return pointer to btr cursor component */ +@return pointer to btr cursor component */ UNIV_INLINE btr_cur_t* btr_pcur_get_btr_cur( @@ -367,7 +371,7 @@ btr_pcur_get_btr_cur( const btr_pcur_t* cursor); /*!< in: persistent cursor */ /*********************************************************//** Returns the page cursor component of a persistent cursor. -@return pointer to page cursor component */ +@return pointer to page cursor component */ UNIV_INLINE page_cur_t* btr_pcur_get_page_cur( @@ -375,7 +379,7 @@ btr_pcur_get_page_cur( const btr_pcur_t* cursor); /*!< in: persistent cursor */ /*********************************************************//** Returns the page of a persistent cursor. -@return pointer to the page */ +@return pointer to the page */ UNIV_INLINE page_t* btr_pcur_get_page( @@ -383,7 +387,7 @@ btr_pcur_get_page( const btr_pcur_t* cursor);/*!< in: persistent cursor */ /*********************************************************//** Returns the buffer block of a persistent cursor. -@return pointer to the block */ +@return pointer to the block */ UNIV_INLINE buf_block_t* btr_pcur_get_block( @@ -391,7 +395,7 @@ btr_pcur_get_block( const btr_pcur_t* cursor);/*!< in: persistent cursor */ /*********************************************************//** Returns the record of a persistent cursor. -@return pointer to the record */ +@return pointer to the record */ UNIV_INLINE rec_t* btr_pcur_get_rec( @@ -493,53 +497,53 @@ enum pcur_pos_t { selects, updates, and deletes. */ struct btr_pcur_t{ - btr_cur_t btr_cur; /*!< a B-tree cursor */ - ulint latch_mode; /*!< see TODO note below! 
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, - BTR_MODIFY_TREE, or BTR_NO_LATCHES, - depending on the latching state of - the page and tree where the cursor is - positioned; BTR_NO_LATCHES means that - the cursor is not currently positioned: - we say then that the cursor is - detached; it can be restored to - attached if the old position was - stored in old_rec */ - ulint old_stored; /*!< BTR_PCUR_OLD_STORED - or BTR_PCUR_OLD_NOT_STORED */ - rec_t* old_rec; /*!< if cursor position is stored, - contains an initial segment of the - latest record cursor was positioned - either on, before, or after */ - ulint old_n_fields; /*!< number of fields in old_rec */ - ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or - BTR_PCUR_AFTER, depending on whether - cursor was on, before, or after the - old_rec record */ - buf_block_t* block_when_stored;/* buffer block when the position was - stored */ - ib_uint64_t modify_clock; /*!< the modify clock value of the - buffer block when the cursor position - was stored */ - enum pcur_pos_t pos_state; /*!< btr_pcur_store_position() and - btr_pcur_restore_position() state. */ - ulint search_mode; /*!< PAGE_CUR_G, ... */ - trx_t* trx_if_known; /*!< the transaction, if we know it; - otherwise this field is not defined; - can ONLY BE USED in error prints in - fatal assertion failures! */ + /** a B-tree cursor */ + btr_cur_t btr_cur; + /** see TODO note below! 
+ BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, BTR_MODIFY_TREE or BTR_NO_LATCHES, + depending on the latching state of the page and tree where the cursor + is positioned; BTR_NO_LATCHES means that the cursor is not currently + positioned: + we say then that the cursor is detached; it can be restored to + attached if the old position was stored in old_rec */ + ulint latch_mode; + /** true if old_rec is stored */ + bool old_stored; + /** if cursor position is stored, contains an initial segment of the + latest record cursor was positioned either on, before or after */ + rec_t* old_rec; + /** number of fields in old_rec */ + ulint old_n_fields; + /** BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on + whether cursor was on, before, or after the old_rec record */ + enum btr_pcur_pos_t rel_pos; + /** buffer block when the position was stored */ + buf_block_t* block_when_stored; + /** the modify clock value of the buffer block when the cursor position + was stored */ + ib_uint64_t modify_clock; + /** the withdraw clock value of the buffer pool when the cursor + position was stored */ + ulint withdraw_clock; + /** btr_pcur_store_position() and btr_pcur_restore_position() state. */ + enum pcur_pos_t pos_state; + /** PAGE_CUR_G, ... */ + page_cur_mode_t search_mode; + /** the transaction, if we know it; otherwise this field is not defined; + can ONLY BE USED in error prints in fatal assertion failures! */ + trx_t* trx_if_known; /*-----------------------------*/ /* NOTE that the following fields may possess dynamically allocated memory which should be freed if not needed anymore! 
*/ - byte* old_rec_buf; /*!< NULL, or a dynamically allocated - buffer for old_rec */ - ulint buf_size; /*!< old_rec_buf size if old_rec_buf - is not NULL */ -}; + /** NULL, or a dynamically allocated buffer for old_rec */ + byte* old_rec_buf; + /** old_rec_buf size if old_rec_buf is not NULL */ + ulint buf_size; -#define BTR_PCUR_OLD_STORED 908467085 -#define BTR_PCUR_OLD_NOT_STORED 122766467 + /** Return the index of this persistent cursor */ + dict_index_t* index() const { return(btr_cur.index); } +}; #ifndef UNIV_NONINL #include "btr0pcur.ic" diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 1cd13824542..6cd968b4682 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -1,6 +1,7 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,7 +27,7 @@ Created 2/23/1996 Heikki Tuuri /*********************************************************//** Gets the rel_pos field for a cursor whose position has been stored. -@return BTR_PCUR_ON, ... */ +@return BTR_PCUR_ON, ... */ UNIV_INLINE ulint btr_pcur_get_rel_pos( @@ -35,7 +36,7 @@ btr_pcur_get_rel_pos( { ut_ad(cursor); ut_ad(cursor->old_rec); - ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); + ut_ad(cursor->old_stored); ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED || cursor->pos_state == BTR_PCUR_IS_POSITIONED); @@ -45,7 +46,7 @@ btr_pcur_get_rel_pos( #ifdef UNIV_DEBUG /*********************************************************//** Returns the btr cursor component of a persistent cursor. 
-@return pointer to btr cursor component */ +@return pointer to btr cursor component */ UNIV_INLINE btr_cur_t* btr_pcur_get_btr_cur( @@ -58,7 +59,7 @@ btr_pcur_get_btr_cur( /*********************************************************//** Returns the page cursor component of a persistent cursor. -@return pointer to page cursor component */ +@return pointer to page cursor component */ UNIV_INLINE page_cur_t* btr_pcur_get_page_cur( @@ -70,7 +71,7 @@ btr_pcur_get_page_cur( /*********************************************************//** Returns the page of a persistent cursor. -@return pointer to the page */ +@return pointer to the page */ UNIV_INLINE page_t* btr_pcur_get_page( @@ -84,7 +85,7 @@ btr_pcur_get_page( /*********************************************************//** Returns the buffer block of a persistent cursor. -@return pointer to the block */ +@return pointer to the block */ UNIV_INLINE buf_block_t* btr_pcur_get_block( @@ -98,7 +99,7 @@ btr_pcur_get_block( /*********************************************************//** Returns the record of a persistent cursor. 
-@return pointer to the record */ +@return pointer to the record */ UNIV_INLINE rec_t* btr_pcur_get_rec( @@ -260,7 +261,7 @@ btr_pcur_move_to_next_on_page( page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; } /*********************************************************//** @@ -276,7 +277,7 @@ btr_pcur_move_to_prev_on_page( page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; } /*********************************************************//** @@ -294,13 +295,13 @@ btr_pcur_move_to_last_on_page( page_cur_set_after_last(btr_pcur_get_block(cursor), btr_pcur_get_page_cur(cursor)); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; } /*********************************************************//** Moves the persistent cursor to the next user record in the tree. If no user records are left, the cursor ends up 'after last in tree'. -@return TRUE if the cursor moved forward, ending on a user record */ +@return TRUE if the cursor moved forward, ending on a user record */ UNIV_INLINE ibool btr_pcur_move_to_next_user_rec( @@ -311,7 +312,7 @@ btr_pcur_move_to_next_user_rec( { ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; loop: if (btr_pcur_is_after_last_on_page(cursor)) { @@ -336,7 +337,7 @@ loop: /*********************************************************//** Moves the persistent cursor to the next record in the tree. If no records are left, the cursor stays 'after last in tree'. 
-@return TRUE if the cursor was not after last in tree */ +@return TRUE if the cursor was not after last in tree */ UNIV_INLINE ibool btr_pcur_move_to_next( @@ -348,7 +349,7 @@ btr_pcur_move_to_next( ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; if (btr_pcur_is_after_last_on_page(cursor)) { @@ -396,9 +397,21 @@ btr_pcur_init( /*==========*/ btr_pcur_t* pcur) /*!< in: persistent cursor */ { - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; + pcur->old_stored = false; pcur->old_rec_buf = NULL; pcur->old_rec = NULL; + + pcur->btr_cur.rtr_info = NULL; +} + +/** Free old_rec_buf. +@param[in] pcur Persistent cursor holding old_rec to be freed. */ +UNIV_INLINE +void +btr_pcur_free( + btr_pcur_t* pcur) +{ + ut_free(pcur->old_rec_buf); } /**************************************************************//** @@ -411,7 +424,7 @@ btr_pcur_open_low( dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level in the btree */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter @@ -424,6 +437,7 @@ btr_pcur_open_low( mtr_t* mtr) /*!< in: mtr */ { btr_cur_t* btr_cursor; + dberr_t err = DB_SUCCESS; /* Initialize the cursor */ @@ -436,8 +450,33 @@ btr_pcur_open_low( btr_cursor = btr_pcur_get_btr_cur(cursor); - btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode, - btr_cursor, 0, file, line, mtr); + ut_ad(!dict_index_is_spatial(index)); + + if (dict_table_is_intrinsic(index->table)) { + ut_ad((latch_mode & BTR_MODIFY_LEAF) + || (latch_mode & BTR_SEARCH_LEAF) + || (latch_mode & BTR_MODIFY_TREE)); + err = btr_cur_search_to_nth_level_with_no_latch( + index, level, tuple, mode, btr_cursor, + file, line, mtr, + (((latch_mode & 
BTR_MODIFY_LEAF) + || (latch_mode & BTR_MODIFY_TREE)) ? true : false)); + } else { + err = btr_cur_search_to_nth_level( + index, level, tuple, mode, latch_mode, + btr_cursor, 0, file, line, mtr); + } + + if (err != DB_SUCCESS) { + ib::warn() << " Error code: " << err + << " btr_pcur_open_low " + << " level: " << level + << " called from file: " + << file << " line: " << line + << " table: " << index->table->name + << " index: " << index->name; + } + cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->trx_if_known = NULL; @@ -452,7 +491,7 @@ btr_pcur_open_with_no_init_func( /*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter @@ -464,8 +503,9 @@ btr_pcur_open_with_no_init_func( page, but assume that the caller uses his btr search latch to protect the record! 
*/ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: + ulint has_search_latch, + /*!< in: latch mode the caller + currently has on search system: RW_S_LATCH, or 0 */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ @@ -474,19 +514,29 @@ btr_pcur_open_with_no_init_func( btr_cur_t* btr_cursor; dberr_t err = DB_SUCCESS; - cursor->latch_mode = latch_mode; + cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode); cursor->search_mode = mode; /* Search with the tree cursor */ btr_cursor = btr_pcur_get_btr_cur(cursor); - err = btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, has_search_latch, - file, line, mtr); + if (dict_table_is_intrinsic(index->table)) { + ut_ad((latch_mode & BTR_MODIFY_LEAF) + || (latch_mode & BTR_SEARCH_LEAF)); + err = btr_cur_search_to_nth_level_with_no_latch( + index, 0, tuple, mode, btr_cursor, + file, line, mtr, + ((latch_mode & BTR_MODIFY_LEAF) ? 
true : false)); + } else { + err = btr_cur_search_to_nth_level( + index, 0, tuple, mode, latch_mode, btr_cursor, + has_search_latch, file, line, mtr); + } + cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; cursor->trx_if_known = NULL; return err; @@ -518,11 +568,18 @@ btr_pcur_open_at_index_side( btr_pcur_init(pcur); } - err = btr_cur_open_at_index_side(from_left, index, latch_mode, - btr_pcur_get_btr_cur(pcur), level, mtr); + if (dict_table_is_intrinsic(index->table)) { + err = btr_cur_open_at_index_side_with_no_latch( + from_left, index, + btr_pcur_get_btr_cur(pcur), level, mtr); + } else { + err = btr_cur_open_at_index_side( + from_left, index, latch_mode, + btr_pcur_get_btr_cur(pcur), level, mtr); + } pcur->pos_state = BTR_PCUR_IS_POSITIONED; - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; + pcur->old_stored = false; pcur->trx_if_known = NULL; @@ -530,9 +587,11 @@ btr_pcur_open_at_index_side( } /**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ +Positions a cursor at a randomly chosen position within a B-tree. 
+@return true if the index is available and we have put the cursor, false +if the index is unavailable */ UNIV_INLINE -void +bool btr_pcur_open_at_rnd_pos_func( /*==========================*/ dict_index_t* index, /*!< in: index */ @@ -549,13 +608,17 @@ btr_pcur_open_at_rnd_pos_func( btr_pcur_init(cursor); - btr_cur_open_at_rnd_pos_func(index, latch_mode, - btr_pcur_get_btr_cur(cursor), - file, line, mtr); + bool available; + + available = btr_cur_open_at_rnd_pos_func(index, latch_mode, + btr_pcur_get_btr_cur(cursor), + file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; cursor->trx_if_known = NULL; + + return(available); } /**************************************************************//** @@ -576,18 +639,20 @@ btr_pcur_close( /*===========*/ btr_pcur_t* cursor) /*!< in: persistent cursor */ { - if (cursor->old_rec_buf != NULL) { + ut_free(cursor->old_rec_buf); - mem_free(cursor->old_rec_buf); - - cursor->old_rec = NULL; - cursor->old_rec_buf = NULL; + if (cursor->btr_cur.rtr_info) { + rtr_clean_rtr_info(cursor->btr_cur.rtr_info, true); + cursor->btr_cur.rtr_info = NULL; } + cursor->old_rec = NULL; + cursor->old_rec_buf = NULL; cursor->btr_cur.page_cur.rec = NULL; cursor->btr_cur.page_cur.block = NULL; + cursor->old_rec = NULL; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; cursor->latch_mode = BTR_NO_LATCHES; cursor->pos_state = BTR_PCUR_NOT_POSITIONED; @@ -608,5 +673,5 @@ btr_pcur_move_before_first_on_page( page_cur_set_before_first(btr_pcur_get_block(cursor), btr_pcur_get_page_cur(cursor)); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + cursor->old_stored = false; } diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index e25e2a4d49c..12659037904 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -34,59 +34,54 @@ Created 2/17/1996 Heikki Tuuri #include "mtr0mtr.h" 
#include "ha0ha.h" -/*****************************************************************//** -Creates and initializes the adaptive search system at a database start. */ -UNIV_INTERN +/** Creates and initializes the adaptive search system at a database start. +@param[in] hash_size hash table size. */ void -btr_search_sys_create( -/*==================*/ - ulint hash_size); /*!< in: hash index hash table size */ -/*****************************************************************//** -Frees the adaptive search system at a database shutdown. */ -UNIV_INTERN -void -btr_search_sys_free(void); -/*=====================*/ +btr_search_sys_create(ulint hash_size); -/********************************************************************//** -Disable the adaptive hash search system and empty the index. */ -UNIV_INTERN +/** Resize hash index hash table. +@param[in] hash_size hash index hash table size */ void -btr_search_disable(void); -/*====================*/ -/********************************************************************//** -Enable the adaptive hash search system. */ -UNIV_INTERN +btr_search_sys_resize(ulint hash_size); + +/** Frees the adaptive search system at a database shutdown. */ void -btr_search_enable(void); -/*====================*/ +btr_search_sys_free(); + +/** Disable the adaptive hash search system and empty the index. +@param need_mutex need to acquire dict_sys->mutex */ +void +btr_search_disable( + bool need_mutex); +/** Enable the adaptive hash search system. */ +void +btr_search_enable(); /********************************************************************//** Returns search info for an index. -@return search info; search mutex reserved */ +@return search info; search mutex reserved */ UNIV_INLINE btr_search_t* btr_search_get_info( /*================*/ dict_index_t* index) /*!< in: index */ MY_ATTRIBUTE((nonnull)); -/*****************************************************************//** -Creates and initializes a search info struct. 
-@return own: search info struct */ -UNIV_INTERN + +/** Creates and initializes a search info struct. +@param[in] heap heap where created. +@return own: search info struct */ btr_search_t* -btr_search_info_create( -/*===================*/ - mem_heap_t* heap); /*!< in: heap where created */ -/*****************************************************************//** -Returns the value of ref_count. The value is protected by -btr_search_latch. -@return ref_count value. */ -UNIV_INTERN +btr_search_info_create(mem_heap_t* heap); + +/** Returns the value of ref_count. The value is protected by latch. +@param[in] info search info +@param[in] index index identifier +@return ref_count value. */ ulint btr_search_info_get_ref_count( -/*==========================*/ - btr_search_t* info); /*!< in: search info. */ + btr_search_t* info, + dict_index_t* index); + /*********************************************************************//** Updates the search info. */ UNIV_INLINE @@ -95,108 +90,180 @@ btr_search_info_update( /*===================*/ dict_index_t* index, /*!< in: index of the cursor */ btr_cur_t* cursor);/*!< in: cursor which was just positioned */ -/******************************************************************//** -Tries to guess the right search position based on the hash search info + +/** Tries to guess the right search position based on the hash search info of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match both have sensible values. -@return TRUE if succeeded */ -UNIV_INTERN +@param[in,out] index index +@param[in,out] info index search info +@param[in] tuple logical record +@param[in] mode PAGE_CUR_L, .... +@param[in] latch_mode BTR_SEARCH_LEAF, ...; + NOTE that only if has_search_latch is 0, we will + have a latch set on the cursor page, otherwise + we assume the caller uses his search latch + to protect the record! 
+@param[out] cursor tree cursor +@param[in] has_search_latch + latch mode the caller currently has on + search system: RW_S/X_LATCH or 0 +@param[in] mtr mini transaction +@return TRUE if succeeded */ ibool btr_search_guess_on_hash( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - btr_search_t* info, /*!< in: index search info */ - const dtuple_t* tuple, /*!< in: logical record */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /*!< out: tree cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr); /*!< in: mtr */ -/********************************************************************//** -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). */ -UNIV_INTERN + dict_index_t* index, + btr_search_t* info, + const dtuple_t* tuple, + ulint mode, + ulint latch_mode, + btr_cur_t* cursor, + ulint has_search_latch, + mtr_t* mtr); + +/** Moves or deletes hash entries for moved records. If new_page is already +hashed, then the hash index for page, if any, is dropped. If new_page is not +hashed, and page is hashed, then a new hash index is built to new_page with the +same parameters as page (this often happens when a page is split). +@param[in,out] new_block records are copied to this page. +@param[in,out] block index page from which record are copied, and the + copied records will be deleted from this page. 
+@param[in,out] index record descriptor */ void btr_search_move_or_delete_hash_entries( -/*===================================*/ - buf_block_t* new_block, /*!< in: records are copied - to this page */ - buf_block_t* block, /*!< in: index page from which - records were copied, and the - copied records will be deleted - from this page */ - dict_index_t* index); /*!< in: record descriptor */ -/********************************************************************//** -Drops a page hash index. */ -UNIV_INTERN + buf_block_t* new_block, + buf_block_t* block, + dict_index_t* index); + +/** Drop any adaptive hash index entries that point to an index page. +@param[in,out] block block containing index page, s- or x-latched, or an + index page for which we know that + block->buf_fix_count == 0 or it is an index page which + has already been removed from the buf_pool->page_hash + i.e.: it is in state BUF_BLOCK_REMOVE_HASH */ void -btr_search_drop_page_hash_index( -/*============================*/ - buf_block_t* block); /*!< in: block containing index page, - s- or x-latched, or an index page - for which we know that - block->buf_fix_count == 0 */ -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ -UNIV_INTERN +btr_search_drop_page_hash_index(buf_block_t* block); + +/** Drop any adaptive hash index entries that may point to an index +page that may be in the buffer pool, when a page is evicted from the +buffer pool or freed in a file segment. 
+@param[in] page_id page id +@param[in] page_size page size */ void btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no); /*!< in: page number */ -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN + const page_id_t& page_id, + const page_size_t& page_size); + +/** Updates the page hash index when a single record is inserted on a page. +@param[in] cursor cursor which was positioned to the place to insert + using btr_cur_search_, and the new record has been + inserted next to the cursor. */ void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned to the +btr_search_update_hash_node_on_insert(btr_cur_t* cursor); + +/** Updates the page hash index when a single record is inserted on a page. +@param[in] cursor cursor which was positioned to the place to insert using btr_cur_search_..., and the new record has been inserted next to the cursor */ -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -/********************************************************************//** -Updates the page hash index when a single record is deleted from a page. */ -UNIV_INTERN +btr_search_update_hash_on_insert(btr_cur_t* cursor); + +/** Updates the page hash index when a single record is deleted from a page. 
+@param[in] cursor cursor which was positioned on the record to delete + using btr_cur_search_, the record is not yet deleted.*/ void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/********************************************************************//** -Validates the search system. -@return TRUE if ok */ -UNIV_INTERN -ibool -btr_search_validate(void); -/*======================*/ -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ +btr_search_update_hash_on_delete(btr_cur_t* cursor); + +/** Validates the search system. +@return true if ok */ +bool +btr_search_validate(); + +/** X-Lock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_x_lock(const dict_index_t* index); + +/** X-Unlock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_x_unlock(const dict_index_t* index); + +/** Lock all search latches in exclusive mode. */ +UNIV_INLINE +void +btr_search_x_lock_all(); + +/** Unlock all search latches from exclusive mode. */ +UNIV_INLINE +void +btr_search_x_unlock_all(); + +/** S-Lock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_s_lock(const dict_index_t* index); + +/** S-Unlock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_s_unlock(const dict_index_t* index); + +/** Lock all search latches in shared mode. */ +UNIV_INLINE +void +btr_search_s_lock_all(); + +#ifdef UNIV_DEBUG +/** Check if thread owns all the search latches. 
+@param[in] mode lock mode check +@retval true if owns all of them +@retval false if does not own some of them */ +UNIV_INLINE +bool +btr_search_own_all(ulint mode); + +/** Check if thread owns any of the search latches. +@param[in] mode lock mode check +@retval true if owns any of them +@retval false if owns no search latch */ +UNIV_INLINE +bool +btr_search_own_any(ulint mode); +#endif /* UNIV_DEBUG */ + +/** Unlock all search latches from shared mode. */ +UNIV_INLINE +void +btr_search_s_unlock_all(); + +/** Get the latch based on index attributes. +A latch is selected from an array of latches using pair of index-id, space-id. +@param[in] index index handler +@return latch */ +UNIV_INLINE +rw_lock_t* +btr_get_search_latch(const dict_index_t* index); + +/** Get the hash-table based on index attributes. +A table is selected from an array of tables using pair of index-id, space-id. +@param[in] index index handler +@return hash table */ +UNIV_INLINE +hash_table_t* +btr_get_search_table(const dict_index_t* index); /** The search info struct in an index */ struct btr_search_t{ ulint ref_count; /*!< Number of blocks in this index tree that have search index built i.e. block->index points to this index. - Protected by btr_search_latch except + Protected by search latch except when during initialization in btr_search_info_create(). */ @@ -205,6 +272,8 @@ struct btr_search_t{ the machine word, i.e., they cannot be turned into bit-fields. 
*/ buf_block_t* root_guess;/*!< the root page frame when it was last time fetched, or NULL */ + ulint withdraw_clock; /*!< the withdraw clock value of the buffer + pool when root_guess was stored */ ulint hash_analysis; /*!< when this exceeds BTR_SEARCH_HASH_ANALYSIS, the hash analysis starts; this is reset if no @@ -248,14 +317,24 @@ struct btr_search_t{ /** The hash index system */ struct btr_search_sys_t{ - hash_table_t* hash_index; /*!< the adaptive hash index, + hash_table_t** hash_tables; /*!< the adaptive hash tables, mapping dtuple_fold values to rec_t pointers on index pages */ }; +/** Latches protecting access to adaptive hash index. */ +extern rw_lock_t** btr_search_latches; + /** The adaptive hash index */ extern btr_search_sys_t* btr_search_sys; +#ifdef UNIV_SEARCH_PERF_STAT +/** Number of successful adaptive hash index lookups */ +extern ulint btr_search_n_succ; +/** Number of failed adaptive hash index lookups */ +extern ulint btr_search_n_hash_fail; +#endif /* UNIV_SEARCH_PERF_STAT */ + /** After change in n_fields or n_bytes in info, this many rounds are waited before starting the hash analysis again: this is to save CPU time when there is no hope in building a hash index. */ diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic index 0bd869be136..5f7c39ba500 100644 --- a/storage/innobase/include/btr0sea.ic +++ b/storage/innobase/include/btr0sea.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,6 @@ Created 2/17/1996 Heikki Tuuri /*********************************************************************//** Updates the search info. 
*/ -UNIV_INTERN void btr_search_info_update_slow( /*========================*/ @@ -38,7 +37,7 @@ btr_search_info_update_slow( /********************************************************************//** Returns search info for an index. -@return search info; search mutex reserved */ +@return search info; search mutex reserved */ UNIV_INLINE btr_search_t* btr_search_get_info( @@ -57,13 +56,14 @@ btr_search_info_update( dict_index_t* index, /*!< in: index of the cursor */ btr_cur_t* cursor) /*!< in: cursor which was just positioned */ { + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S)); + ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X)); + + if (dict_index_is_spatial(index) || !btr_search_enabled) { + return; + } + btr_search_t* info; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - info = btr_search_get_info(index); info->hash_analysis++; @@ -80,3 +80,144 @@ btr_search_info_update( btr_search_info_update_slow(info, cursor); } + +/** X-Lock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_x_lock(const dict_index_t* index) +{ + rw_lock_x_lock(btr_get_search_latch(index)); +} + +/** X-Unlock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_x_unlock(const dict_index_t* index) +{ + rw_lock_x_unlock(btr_get_search_latch(index)); +} + +/** Lock all search latches in exclusive mode. */ +UNIV_INLINE +void +btr_search_x_lock_all() +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + rw_lock_x_lock(btr_search_latches[i]); + } +} + +/** Unlock all search latches from exclusive mode. 
*/ +UNIV_INLINE +void +btr_search_x_unlock_all() +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + rw_lock_x_unlock(btr_search_latches[i]); + } +} + +/** S-Lock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_s_lock(const dict_index_t* index) +{ + rw_lock_s_lock(btr_get_search_latch(index)); +} + +/** S-Unlock the search latch (corresponding to given index) +@param[in] index index handler */ +UNIV_INLINE +void +btr_search_s_unlock(const dict_index_t* index) +{ + rw_lock_s_unlock(btr_get_search_latch(index)); +} + +/** Lock all search latches in shared mode. */ +UNIV_INLINE +void +btr_search_s_lock_all() +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + rw_lock_s_lock(btr_search_latches[i]); + } +} + +/** Unlock all search latches from shared mode. */ +UNIV_INLINE +void +btr_search_s_unlock_all() +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + rw_lock_s_unlock(btr_search_latches[i]); + } +} + +#ifdef UNIV_DEBUG +/** Check if thread owns all the search latches. +@param[in] mode lock mode check +@retval true if owns all of them +@retval false if does not own some of them */ +UNIV_INLINE +bool +btr_search_own_all(ulint mode) +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + if (!rw_lock_own(btr_search_latches[i], mode)) { + return(false); + } + } + return(true); +} + +/** Check if thread owns any of the search latches. +@param[in] mode lock mode check +@retval true if owns any of them +@retval false if owns no search latch */ +UNIV_INLINE +bool +btr_search_own_any(ulint mode) +{ + for (ulint i = 0; i < btr_ahi_parts; ++i) { + if (rw_lock_own(btr_search_latches[i], mode)) { + return(true); + } + } + return(false); +} +#endif /* UNIV_DEBUG */ + +/** Get the adaptive hash search index latch for a b-tree. 
+@param[in] index b-tree index +@return latch */ +UNIV_INLINE +rw_lock_t* +btr_get_search_latch(const dict_index_t* index) +{ + ut_ad(index != NULL); + + ulint ifold = ut_fold_ulint_pair(static_cast(index->id), + static_cast(index->space)); + + return(btr_search_latches[ifold % btr_ahi_parts]); +} + +/** Get the hash-table based on index attributes. +A table is selected from an array of tables using pair of index-id, space-id. +@param[in] index index handler +@return hash table */ +UNIV_INLINE +hash_table_t* +btr_get_search_table(const dict_index_t* index) +{ + ut_ad(index != NULL); + + ulint ifold = ut_fold_ulint_pair(static_cast(index->id), + static_cast(index->space)); + + return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]); +} diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 04b69d8145c..19c21982011 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,6 +31,7 @@ Created 2/17/1996 Heikki Tuuri #include "rem0types.h" #include "page0types.h" #include "sync0rw.h" +#include "page0size.h" /** Persistent cursor */ struct btr_pcur_t; @@ -39,165 +40,51 @@ struct btr_cur_t; /** B-tree search information for the adaptive hash index */ struct btr_search_t; -#ifndef UNIV_HOTBACKUP - -/** @brief The latch protecting the adaptive search system - -This latch protects the -(1) hash index; -(2) columns of a record to which we have a pointer in the hash index; - -but does NOT protect: - -(3) next record offset field in a record; -(4) next or previous records on the same page. 
- -Bear in mind (3) and (4) when using the hash index. -*/ -extern rw_lock_t* btr_search_latch_temp; - -#endif /* UNIV_HOTBACKUP */ - -/** The latch protecting the adaptive search system */ -#define btr_search_latch (*btr_search_latch_temp) - -/** Flag: has the search system been enabled? -Protected by btr_search_latch. */ +/** Is search system enabled. +Search system is protected by array of latches. */ extern char btr_search_enabled; -#ifdef UNIV_BLOB_DEBUG -# include "buf0types.h" -/** An index->blobs entry for keeping track of off-page column references */ -struct btr_blob_dbg_t; - -/** Insert to index->blobs a reference to an off-page column. -@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_insert( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/** Remove from index->blobs a reference to an off-page column. -@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_delete( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/**************************************************************//** -Add to index->blobs any references to off-page columns from a record. 
-@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add_rec( -/*=================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Remove from index->blobs any references to off-page columns from a record. -@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove_rec( -/*====================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Count and add to index->blobs any references to off-page columns -from records on a page. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add( -/*=============*/ - const page_t* page, /*!< in: rewritten page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Count and remove from index->blobs any references to off-page columns -from records on a page. -Used when reorganizing a page, before copying the records. -@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove( -/*================*/ - const page_t* page, /*!< in: b-tree page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Restore in index->blobs any references to off-page columns -Used when page reorganize fails due to compressed page overflow. 
*/ -UNIV_INTERN -void -btr_blob_dbg_restore( -/*=================*/ - const page_t* npage, /*!< in: page that failed to compress */ - const page_t* page, /*!< in: copy of original page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/** Operation that processes the BLOB references of an index record -@param[in] rec record on index page -@param[in/out] index the index tree of the record -@param[in] offsets rec_get_offsets(rec,index) -@param[in] ctx context (for logging) -@return number of BLOB references processed */ -typedef ulint (*btr_blob_dbg_op_f) -(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx); - -/**************************************************************//** -Count and process all references to off-page columns on a page. -@return number of references processed */ -UNIV_INTERN -ulint -btr_blob_dbg_op( -/*============*/ - const page_t* page, /*!< in: B-tree leaf page */ - const rec_t* rec, /*!< in: record to start from - (NULL to process the whole page) */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx, /*!< in: context (for logging) */ - const btr_blob_dbg_op_f op) /*!< in: operation on records */ - MY_ATTRIBUTE((nonnull(1,3,4,5))); -#else /* UNIV_BLOB_DEBUG */ -# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0) -# define btr_blob_dbg_add(page, index, ctx) ((void) 0) -# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0) -# define btr_blob_dbg_remove(page, index, ctx) ((void) 0) -# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0) -# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0) -#endif /* UNIV_BLOB_DEBUG */ +/** Number of adaptive hash index partition. */ +extern ulong btr_ahi_parts; /** The size of a reference to data stored on a different page. The reference is stored at the end of the prefix of the field in the index record. 
*/ -#define BTR_EXTERN_FIELD_REF_SIZE 20 +#define BTR_EXTERN_FIELD_REF_SIZE FIELD_REF_SIZE -/** A BLOB field reference full of zero, for use in assertions and tests. -Initially, BLOB field references are set to zero, in -dtuple_convert_big_rec(). */ -extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]; +/** If the data don't exceed the size, the data are stored locally. */ +#define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ + (BTR_EXTERN_FIELD_REF_SIZE * 2) + +/** The information is used for creating a new index tree when +applying TRUNCATE log record during recovery */ +struct btr_create_t { + + explicit btr_create_t(const byte* const ptr) + : + format_flags(), + n_fields(), + field_len(), + fields(ptr), + trx_id_pos(ULINT_UNDEFINED) + { + /* Do nothing */ + } + + /** Page format */ + ulint format_flags; + + /** Numbr of index fields */ + ulint n_fields; + + /** The length of the encoded meta-data */ + ulint field_len; + + /** Field meta-data, encoded. */ + const byte* const fields; + + /** Position of trx-id column. */ + ulint trx_id_pos; +}; #endif diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index 7fc4408505d..c2c100e83e6 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -39,7 +39,7 @@ Allocate a block. The thread calling this function must hold buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. The buf_pool->mutex may be released and reacquired. This function should only be used for allocating compressed page frames. -@return allocated block, never NULL */ +@return allocated block, never NULL */ UNIV_INLINE byte* buf_buddy_alloc( @@ -70,6 +70,24 @@ buf_buddy_free( up to UNIV_PAGE_SIZE */ MY_ATTRIBUTE((nonnull)); +/** Reallocate a block. 
+@param[in] buf_pool buffer pool instance +@param[in] buf block to be reallocated, must be pointed +to by the buffer pool +@param[in] size block size, up to UNIV_PAGE_SIZE +@retval false if failed because of no free blocks. */ +bool +buf_buddy_realloc( + buf_pool_t* buf_pool, + void* buf, + ulint size); + +/** Combine all pairs of free buddies. +@param[in] buf_pool buffer pool instance */ +void +buf_buddy_condense_free( + buf_pool_t* buf_pool); + #ifndef UNIV_NONINL # include "buf0buddy.ic" #endif diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic index 4352ebe8945..2b6d76df009 100644 --- a/storage/innobase/include/buf0buddy.ic +++ b/storage/innobase/include/buf0buddy.ic @@ -30,15 +30,12 @@ Created December 2006 by Marko Makela #include "buf0buf.h" #include "buf0buddy.h" -#include "ut0ut.h" -#include "sync0sync.h" /**********************************************************************//** Allocate a block. The thread calling this function must hold buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. The buf_pool_mutex may be released and reacquired. -@return allocated block, never NULL */ -UNIV_INTERN +@return allocated block, never NULL */ void* buf_buddy_alloc_low( /*================*/ @@ -54,7 +51,6 @@ buf_buddy_alloc_low( /**********************************************************************//** Deallocate a block. */ -UNIV_INTERN void buf_buddy_free_low( /*===============*/ @@ -67,7 +63,7 @@ buf_buddy_free_low( /**********************************************************************//** Get the index of buf_pool->zip_free[] for a given block size. -@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ +@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ UNIV_INLINE ulint buf_buddy_get_slot( @@ -91,7 +87,7 @@ Allocate a block. The thread calling this function must hold buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. 
The buf_pool->mutex may be released and reacquired. This function should only be used for allocating compressed page frames. -@return allocated block, never NULL */ +@return allocated block, never NULL */ UNIV_INLINE byte* buf_buddy_alloc( diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c737f3a6f1d..c4bc107044d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri #include "fil0fil.h" #include "mtr0types.h" #include "buf0types.h" +#ifndef UNIV_INNOCHECKSUM #include "hash0hash.h" #include "ut0byte.h" #include "page0types.h" @@ -38,6 +39,11 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #include "os0proc.h" #include "log0log.h" +#include "srv0srv.h" +#include + +// Forward declaration +struct fil_addr_t; /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -85,18 +91,28 @@ Created 11/5/1995 Heikki Tuuri extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools of the database */ + +extern volatile bool buf_pool_withdrawing; /*!< true when withdrawing buffer + pool pages might cause page relocation */ + +extern volatile ulint buf_withdraw_clock; /*!< the clock is incremented + every time a pointer to a page may + become obsolete */ + #ifdef UNIV_DEBUG -extern ibool buf_debug_prints;/*!< If this is set TRUE, the program - prints info whenever read or flush - occurs */ +extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing + buffer pool is not allowed. 
*/ #endif /* UNIV_DEBUG */ -extern ulint srv_buf_pool_instances; -extern ulint srv_buf_pool_curr_size; #else /* !UNIV_HOTBACKUP */ extern buf_block_t* back_block1; /*!< first block, for --apply-log */ extern buf_block_t* back_block2; /*!< second block, for page reorganize */ #endif /* !UNIV_HOTBACKUP */ +#endif /* !UNIV_INNOCHECKSUM */ +/** Magic value to use instead of checksums when they are disabled */ +#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL + +#ifndef UNIV_INNOCHECKSUM /** @brief States of a control block @see buf_page_t @@ -204,6 +220,127 @@ struct buf_pools_list_size_t { ulint flush_list_bytes; /*!< flush_list size in bytes */ }; +/** Page identifier. */ +class page_id_t { +public: + + /** Constructor from (space, page_no). + @param[in] space tablespace id + @param[in] page_no page number */ + page_id_t(ulint space, ulint page_no) + : + m_space(static_cast(space)), + m_page_no(static_cast(page_no)), + m_fold(ULINT_UNDEFINED) + { + ut_ad(space <= 0xFFFFFFFFU); + ut_ad(page_no <= 0xFFFFFFFFU); + } + + /** Retrieve the tablespace id. + @return tablespace id */ + inline ib_uint32_t space() const + { + return(m_space); + } + + /** Retrieve the page number. + @return page number */ + inline ib_uint32_t page_no() const + { + return(m_page_no); + } + + /** Retrieve the fold value. + @return fold value */ + inline ulint fold() const + { + /* Initialize m_fold if it has not been initialized yet. */ + if (m_fold == ULINT_UNDEFINED) { + m_fold = (m_space << 20) + m_space + m_page_no; + ut_ad(m_fold != ULINT_UNDEFINED); + } + + return(m_fold); + } + + /** Copy the values from a given page_id_t object. + @param[in] src page id object whose values to fetch */ + inline void copy_from(const page_id_t& src) + { + m_space = src.space(); + m_page_no = src.page_no(); + m_fold = src.fold(); + } + + /** Reset the values from a (space, page_no). 
+ @param[in] space tablespace id + @param[in] page_no page number */ + inline void reset(ulint space, ulint page_no) + { + m_space = static_cast(space); + m_page_no = static_cast(page_no); + m_fold = ULINT_UNDEFINED; + + ut_ad(space <= 0xFFFFFFFFU); + ut_ad(page_no <= 0xFFFFFFFFU); + } + + /** Reset the page number only. + @param[in] page_no page number */ + inline void set_page_no(ulint page_no) + { + m_page_no = static_cast(page_no); + m_fold = ULINT_UNDEFINED; + + ut_ad(page_no <= 0xFFFFFFFFU); + } + + /** Check if a given page_id_t object is equal to the current one. + @param[in] a page_id_t object to compare + @return true if equal */ + inline bool equals_to(const page_id_t& a) const + { + return(a.space() == m_space && a.page_no() == m_page_no); + } + +private: + + /** Tablespace id. */ + ib_uint32_t m_space; + + /** Page number. */ + ib_uint32_t m_page_no; + + /** A fold value derived from m_space and m_page_no, + used in hashing. */ + mutable ulint m_fold; + + /* Disable implicit copying. */ + void operator=(const page_id_t&); + + /** Declare the overloaded global operator<< as a friend of this + class. Refer to the global declaration for further details. Print + the given page_id_t object. + @param[in,out] out the output stream + @param[in] page_id the page_id_t object to be printed + @return the output stream */ + friend + std::ostream& + operator<<( + std::ostream& out, + const page_id_t& page_id); +}; + +/** Print the given page_id_t object. +@param[in,out] out the output stream +@param[in] page_id the page_id_t object to be printed +@return the output stream */ +std::ostream& +operator<<( + std::ostream& out, + const page_id_t& page_id); + #ifndef UNIV_HOTBACKUP /********************************************************************//** Acquire mutex on all buffer pool instances */ @@ -221,8 +358,7 @@ buf_pool_mutex_exit_all(void); /********************************************************************//** Creates the buffer pool. 
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ -UNIV_INTERN +@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ dberr_t buf_pool_init( /*=========*/ @@ -231,42 +367,62 @@ buf_pool_init( /********************************************************************//** Frees the buffer pool at shutdown. This must not be invoked before freeing all mutexes. */ -UNIV_INTERN void buf_pool_free( /*==========*/ ulint n_instances); /*!< in: numbere of instances to free */ +/** Determines if a block is intended to be withdrawn. +@param[in] buf_pool buffer pool instance +@param[in] block pointer to control block +@retval true if will be withdrawn */ +bool +buf_block_will_withdrawn( + buf_pool_t* buf_pool, + const buf_block_t* block); + +/** Determines if a frame is intended to be withdrawn. +@param[in] buf_pool buffer pool instance +@param[in] ptr pointer to a frame +@retval true if will be withdrawn */ +bool +buf_frame_will_withdrawn( + buf_pool_t* buf_pool, + const byte* ptr); + +/** Resize the buffer pool based on srv_buf_pool_size from +srv_buf_pool_old_size. */ +void +buf_pool_resize(); + +/** This is the thread for resizing buffer pool. It waits for an event and +when waked up either performs a resizing and sleeps again. +@param[in] arg a dummy parameter required by os_thread_create. +@return this function does not return, calls os_thread_exit() +*/ +extern "C" +os_thread_ret_t +DECLARE_THREAD(buf_resize_thread)( +/*==============================*/ + void* arg); /*!< in: a dummy parameter + required by os_thread_create */ + /********************************************************************//** Clears the adaptive hash index on all pages in the buffer pool. */ -UNIV_INTERN void buf_pool_clear_hash_index(void); /*===========================*/ -/********************************************************************//** -Relocate a buffer control block. Relocates the block on the LRU list -and in buf_pool->page_hash. 
Does not relocate bpage->list. -The caller must take care of relocating bpage->list. */ -UNIV_INTERN -void -buf_relocate( -/*=========*/ - buf_page_t* bpage, /*!< in/out: control block being relocated; - buf_page_get_state(bpage) must be - BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ - buf_page_t* dpage) /*!< in/out: destination control block */ - MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Gets the current size of buffer buf_pool in bytes. -@return size in bytes */ +@return size in bytes */ UNIV_INLINE ulint buf_pool_get_curr_size(void); /*========================*/ /*********************************************************************//** Gets the current size of buffer buf_pool in frames. -@return size in pages */ +@return size in pages */ UNIV_INLINE ulint buf_pool_get_n_pages(void); @@ -274,8 +430,7 @@ buf_pool_get_n_pages(void); /********************************************************************//** Gets the smallest oldest_modification lsn for any page in the pool. Returns zero if all modified pages have been flushed to disk. -@return oldest modification in pool, zero if none */ -UNIV_INTERN +@return oldest modification in pool, zero if none */ lsn_t buf_pool_get_oldest_modification(void); /*==================================*/ @@ -299,8 +454,7 @@ buf_page_free_descriptor( /********************************************************************//** Allocates a buffer block. -@return own: the allocated block, in state BUF_BLOCK_MEMORY */ -UNIV_INTERN +@return own: the allocated block, in state BUF_BLOCK_MEMORY */ buf_block_t* buf_block_alloc( /*============*/ @@ -317,7 +471,7 @@ buf_block_free( #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Copies contents of a buffer frame to a given buffer. -@return buf */ +@return buf */ UNIV_INLINE byte* buf_frame_copy( @@ -329,23 +483,21 @@ buf_frame_copy( NOTE! 
The following macros should be used instead of buf_page_get_gen, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed in LA! */ -#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\ - SP, ZS, OF, LA, NULL,\ - BUF_GET, __FILE__, __LINE__, MTR) +#define buf_page_get(ID, SIZE, LA, MTR) \ + buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR, NULL) /**************************************************************//** Use these macros to bufferfix a page with no latching. Remember not to read the contents of the page unless you know it is safe. Do not modify the contents of the page! We have separated this case, because it is error-prone programming not to set a latch, and it should be used with care. */ -#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\ - SP, ZS, OF, RW_NO_LATCH, NULL,\ - BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) +#define buf_page_get_with_no_latch(ID, SIZE, MTR) \ + buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \ + __FILE__, __LINE__, MTR, NULL) /********************************************************************//** This is the general function used to get optimistic access to a database page. -@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ ibool buf_page_optimistic_get( /*====================*/ @@ -358,8 +510,7 @@ buf_page_optimistic_get( /********************************************************************//** This is used to get access to a known database page, when no waiting can be done. -@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ ibool buf_page_get_known_nowait( /*======================*/ @@ -370,96 +521,98 @@ buf_page_get_known_nowait( ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ -/*******************************************************************//** -Given a tablespace id and page number tries to get that page. 
If the +/** Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. -Suitable for using when holding the lock_sys_t::mutex. */ -UNIV_INTERN +Suitable for using when holding the lock_sys_t::mutex. +@param[in] page_id page id +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@return pointer to a page or NULL */ buf_block_t* buf_page_try_get_func( -/*==================*/ - ulint space_id,/*!< in: tablespace id */ - ulint page_no,/*!< in: page number */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ - bool possibly_freed, /*!< in: don't mind if page is freed */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mini-transaction */ + const page_id_t& page_id, + const char* file, + ulint line, + mtr_t* mtr); -/** Tries to get a page. If the page is not in the buffer pool it is -not loaded. Suitable for using when holding the lock_sys_t::mutex. -@param space_id in: tablespace id -@param page_no in: page number -@param mtr in: mini-transaction -@return the page if in buffer pool, NULL if not */ -#define buf_page_try_get(space_id, page_no, mtr) \ - buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \ - __FILE__, __LINE__, mtr); +/** Tries to get a page. +If the page is not in the buffer pool it is not loaded. Suitable for using +when holding the lock_sys_t::mutex. +@param[in] page_id page identifier +@param[in] mtr mini-transaction +@return the page if in buffer pool, NULL if not */ +#define buf_page_try_get(page_id, mtr) \ + buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr); -/********************************************************************//** -Get read access to a compressed page (usually of type +/** Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). The page must be released with buf_page_release_zip(). 
NOTE: the page is not protected by any latch. Mutual exclusion has to be implemented at a higher level. In other words, all possible accesses to a given page through this function must be protected by the same set of mutexes or latches. -@return pointer to the block, or NULL if not compressed */ -UNIV_INTERN +@param[in] page_id page id +@param[in] page_size page size +@return pointer to the block */ buf_page_t* buf_page_get_zip( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size */ - ulint offset);/*!< in: page number */ -/********************************************************************//** -This is the general function used to get access to a database page. -@return pointer to the block or NULL */ -UNIV_INTERN + const page_id_t& page_id, + const page_size_t& page_size); + +/** This is the general function used to get access to a database page. +@param[in] page_id page id +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH +@param[in] guess guessed block or NULL +@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL, +BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH +@param[in] file file name +@param[in] line line where called +@param[in] mtr mini-transaction +@param[out] err DB_SUCCESS or error code +@param[in] dirty_with_no_latch + mark page as dirty even if page + is being pinned without any latch +@return pointer to the block or NULL */ buf_block_t* buf_page_get_gen( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_block_t* guess, /*!< in: guessed block or NULL */ - ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH or - BUF_GET_IF_IN_POOL_OR_WATCH */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr, 
/*!< in: mini-transaction */ - dberr_t* err = NULL); /*!< out: error code */ -/********************************************************************//** -Initializes a page to the buffer buf_pool. The page is usually not read + const page_id_t& page_id, + const page_size_t& page_size, + ulint rw_latch, + buf_block_t* guess, + ulint mode, + const char* file, + ulint line, + mtr_t* mtr, + dberr_t* err, + bool dirty_with_no_latch = false); + +/** Initializes a page to the buffer buf_pool. The page is usually not read from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). -@return pointer to the block, page bufferfixed */ -UNIV_INTERN +@param[in] page_id page id +@param[in] page_size page size +@param[in] mtr mini-transaction +@return pointer to the block, page bufferfixed */ buf_block_t* buf_page_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space in units of - a page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr); /*!< in: mini-transaction handle */ + const page_id_t& page_id, + const page_size_t& page_size, + mtr_t* mtr); + #else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */ -UNIV_INTERN + +/** Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. 
+@param[in] page_id page id +@param[in] page_size page size +@param[in,out] block block to init */ void buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - buf_block_t* block); /*!< in: block to init */ + const page_id_t& page_id, + const page_size_t& page_size, + buf_block_t* block); + #endif /* !UNIV_HOTBACKUP */ #ifndef UNIV_HOTBACKUP @@ -471,12 +624,11 @@ buf_page_release_zip( /*=================*/ buf_page_t* bpage); /*!< in: buffer block */ /********************************************************************//** -Decrements the bufferfix count of a buffer control block and releases -a latch, if specified. */ +Releases a latch, if specified. */ UNIV_INLINE void -buf_page_release( -/*=============*/ +buf_page_release_latch( +/*=====================*/ buf_block_t* block, /*!< in: buffer block */ ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ @@ -484,68 +636,62 @@ buf_page_release( Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from slipping out of the buffer pool. */ -UNIV_INTERN void buf_page_make_young( /*================*/ buf_page_t* bpage); /*!< in: buffer block of a file page */ -/********************************************************************//** -Returns TRUE if the page can be found in the buffer pool hash table. +/** Returns TRUE if the page can be found in the buffer pool hash table. NOTE that it is possible that the page is not yet read from disk, though. 
- -@return TRUE if found in the page hash table */ +@param[in] page_id page id +@return TRUE if found in the page hash table */ UNIV_INLINE ibool buf_page_peek( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG -/********************************************************************//** -Sets file_page_was_freed TRUE if the page is found in the buffer pool. + const page_id_t& page_id); + +#ifdef UNIV_DEBUG + +/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless reallocated. -@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN +@param[in] page_id page id +@return control block if found in page hash table, otherwise NULL */ buf_page_t* buf_page_set_file_page_was_freed( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -/********************************************************************//** -Sets file_page_was_freed FALSE if the page is found in the buffer pool. + const page_id_t& page_id); + +/** Sets file_page_was_freed FALSE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless reallocated. 
-@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN +@param[in] page_id page id +@return control block if found in page hash table, otherwise NULL */ buf_page_t* buf_page_reset_file_page_was_freed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset); /*!< in: page number */ -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ + const page_id_t& page_id); + +#endif /* UNIV_DEBUG */ /********************************************************************//** Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ +@return freed_page_clock */ UNIV_INLINE ulint buf_page_get_freed_page_clock( /*==========================*/ const buf_page_t* bpage) /*!< in: block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ +@return freed_page_clock */ UNIV_INLINE ulint buf_block_get_freed_page_clock( /*===========================*/ const buf_block_t* block) /*!< in: block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Tells if a block is still close enough to the MRU end of the LRU list @@ -553,7 +699,7 @@ meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. Note that this is for heuristics only and does not reserve buffer pool mutex. -@return TRUE if block is close to MRU end of LRU */ +@return TRUE if block is close to MRU end of LRU */ UNIV_INLINE ibool buf_page_peek_if_young( @@ -563,7 +709,7 @@ buf_page_peek_if_young( Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool mutex. 
-@return TRUE if should be made younger */ +@return TRUE if should be made younger */ UNIV_INLINE ibool buf_page_peek_if_too_old( @@ -572,7 +718,7 @@ buf_page_peek_if_too_old( /********************************************************************//** Gets the youngest modification log sequence number for a frame. Returns zero if not file page or no modification occurred yet. -@return newest modification to page */ +@return newest modification to page */ UNIV_INLINE lsn_t buf_page_get_newest_modification( @@ -591,7 +737,7 @@ buf_block_modify_clock_inc( /********************************************************************//** Returns the value of the modify clock. The caller must have an s-lock or x-lock on the block. -@return value */ +@return value */ UNIV_INLINE ib_uint64_t buf_block_get_modify_clock( @@ -603,67 +749,96 @@ UNIV_INLINE void buf_block_buf_fix_inc_func( /*=======================*/ -# ifdef UNIV_SYNC_DEBUG +# ifdef UNIV_DEBUG const char* file, /*!< in: file name */ ulint line, /*!< in: line */ -# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ buf_block_t* block) /*!< in/out: block to bufferfix */ MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Increments the bufferfix count. */ +/** Increments the bufferfix count. +@param[in,out] bpage block to bufferfix +@return the count */ UNIV_INLINE -void +ulint buf_block_fix( -/*===========*/ - buf_block_t* block); /*!< in/out: block to bufferfix */ + buf_page_t* bpage); -/*******************************************************************//** -Increments the bufferfix count. */ +/** Increments the bufferfix count. +@param[in,out] block block to bufferfix +@return the count */ UNIV_INLINE -void -buf_block_unfix( -/*===========*/ - buf_block_t* block); /*!< in/out: block to bufferfix */ +ulint +buf_block_fix( + buf_block_t* block); -# ifdef UNIV_SYNC_DEBUG +/** Decrements the bufferfix count. 
+@param[in,out] bpage block to bufferunfix +@return the remaining buffer-fix count */ +UNIV_INLINE +ulint +buf_block_unfix( + buf_page_t* bpage); +/** Decrements the bufferfix count. +@param[in,out] block block to bufferunfix +@return the remaining buffer-fix count */ +UNIV_INLINE +ulint +buf_block_unfix( + buf_block_t* block); + +# ifdef UNIV_DEBUG /** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ +@param[in,out] b block to bufferfix +@param[in] f file name where requested +@param[in] l line number where requested */ # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) -# else /* UNIV_SYNC_DEBUG */ +# else /* UNIV_DEBUG */ /** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ +@param[in,out] b block to bufferfix +@param[in] f file name where requested +@param[in] l line number where requested */ # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) -# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ #else /* !UNIV_HOTBACKUP */ # define buf_block_modify_clock_inc(block) ((void) 0) #endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Checks if a page is corrupt. -@return TRUE if corrupted */ -UNIV_INTERN -ibool -buf_page_is_corrupted( -/*==================*/ - bool check_lsn, /*!< in: true if we need to check the - and complain about the LSN */ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size) /*!< in: size of compressed page; - 0 for uncompressed pages */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Checks if a page is all zeroes. -@return TRUE if the page is all zeroes */ +#endif /* !UNIV_INNOCHECKSUM */ + +/** Checks if a page contains only zeroes. 
+@param[in] read_buf database page +@param[in] page_size page size +@return true if page is filled with zeroes */ bool buf_page_is_zeroes( -/*===============*/ - const byte* read_buf, /*!< in: a database page */ - const ulint zip_size); /*!< in: size of compressed page; - 0 for uncompressed pages */ + const byte* read_buf, + const page_size_t& page_size); + +/** Checks if a page is corrupt. +@param[in] check_lsn true if we need to check and complain about +the LSN +@param[in] read_buf database page +@param[in] page_size page size +@param[in] skip_checksum if true, skip checksum +@param[in] page_no page number of given read_buf +@param[in] strict_check true if strict-check option is enabled +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@return TRUE if corrupted */ +ibool +buf_page_is_corrupted( + bool check_lsn, + const byte* read_buf, + const page_size_t& page_size, + bool skip_checksum +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool strict_check, + bool is_log_enabled, + FILE* log_file +#endif /* UNIV_INNOCHECKSUM */ +) MY_ATTRIBUTE((warn_unused_result)); +#ifndef UNIV_INNOCHECKSUM #ifndef UNIV_HOTBACKUP /**********************************************************************//** Gets the space id, page offset, and byte offset within page of a @@ -678,19 +853,18 @@ buf_ptr_get_fsp_addr( /**********************************************************************//** Gets the hash value of a block. This can be used in searches in the lock hash table. -@return lock hash value */ +@return lock hash value */ UNIV_INLINE ulint buf_block_get_lock_hash_val( /*========================*/ const buf_block_t* block) /*!< in: block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); #ifdef UNIV_DEBUG /*********************************************************************//** Finds a block in the buffer pool that points to a given compressed page. 
-@return buffer block pointing to the compressed page, or NULL */ -UNIV_INTERN +@return buffer block pointing to the compressed page, or NULL */ buf_block_t* buf_pool_contains_zip( /*==================*/ @@ -711,8 +885,7 @@ buf_frame_align( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /*********************************************************************//** Validates the buffer pool data structure. -@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_validate(void); /*==============*/ @@ -720,7 +893,6 @@ buf_validate(void); #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /*********************************************************************//** Prints info of the buffer pool data structure. */ -UNIV_INTERN void buf_print(void); /*============*/ @@ -733,23 +905,20 @@ enum buf_page_print_flags { BUF_PAGE_PRINT_NO_FULL = 2 }; -/********************************************************************//** -Prints a page to stderr. */ -UNIV_INTERN +/** Prints a page to stderr. +@param[in] read_buf a database page +@param[in] page_size page size +@param[in] flags 0 or BUF_PAGE_PRINT_NO_CRASH or +BUF_PAGE_PRINT_NO_FULL */ void buf_page_print( -/*===========*/ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size, /*!< in: compressed page size, or - 0 for uncompressed pages */ - ulint flags) /*!< in: 0 or - BUF_PAGE_PRINT_NO_CRASH or - BUF_PAGE_PRINT_NO_FULL */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); + const byte* read_buf, + const page_size_t& page_size, + ulint flags); + /********************************************************************//** Decompress a block. -@return TRUE if successful */ -UNIV_INTERN +@return TRUE if successful */ ibool buf_zip_decompress( /*===============*/ @@ -759,22 +928,19 @@ buf_zip_decompress( #ifdef UNIV_DEBUG /*********************************************************************//** Returns the number of latched pages in the buffer pool. 
-@return number of latched pages */ -UNIV_INTERN +@return number of latched pages */ ulint buf_get_latched_pages_number(void); /*==============================*/ #endif /* UNIV_DEBUG */ /*********************************************************************//** Returns the number of pending buf pool read ios. -@return number of pending read I/O operations */ -UNIV_INTERN +@return number of pending read I/O operations */ ulint buf_get_n_pending_read_ios(void); /*============================*/ /*********************************************************************//** Prints info of the buffer i/o. */ -UNIV_INTERN void buf_print_io( /*=========*/ @@ -783,7 +949,6 @@ buf_print_io( Collect buffer pool stats information for a buffer pool. Also record aggregated stats if there are more than one buffer pool in the server */ -UNIV_INTERN void buf_stats_get_pool_info( /*====================*/ @@ -794,36 +959,31 @@ buf_stats_get_pool_info( /*********************************************************************//** Returns the ratio in percents of modified pages in the buffer pool / database pages in the buffer pool. -@return modified page percentage ratio */ -UNIV_INTERN +@return modified page percentage ratio */ double buf_get_modified_ratio_pct(void); /*============================*/ /**********************************************************************//** Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN void buf_refresh_io_stats( /*=================*/ buf_pool_t* buf_pool); /*!< buffer pool instance */ /**********************************************************************//** Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN void buf_refresh_io_stats_all(void); /*=================*/ /*********************************************************************//** Asserts that all file pages in the buffer are in a replaceable state. 
-@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_all_freed(void); /*===============*/ /*********************************************************************//** Checks that there currently are no pending i/o-operations for the buffer pool. -@return number of pending i/o operations */ -UNIV_INTERN +@return number of pending i/o operations */ ulint buf_pool_check_no_pending_io(void); /*==============================*/ @@ -831,7 +991,6 @@ buf_pool_check_no_pending_io(void); Invalidates the file pages in the buffer pool when an archive recovery is completed. All the file pages buffered must be in a replaceable state when this function is called: not latched and not modified. */ -UNIV_INTERN void buf_pool_invalidate(void); /*=====================*/ @@ -841,7 +1000,7 @@ buf_pool_invalidate(void); --------------------------- LOWER LEVEL ROUTINES ------------------------- =========================================================================*/ -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /*********************************************************************//** Adds latch level info for the rw-lock protecting the buffer frame. This should be called in the debug version after a successful latching of a @@ -852,13 +1011,13 @@ buf_block_dbg_add_level( /*====================*/ buf_block_t* block, /*!< in: buffer page where we have acquired latch */ - ulint level); /*!< in: latching order level */ -#else /* UNIV_SYNC_DEBUG */ + latch_level_t level); /*!< in: latching order level */ +#else /* UNIV_DEBUG */ # define buf_block_dbg_add_level(block, level) /* nothing */ -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ /*********************************************************************//** Gets the state of a block. -@return state */ +@return state */ UNIV_INLINE enum buf_page_state buf_page_get_state( @@ -876,13 +1035,13 @@ buf_get_state_name( block */ /*********************************************************************//** Gets the state of a block. 
-@return state */ +@return state */ UNIV_INLINE enum buf_page_state buf_block_get_state( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Sets the state of a block. */ UNIV_INLINE @@ -901,43 +1060,43 @@ buf_block_set_state( enum buf_page_state state); /*!< in: state */ /*********************************************************************//** Determines if a block is mapped to a tablespace. -@return TRUE if mapped */ +@return TRUE if mapped */ UNIV_INLINE ibool buf_page_in_file( /*=============*/ const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_HOTBACKUP /*********************************************************************//** Determines if a block should be on unzip_LRU list. -@return TRUE if block belongs to unzip_LRU */ +@return TRUE if block belongs to unzip_LRU */ UNIV_INLINE ibool buf_page_belongs_to_unzip_LRU( /*==========================*/ const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Gets the mutex of a block. -@return pointer to mutex protecting bpage */ +@return pointer to mutex protecting bpage */ UNIV_INLINE -ib_mutex_t* +BPageMutex* buf_page_get_mutex( /*===============*/ const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Get the flush type of a page. 
-@return flush type */ +@return flush type */ UNIV_INLINE buf_flush_t buf_page_get_flush_type( /*====================*/ const buf_page_t* bpage) /*!< in: buffer page */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Set the flush type of a page. */ UNIV_INLINE @@ -946,33 +1105,34 @@ buf_page_set_flush_type( /*====================*/ buf_page_t* bpage, /*!< in: buffer page */ buf_flush_t flush_type); /*!< in: flush type */ -/*********************************************************************//** -Map a block to a file page. */ + +/** Map a block to a file page. +@param[in,out] block pointer to control block +@param[in] page_id page id */ UNIV_INLINE void buf_block_set_file_page( -/*====================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - ulint space, /*!< in: tablespace id */ - ulint page_no);/*!< in: page number */ + buf_block_t* block, + const page_id_t& page_id); + /*********************************************************************//** Gets the io_fix state of a block. -@return io_fix state */ +@return io_fix state */ UNIV_INLINE enum buf_io_fix buf_page_get_io_fix( /*================*/ const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Gets the io_fix state of a block. -@return io_fix state */ +@return io_fix state */ UNIV_INLINE enum buf_io_fix buf_block_get_io_fix( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Sets the io_fix state of a block. 
*/ UNIV_INLINE @@ -1018,17 +1178,17 @@ ibool buf_page_can_relocate( /*==================*/ const buf_page_t* bpage) /*!< control block being relocated */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Determine if a block has been flagged old. -@return TRUE if old */ +@return TRUE if old */ UNIV_INLINE ibool buf_page_is_old( /*============*/ const buf_page_t* bpage) /*!< in: control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Flag a block old. */ UNIV_INLINE @@ -1039,13 +1199,13 @@ buf_page_set_old( ibool old); /*!< in: old */ /*********************************************************************//** Determine the time of first access of a block in the buffer pool. -@return ut_time_ms() at the time of first access, 0 if not accessed */ +@return ut_time_ms() at the time of first access, 0 if not accessed */ UNIV_INLINE unsigned buf_page_is_accessed( /*=================*/ const buf_page_t* bpage) /*!< in: control block */ - MY_ATTRIBUTE((nonnull, pure)); + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Flag a block accessed. */ UNIV_INLINE @@ -1057,130 +1217,64 @@ buf_page_set_accessed( /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed page frame exists, or NULL. Note: even though bpage is not declared a -const we don't update its value. It is safe to make this pure. -@return control block, or NULL */ +const we don't update its value. 
+@return control block, or NULL */ UNIV_INLINE buf_block_t* buf_page_get_block( /*===============*/ buf_page_t* bpage) /*!< in: control block, or NULL */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG /*********************************************************************//** Gets a pointer to the memory frame of a block. -@return pointer to the frame */ +@return pointer to the frame */ UNIV_INLINE buf_frame_t* buf_block_get_frame( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); + MY_ATTRIBUTE((warn_unused_result)); #else /* UNIV_DEBUG */ # define buf_block_get_frame(block) (block)->frame #endif /* UNIV_DEBUG */ -/*********************************************************************//** -Gets the space id of a block. -@return space id */ -UNIV_INLINE -ulint -buf_page_get_space( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the space id of a block. -@return space id */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_page_get_page_no( -/*=================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the page number of a block. 
-@return page number */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_page_get_zip_size( -/*==================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_block_get_zip_size( -/*===================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); + /*********************************************************************//** Gets the compressed page descriptor corresponding to an uncompressed page if applicable. */ #define buf_block_get_page_zip(block) \ ((block)->page.zip.data ? &(block)->page.zip : NULL) #ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to. -@return pointer to block, never NULL */ -UNIV_INTERN + +/** Get a buffer block from an adaptive hash index pointer. +This function does not return if the block is not identified. +@param[in] ptr pointer to within a page frame +@return pointer to block, never NULL */ buf_block_t* -buf_block_align( -/*============*/ - const byte* ptr); /*!< in: pointer to a frame */ +buf_block_from_ahi(const byte* ptr); + /********************************************************************//** Find out if a pointer belongs to a buf_block_t. 
It can be a pointer to the buf_block_t itself or a member of it -@return TRUE if ptr belongs to a buf_block_t struct */ -UNIV_INTERN +@return TRUE if ptr belongs to a buf_block_t struct */ ibool buf_pointer_is_block_field( /*=======================*/ const void* ptr); /*!< in: pointer not dereferenced */ /** Find out if a pointer corresponds to a buf_block_t::mutex. -@param m in: mutex candidate -@return TRUE if m is a buf_block_t::mutex */ +@param m in: mutex candidate +@return TRUE if m is a buf_block_t::mutex */ #define buf_pool_is_block_mutex(m) \ buf_pointer_is_block_field((const void*)(m)) /** Find out if a pointer corresponds to a buf_block_t::lock. -@param l in: rw-lock candidate -@return TRUE if l is a buf_block_t::lock */ +@param l in: rw-lock candidate +@return TRUE if l is a buf_block_t::lock */ #define buf_pool_is_block_lock(l) \ buf_pointer_is_block_field((const void*)(l)) -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG -/*********************************************************************//** -Gets the compressed page descriptor corresponding to an uncompressed page -if applicable. -@return compressed page descriptor, or NULL */ -UNIV_INLINE -const page_zip_des_t* -buf_frame_get_page_zip( -/*===================*/ - const byte* ptr); /*!< in: pointer to the page */ -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -/********************************************************************//** -Function which inits a page for read to the buffer buf_pool. If the page is +/** Inits a page for read to the buffer buf_pool. If the page is (1) already in buf_pool, or (2) if we specify to read only ibuf pages and the page is not an ibuf page, or (3) if the space is deleted or being deleted, @@ -1188,25 +1282,23 @@ then this function does nothing. Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock on the buffer frame. The io-handler must take care that the flag is cleared and the lock released later. 
-@return pointer to the block or NULL */ -UNIV_INTERN +@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED +@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ... +@param[in] page_id page id +@param[in] unzip TRUE=request uncompressed page +@return pointer to the block or NULL */ buf_page_t* buf_page_init_for_read( -/*===================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - ibool unzip, /*!< in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset);/*!< in: page number */ + dberr_t* err, + ulint mode, + const page_id_t& page_id, + const page_size_t& page_size, + ibool unzip); + /********************************************************************//** Completes an asynchronous read or write request of a file page to or from the buffer pool. @return true if successful */ -UNIV_INTERN bool buf_page_io_complete( /*=================*/ @@ -1214,25 +1306,14 @@ buf_page_io_complete( bool evict = false);/*!< in: whether or not to evict the page from LRU list. */ /********************************************************************//** -Calculates a folded value of a file page address to use in the page hash -table. -@return the folded value */ -UNIV_INLINE -ulint -buf_page_address_fold( -/*==================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page within space */ - MY_ATTRIBUTE((const)); -/********************************************************************//** Calculates the index of a buffer pool to the buf_pool[] array. 
-@return the position of the buffer pool in buf_pool[] */ +@return the position of the buffer pool in buf_pool[] */ UNIV_INLINE ulint buf_pool_index( /*===========*/ const buf_pool_t* buf_pool) /*!< in: buffer pool */ - MY_ATTRIBUTE((nonnull, const)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Returns the buffer pool instance given a page instance @return buf_pool */ @@ -1249,15 +1330,15 @@ buf_pool_t* buf_pool_from_block( /*================*/ const buf_block_t* block); /*!< in: block */ -/******************************************************************//** -Returns the buffer pool instance given space and offset of page + +/** Returns the buffer pool instance given a page id. +@param[in] page_id page id @return buffer pool */ UNIV_INLINE buf_pool_t* buf_pool_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: offset of the page within space */ + const page_id_t& page_id); + /******************************************************************//** Returns the buffer pool instance given its array index @return buffer pool */ @@ -1267,71 +1348,64 @@ buf_pool_from_array( /*================*/ ulint index); /*!< in: array index to get buffer pool instance from */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. -@return block, NULL if not found */ + +/** Returns the control block of a file page, NULL if not found. 
+@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@return block, NULL if not found */ UNIV_INLINE buf_page_t* buf_page_hash_get_low( -/*==================*/ - buf_pool_t* buf_pool,/*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space */ - ulint fold); /*!< in: buf_page_address_fold(space, offset) */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. + buf_pool_t* buf_pool, + const page_id_t& page_id); + +/** Returns the control block of a file page, NULL if not found. If the block is found and lock is not NULL then the appropriate page_hash lock is acquired in the specified lock mode. Otherwise, mode value is ignored. It is up to the caller to release the lock. If the block is found and the lock is NULL then the page_hash lock is released by this function. -@return block, NULL if not found, or watch sentinel (if watch is true) */ +@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@param[in,out] lock lock of the page hash acquired if bpage is +found, NULL otherwise. If NULL is passed then the hash_lock is released by +this function. +@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if +lock == NULL +@param[in] watch if true, return watch sentinel also. +@return pointer to the bpage or NULL; if NULL, lock is also NULL or +a watch sentinel. */ UNIV_INLINE buf_page_t* buf_page_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode, /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. 
Ignored if - lock == NULL */ - bool watch = false); /*!< in: if true, return watch - sentinel also. */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. + buf_pool_t* buf_pool, + const page_id_t& page_id, + rw_lock_t** lock, + ulint lock_mode, + bool watch = false); + +/** Returns the control block of a file page, NULL if not found. If the block is found and lock is not NULL then the appropriate page_hash lock is acquired in the specified lock mode. Otherwise, mode value is ignored. It is up to the caller to release the lock. If the block is found and the lock is NULL then the page_hash lock is released by this function. -@return block, NULL if not found */ +@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@param[in,out] lock lock of the page hash acquired if bpage is +found, NULL otherwise. If NULL is passed then the hash_lock is released by +this function. +@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if +lock == NULL +@return pointer to the block or NULL; if NULL, lock is also NULL. */ UNIV_INLINE buf_block_t* buf_block_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode); /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ + buf_pool_t* buf_pool, + const page_id_t& page_id, + rw_lock_t** lock, + ulint lock_mode); + /* There are four different ways we can try to get a bpage or block from the page hash: 1) Caller already holds the appropriate page hash lock: in the case call @@ -1339,75 +1413,70 @@ buf_page_hash_get_low() function. 
2) Caller wants to hold page hash lock in x-mode 3) Caller wants to hold page hash lock in s-mode 4) Caller doesn't want to hold page hash lock */ -#define buf_page_hash_get_s_locked(b, s, o, l) \ - buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) -#define buf_page_hash_get_x_locked(b, s, o, l) \ - buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX) -#define buf_page_hash_get(b, s, o) \ - buf_page_hash_get_locked(b, s, o, NULL, 0) -#define buf_page_get_also_watch(b, s, o) \ - buf_page_hash_get_locked(b, s, o, NULL, 0, true) +#define buf_page_hash_get_s_locked(b, page_id, l) \ + buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S) +#define buf_page_hash_get_x_locked(b, page_id, l) \ + buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X) +#define buf_page_hash_get(b, page_id) \ + buf_page_hash_get_locked(b, page_id, NULL, 0) +#define buf_page_get_also_watch(b, page_id) \ + buf_page_hash_get_locked(b, page_id, NULL, 0, true) -#define buf_block_hash_get_s_locked(b, s, o, l) \ - buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) -#define buf_block_hash_get_x_locked(b, s, o, l) \ - buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX) -#define buf_block_hash_get(b, s, o) \ - buf_block_hash_get_locked(b, s, o, NULL, 0) +#define buf_block_hash_get_s_locked(b, page_id, l) \ + buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S) +#define buf_block_hash_get_x_locked(b, page_id, l) \ + buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X) +#define buf_block_hash_get(b, page_id) \ + buf_block_hash_get_locked(b, page_id, NULL, 0) /*********************************************************************//** Gets the current length of the free list of buffer blocks. -@return length of the free list */ -UNIV_INTERN +@return length of the free list */ ulint buf_get_free_list_len(void); /*=======================*/ /********************************************************************//** Determine if a block is a sentinel for a buffer pool watch. 
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ -UNIV_INTERN +@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ ibool buf_pool_watch_is_sentinel( /*=======================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ + const buf_pool_t* buf_pool, /*!< buffer pool instance */ const buf_page_t* bpage) /*!< in: block */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/****************************************************************//** -Add watch for the given page to be read in. Caller must have the buffer pool + +/** Add watch for the given page to be read in. Caller must have +appropriate hash_lock for the bpage. This function may release the +hash_lock and reacquire it. +@param[in] page_id page id +@param[in,out] hash_lock hash_lock currently latched @return NULL if watch set, block if the page is in the buffer pool */ -UNIV_INTERN buf_page_t* buf_pool_watch_set( -/*===============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - ulint fold) /*!< in: buf_page_address_fold(space, offset) */ - MY_ATTRIBUTE((warn_unused_result)); -/****************************************************************//** -Stop watching if the page has been read in. -buf_pool_watch_set(space,offset) must have returned NULL before. */ -UNIV_INTERN + const page_id_t& page_id, + rw_lock_t** hash_lock) +MY_ATTRIBUTE((warn_unused_result)); + +/** Stop watching if the page has been read in. +buf_pool_watch_set(space,offset) must have returned NULL before. +@param[in] page_id page id */ void buf_pool_watch_unset( -/*=================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -/****************************************************************//** -Check if the page has been read in. + const page_id_t& page_id); + +/** Check if the page has been read in. 
This may only be called after buf_pool_watch_set(space,offset) has returned NULL and before invoking buf_pool_watch_unset(space,offset). -@return FALSE if the given page was not read in, TRUE if it was */ -UNIV_INTERN +@param[in] page_id page id +@return FALSE if the given page was not read in, TRUE if it was */ ibool buf_pool_watch_occurred( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ - MY_ATTRIBUTE((warn_unused_result)); + const page_id_t& page_id) +MY_ATTRIBUTE((warn_unused_result)); + /********************************************************************//** Get total buffer pool statistics. */ -UNIV_INTERN void buf_get_total_list_len( /*===================*/ @@ -1416,7 +1485,6 @@ buf_get_total_list_len( ulint* flush_list_len);/*!< out: length of all flush lists */ /********************************************************************//** Get total list size in bytes from all buffer pools. */ -UNIV_INTERN void buf_get_total_list_size_in_bytes( /*=============================*/ @@ -1424,7 +1492,6 @@ buf_get_total_list_size_in_bytes( in all buffer pools */ /********************************************************************//** Get total buffer pool statistics. */ -UNIV_INTERN void buf_get_total_stat( /*===============*/ @@ -1440,15 +1507,33 @@ buf_get_nth_chunk_block( ulint n, /*!< in: nth chunk in the buffer pool */ ulint* chunk_size); /*!< in: chunk size */ -/********************************************************************//** -Calculate the checksum of a page from compressed table and update the page. */ -UNIV_INTERN +/** Verify the possibility that a stored page is not in buffer pool. +@param[in] withdraw_clock withdraw clock when stored the page +@retval true if the page might be relocated */ +UNIV_INLINE +bool +buf_pool_is_obsolete( + ulint withdraw_clock); + +/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, +if needed. 
+@param[in] size size in bytes +@return aligned size */ +UNIV_INLINE +ulint +buf_pool_size_align( + ulint size); + +/** Calculate the checksum of a page from compressed table and update the +page. +@param[in,out] page page to update +@param[in] size compressed page size +@param[in] lsn LSN to stamp on the page */ void buf_flush_update_zip_checksum( -/*==========================*/ - buf_frame_t* page, /*!< in/out: Page to update */ - ulint zip_size, /*!< in: Compressed page size */ - lsn_t lsn); /*!< in: Lsn to stamp on the page */ + buf_frame_t* page, + ulint size, + lsn_t lsn); #endif /* !UNIV_HOTBACKUP */ @@ -1528,7 +1613,8 @@ for compressed and uncompressed frames */ /** Number of bits used for buffer page states. */ #define BUF_PAGE_STATE_BITS 3 -struct buf_page_t{ +class buf_page_t { +public: /** @name General fields None of these bit-fields must be modified without holding buf_page_get_mutex() [buf_block_t::mutex or @@ -1537,36 +1623,21 @@ struct buf_page_t{ by buf_pool->mutex. */ /* @{ */ - ib_uint32_t space; /*!< tablespace id; also protected - by buf_pool->mutex. */ - ib_uint32_t offset; /*!< page number; also protected - by buf_pool->mutex. */ - /** count of how manyfold this block is currently bufferfixed */ -#ifdef PAGE_ATOMIC_REF_COUNT + /** Page id. Protected by buf_pool mutex. */ + page_id_t id; + + /** Page size. Protected by buf_pool mutex. */ + page_size_t size; + + /** Count of how manyfold this block is currently bufferfixed. */ ib_uint32_t buf_fix_count; /** type of pending I/O operation; also protected by - buf_pool->mutex for writes only @see enum buf_io_fix */ - byte io_fix; + buf_pool->mutex for writes only */ + buf_io_fix io_fix; - byte state; -#else - unsigned buf_fix_count:19; - - /** type of pending I/O operation; also protected by - buf_pool->mutex for writes only @see enum buf_io_fix */ - unsigned io_fix:2; - - /*!< state of the control block; also protected by buf_pool->mutex. 
- State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY - need not be protected by buf_page_get_mutex(). @see enum buf_page_state. - State changes that are relevant to page_hash are additionally protected - by the appropriate page_hash mutex i.e.: if a page is in page_hash or - is being added to/removed from page_hash then the corresponding changes - must also be protected by page_hash mutex. */ - unsigned state:BUF_PAGE_STATE_BITS; - -#endif /* PAGE_ATOMIC_REF_COUNT */ + /** Block state. @see buf_page_in_file */ + buf_page_state state; #ifndef UNIV_HOTBACKUP unsigned flush_type:2; /*!< if this block is currently being @@ -1592,6 +1663,7 @@ struct buf_page_t{ if written again we check is TRIM operation needed. */ + ulint space; /*!< space id */ unsigned key_version; /*!< key version for this block */ bool page_encrypted; /*!< page is page encrypted */ bool page_compressed;/*!< page is page compressed */ @@ -1631,7 +1703,7 @@ struct buf_page_t{ in one of the following lists in buf_pool: - - BUF_BLOCK_NOT_USED: free + - BUF_BLOCK_NOT_USED: free, withdraw - BUF_BLOCK_FILE_PAGE: flush_list - BUF_BLOCK_ZIP_DIRTY: flush_list - BUF_BLOCK_ZIP_PAGE: zip_clean @@ -1667,6 +1739,9 @@ struct buf_page_t{ should hold: in_free_list == (state == BUF_BLOCK_NOT_USED) */ #endif /* UNIV_DEBUG */ + + FlushObserver* flush_observer; /*!< flush observer */ + lsn_t newest_modification; /*!< log sequence number of the youngest modification to @@ -1714,13 +1789,13 @@ struct buf_page_t{ 0 if the block was never accessed in the buffer pool. Protected by block mutex */ -# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG +# ifdef UNIV_DEBUG ibool file_page_was_freed; /*!< this is set to TRUE when fsp frees a page in buffer pool; protected by buf_pool->zip_mutex or buf_block_t::mutex. 
*/ -# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ +# endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ }; @@ -1740,6 +1815,8 @@ struct buf_block_t{ aligned to an address divisible by UNIV_PAGE_SIZE */ #ifndef UNIV_HOTBACKUP + BPageLock lock; /*!< read-write lock of the buffer + frame */ UT_LIST_NODE_T(buf_block_t) unzip_LRU; /*!< node of the decompressed LRU list; a block is in the unzip_LRU list @@ -1749,15 +1826,8 @@ struct buf_block_t{ ibool in_unzip_LRU_list;/*!< TRUE if the page is in the decompressed LRU list; used in debugging */ + ibool in_withdraw_list; #endif /* UNIV_DEBUG */ - ib_mutex_t mutex; /*!< mutex protecting this block: - state (also protected by the buffer - pool mutex), io_fix, buf_fix_count, - and accessed; we introduce this new - mutex in InnoDB-5.1 to relieve - contention on the buffer pool mutex */ - rw_lock_t lock; /*!< read-write lock of the buffer - frame */ unsigned lock_hash_val:32;/*!< hashed value of the page address in the record lock hash table; protected by buf_block_t::lock @@ -1765,15 +1835,6 @@ struct buf_block_t{ in buf_page_get_gen(), buf_page_init_for_read() and buf_page_create()) */ - ibool check_index_page_at_flush; - /*!< TRUE if we know that this is - an index page, and want the database - to check its consistency before flush; - note that there may be pages in the - buffer pool which are index pages, - but this flag is not set because - we do not keep track of all pages; - NOT protected by any mutex */ /* @} */ /** @name Optimistic search field */ /* @{ */ @@ -1796,11 +1857,12 @@ struct buf_block_t{ ulint n_hash_helps; /*!< counter which controls building of a new hash index for the page */ - ulint n_fields; /*!< recommended prefix length for hash + volatile ulint n_bytes; /*!< recommended prefix length for hash + search: number of bytes in + an incomplete last field */ + volatile ulint n_fields; /*!< recommended prefix length for hash search: number of full fields */ - ulint n_bytes; /*!< recommended prefix: 
number of bytes - in an incomplete field */ - ibool left_side; /*!< TRUE or FALSE, depending on + volatile bool left_side; /*!< true or false, depending on whether the leftmost record of several records with the same prefix should be indexed in the hash index */ @@ -1808,7 +1870,7 @@ struct buf_block_t{ /** @name Hash search fields These 5 fields may only be modified when we have - an x-latch on btr_search_latch AND + an x-latch on search system AND - we are holding an s-latch or x-latch on buf_block_t::lock or - we know that buf_block_t::buf_fix_count == 0. @@ -1816,7 +1878,7 @@ struct buf_block_t{ in the buffer pool in buf0buf.cc. Another exception is that assigning block->index = NULL - is allowed whenever holding an x-latch on btr_search_latch. */ + is allowed whenever holding an x-latch on search system. */ /* @{ */ @@ -1839,8 +1901,17 @@ struct buf_block_t{ complete, though: there may have been hash collisions, record deletions, etc. */ + bool made_dirty_with_no_latch; + /*!< true if block has been made dirty + without acquiring X/SX latch as the + block belongs to temporary tablespace + and block is always accessed by a + single thread. 
*/ + bool skip_flush_check; + /*!< Skip check in buf_dblwr_check_block + during bulk load, protected by lock.*/ /* @} */ -# ifdef UNIV_SYNC_DEBUG +# ifdef UNIV_DEBUG /** @name Debug fields */ /* @{ */ rw_lock_t debug_latch; /*!< in the debug version, each thread @@ -1849,16 +1920,23 @@ struct buf_block_t{ debug utilities in sync0rw */ /* @} */ # endif + BPageMutex mutex; /*!< mutex protecting this block: + state (also protected by the buffer + pool mutex), io_fix, buf_fix_count, + and accessed; we introduce this new + mutex in InnoDB-5.1 to relieve + contention on the buffer pool mutex */ #endif /* !UNIV_HOTBACKUP */ }; /** Check if a buf_block_t object is in a valid state -@param block buffer block -@return TRUE if valid */ +@param block buffer block +@return TRUE if valid */ #define buf_block_state_valid(block) \ (buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \ && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH)) + #ifndef UNIV_HOTBACKUP /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. */ @@ -1890,7 +1968,7 @@ public: virtual ~HazardPointer() {} /** Get current value */ - buf_page_t* get() + buf_page_t* get() const { ut_ad(mutex_own(m_mutex)); return(m_hp); @@ -1920,7 +1998,7 @@ protected: /** Buffer pool instance */ const buf_pool_t* m_buf_pool; -#if UNIV_DEBUG +#ifdef UNIV_DEBUG /** mutex that protects access to the m_hp. 
*/ const ib_mutex_t* m_mutex; #endif /* UNIV_DEBUG */ @@ -2074,15 +2152,14 @@ struct buf_pool_t{ /** @name General fields */ /* @{ */ - ib_mutex_t mutex; /*!< Buffer pool mutex of this + BufPoolMutex mutex; /*!< Buffer pool mutex of this instance */ - ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer + BufPoolZipMutex zip_mutex; /*!< Zip mutex of this buffer pool instance, protects compressed only pages (of type buf_page_t, not buf_block_t */ ulint instance_no; /*!< Array index of this buffer pool instance */ - ulint old_pool_size; /*!< Old pool size in bytes */ ulint curr_pool_size; /*!< Current pool size in bytes */ ulint LRU_old_ratio; /*!< Reserve this much of the buffer pool for "old" blocks */ @@ -2093,9 +2170,19 @@ struct buf_pool_t{ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ulint mutex_exit_forbidden; /*!< Forbid release mutex */ #endif - ulint n_chunks; /*!< number of buffer pool chunks */ + ut_allocator allocator; /*!< Allocator used for + allocating memory for the the "chunks" + member. */ + volatile ulint n_chunks; /*!< number of buffer pool chunks */ + volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */ buf_chunk_t* chunks; /*!< buffer pool chunks */ + buf_chunk_t* chunks_old; /*!< old buffer pool chunks to be freed + after resizing buffer pool */ ulint curr_size; /*!< current pool size in pages */ + ulint old_size; /*!< previous pool size in pages */ + ulint read_ahead_area;/*!< size in pages of the area which + the read-ahead algorithms read if + invoked */ hash_table_t* page_hash; /*!< hash table of buf_page_t or buf_block_t file pages, buf_page_in_file() == TRUE, @@ -2107,6 +2194,8 @@ struct buf_pool_t{ page_hash mutex. Lookups can happen while holding the buf_pool->mutex or the relevant page_hash mutex. 
*/ + hash_table_t* page_hash_old; /*!< old pointer to page_hash to be + freed after resizing buffer pool */ hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks whose frames are allocated to the zip buddy system, @@ -2130,7 +2219,7 @@ struct buf_pool_t{ /* @{ */ - ib_mutex_t flush_list_mutex;/*!< mutex protecting the + FlushListMutex flush_list_mutex;/*!< mutex protecting the flush list access. This mutex protects flush_list, flush_rbt and bpage::list pointers when @@ -2197,6 +2286,15 @@ struct buf_pool_t{ /*!< base node of the free block list */ + UT_LIST_BASE_NODE_T(buf_page_t) withdraw; + /*!< base node of the withdraw + block list. It is only used during + shrinking buffer pool size, not to + reuse the blocks will be removed */ + + ulint withdraw_target;/*!< target length of withdraw + block list, when withdrawing */ + /** "hazard pointer" used during scan of LRU while doing LRU list batch. Protected by buf_pool::mutex */ LRUHp lru_hp; @@ -2211,6 +2309,7 @@ struct buf_pool_t{ UT_LIST_BASE_NODE_T(buf_page_t) LRU; /*!< base node of the LRU list */ + buf_page_t* LRU_old; /*!< pointer to the about LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV oldest blocks in the LRU list; @@ -2258,6 +2357,15 @@ struct buf_pool_t{ /* @} */ }; +/** Print the given buf_pool_t object. +@param[in,out] out the output stream +@param[in] buf_pool the buf_pool_t object to be printed +@return the output stream */ +std::ostream& +operator<<( + std::ostream& out, + const buf_pool_t& buf_pool); + /** @name Accessors for buf_pool->mutex. Use these instead of accessing buf_pool->mutex directly. */ /* @{ */ @@ -2265,77 +2373,79 @@ Use these instead of accessing buf_pool->mutex directly. */ /** Test if a buffer pool mutex is owned. */ #define buf_pool_mutex_own(b) mutex_own(&b->mutex) /** Acquire a buffer pool mutex. 
*/ -#define buf_pool_mutex_enter(b) do { \ - ut_ad(!mutex_own(&b->zip_mutex)); \ - mutex_enter(&b->mutex); \ +#define buf_pool_mutex_enter(b) do { \ + ut_ad(!(b)->zip_mutex.is_owned()); \ + mutex_enter(&(b)->mutex); \ } while (0) /** Test if flush list mutex is owned. */ -#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex) +#define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex) /** Acquire the flush list mutex. */ -#define buf_flush_list_mutex_enter(b) do { \ - mutex_enter(&b->flush_list_mutex); \ +#define buf_flush_list_mutex_enter(b) do { \ + mutex_enter(&(b)->flush_list_mutex); \ } while (0) /** Release the flush list mutex. */ -# define buf_flush_list_mutex_exit(b) do { \ - mutex_exit(&b->flush_list_mutex); \ +# define buf_flush_list_mutex_exit(b) do { \ + mutex_exit(&(b)->flush_list_mutex); \ } while (0) + /** Test if block->mutex is owned. */ -#define buf_block_mutex_own(b) mutex_own(&(b)->mutex) +#define buf_page_mutex_own(b) (b)->mutex.is_owned() /** Acquire the block->mutex. */ -#define buf_block_mutex_enter(b) do { \ +#define buf_page_mutex_enter(b) do { \ mutex_enter(&(b)->mutex); \ } while (0) /** Release the trx->mutex. */ -#define buf_block_mutex_exit(b) do { \ - mutex_exit(&(b)->mutex); \ +#define buf_page_mutex_exit(b) do { \ + (b)->mutex.exit(); \ } while (0) /** Get appropriate page_hash_lock. */ -# define buf_page_hash_lock_get(b, f) \ - hash_get_lock(b->page_hash, f) +# define buf_page_hash_lock_get(buf_pool, page_id) \ + hash_get_lock((buf_pool)->page_hash, (page_id).fold()) -#ifdef UNIV_SYNC_DEBUG +/** If not appropriate page_hash_lock, relock until appropriate. 
*/ +# define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\ + hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold()) + +# define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id)\ + hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold()) + +#ifdef UNIV_DEBUG /** Test if page_hash lock is held in s-mode. */ -# define buf_page_hash_lock_held_s(b, p) \ - rw_lock_own(buf_page_hash_lock_get(b, \ - buf_page_address_fold(p->space, \ - p->offset)), \ - RW_LOCK_SHARED) +# define buf_page_hash_lock_held_s(buf_pool, bpage) \ + rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S) /** Test if page_hash lock is held in x-mode. */ -# define buf_page_hash_lock_held_x(b, p) \ - rw_lock_own(buf_page_hash_lock_get(b, \ - buf_page_address_fold(p->space, \ - p->offset)), \ - RW_LOCK_EX) +# define buf_page_hash_lock_held_x(buf_pool, bpage) \ + rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X) /** Test if page_hash lock is held in x or s-mode. 
*/ -# define buf_page_hash_lock_held_s_or_x(b, p) \ - (buf_page_hash_lock_held_s(b, p) \ - || buf_page_hash_lock_held_x(b, p)) +# define buf_page_hash_lock_held_s_or_x(buf_pool, bpage)\ + (buf_page_hash_lock_held_s((buf_pool), (bpage)) \ + || buf_page_hash_lock_held_x((buf_pool), (bpage))) -# define buf_block_hash_lock_held_s(b, p) \ - buf_page_hash_lock_held_s(b, &(p->page)) +# define buf_block_hash_lock_held_s(buf_pool, block) \ + buf_page_hash_lock_held_s((buf_pool), &(block)->page) -# define buf_block_hash_lock_held_x(b, p) \ - buf_page_hash_lock_held_x(b, &(p->page)) +# define buf_block_hash_lock_held_x(buf_pool, block) \ + buf_page_hash_lock_held_x((buf_pool), &(block)->page) -# define buf_block_hash_lock_held_s_or_x(b, p) \ - buf_page_hash_lock_held_s_or_x(b, &(p->page)) -#else /* UNIV_SYNC_DEBUG */ +# define buf_block_hash_lock_held_s_or_x(buf_pool, block) \ + buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page) +#else /* UNIV_DEBUG */ # define buf_page_hash_lock_held_s(b, p) (TRUE) # define buf_page_hash_lock_held_x(b, p) (TRUE) # define buf_page_hash_lock_held_s_or_x(b, p) (TRUE) # define buf_block_hash_lock_held_s(b, p) (TRUE) # define buf_block_hash_lock_held_x(b, p) (TRUE) # define buf_block_hash_lock_held_s_or_x(b, p) (TRUE) -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /** Forbid the release of the buffer pool mutex. */ @@ -2416,6 +2526,12 @@ struct CheckInLRUList { { ut_a(elem->in_LRU_list); } + + static void validate(const buf_pool_t* buf_pool) + { + CheckInLRUList check; + ut_list_validate(buf_pool->LRU, check); + } }; /** Functor to validate the LRU list. 
*/ @@ -2424,6 +2540,12 @@ struct CheckInFreeList { { ut_a(elem->in_free_list); } + + static void validate(const buf_pool_t* buf_pool) + { + CheckInFreeList check; + ut_list_validate(buf_pool->free, check); + } }; struct CheckUnzipLRUAndLRUList { @@ -2432,11 +2554,18 @@ struct CheckUnzipLRUAndLRUList { ut_a(elem->page.in_LRU_list); ut_a(elem->in_unzip_LRU_list); } + + static void validate(const buf_pool_t* buf_pool) + { + CheckUnzipLRUAndLRUList check; + ut_list_validate(buf_pool->unzip_LRU, check); + } }; #endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ #ifndef UNIV_NONINL #include "buf0buf.ic" #endif +#endif /* !UNIV_INNOCHECKSUM */ #endif diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index e77c5a84202..bf7799774c6 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -1,8 +1,8 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2014, 2015, MariaDB Corporation. +Copyright (c) 2014, 2016, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -36,14 +36,25 @@ Created 11/5/1995 Heikki Tuuri #include "buf0flu.h" #include "buf0lru.h" #include "buf0rea.h" +#include "sync0debug.h" +#include "fsp0types.h" +#include "ut0new.h" /** A chunk of buffers. The buffer pool is allocated in chunks. */ struct buf_chunk_t{ - ulint mem_size; /*!< allocated size of the chunk */ ulint size; /*!< size of frames[] and blocks[] */ - void* mem; /*!< pointer to the memory area which + unsigned char* mem; /*!< pointer to the memory area which was allocated for the frames */ + ut_new_pfx_t mem_pfx; /*!< Auxiliary structure, describing + "mem". 
It is filled by the allocator's + alloc method and later passed to the + deallocate method. */ buf_block_t* blocks; /*!< array of buffer control blocks */ + + /** Get the size of 'mem' in bytes. */ + size_t mem_size() const { + return(mem_pfx.m_size); + } }; /*********************************************************************//** @@ -59,7 +70,7 @@ buf_pool_get_curr_size(void) /********************************************************************//** Calculates the index of a buffer pool to the buf_pool[] array. -@return the position of the buffer pool in buf_pool[] */ +@return the position of the buffer pool in buf_pool[] */ UNIV_INLINE ulint buf_pool_index( @@ -112,7 +123,7 @@ buf_pool_get_n_pages(void) /********************************************************************//** Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ +@return freed_page_clock */ UNIV_INLINE ulint buf_page_get_freed_page_clock( @@ -125,7 +136,7 @@ buf_page_get_freed_page_clock( /********************************************************************//** Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ +@return freed_page_clock */ UNIV_INLINE ulint buf_block_get_freed_page_clock( @@ -141,7 +152,7 @@ meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. Note that this is for heuristics only and does not reserve buffer pool mutex. -@return TRUE if block is close to MRU end of LRU */ +@return TRUE if block is close to MRU end of LRU */ UNIV_INLINE ibool buf_page_peek_if_young( @@ -162,7 +173,7 @@ buf_page_peek_if_young( Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool mutex. 
-@return TRUE if should be made younger */ +@return TRUE if should be made younger */ UNIV_INLINE ibool buf_page_peek_if_too_old( @@ -179,6 +190,12 @@ buf_page_peek_if_too_old( } else if (buf_LRU_old_threshold_ms && bpage->old) { unsigned access_time = buf_page_is_accessed(bpage); + /* It is possible that the below comparison returns an + unexpected result. 2^32 milliseconds pass in about 50 days, + so if the difference between ut_time_ms() and access_time + is e.g. 50 days + 15 ms, then the below will behave as if + it is 15 ms. This is known and fixing it would require to + increase buf_page_t::access_time from 32 to 64 bits. */ if (access_time > 0 && ((ib_uint32_t) (ut_time_ms() - access_time)) >= buf_LRU_old_threshold_ms) { @@ -195,14 +212,14 @@ buf_page_peek_if_too_old( /*********************************************************************//** Gets the state of a block. -@return state */ +@return state */ UNIV_INLINE enum buf_page_state buf_page_get_state( /*===============*/ const buf_page_t* bpage) /*!< in: pointer to the control block */ { - enum buf_page_state state = (enum buf_page_state) bpage->state; + enum buf_page_state state = bpage->state; #ifdef UNIV_DEBUG switch (state) { @@ -224,7 +241,7 @@ buf_page_get_state( } /*********************************************************************//** Gets the state of a block. 
-@return state */ +@return state */ UNIV_INLINE enum buf_page_state buf_block_get_state( @@ -303,7 +320,8 @@ buf_page_set_state( break; case BUF_BLOCK_FILE_PAGE: if (!(state == BUF_BLOCK_NOT_USED - || state == BUF_BLOCK_REMOVE_HASH)) { + || state == BUF_BLOCK_REMOVE_HASH + || state == BUF_BLOCK_FILE_PAGE)) { const char *old_state_name = buf_get_state_name((buf_block_t*)bpage); bpage->state = state; @@ -314,10 +332,11 @@ buf_page_set_state( old_state_name, state, buf_get_state_name((buf_block_t*)bpage)); + ut_a(state == BUF_BLOCK_NOT_USED + || state == BUF_BLOCK_REMOVE_HASH + || state == BUF_BLOCK_FILE_PAGE); } - ut_a(state == BUF_BLOCK_NOT_USED - || state == BUF_BLOCK_REMOVE_HASH); break; case BUF_BLOCK_REMOVE_HASH: ut_a(state == BUF_BLOCK_MEMORY); @@ -341,7 +360,7 @@ buf_block_set_state( /*********************************************************************//** Determines if a block is mapped to a tablespace. -@return TRUE if mapped */ +@return TRUE if mapped */ UNIV_INLINE ibool buf_page_in_file( @@ -369,7 +388,7 @@ buf_page_in_file( #ifndef UNIV_HOTBACKUP /*********************************************************************//** Determines if a block should be on unzip_LRU list. -@return TRUE if block belongs to unzip_LRU */ +@return TRUE if block belongs to unzip_LRU */ UNIV_INLINE ibool buf_page_belongs_to_unzip_LRU( @@ -384,23 +403,22 @@ buf_page_belongs_to_unzip_LRU( /*********************************************************************//** Gets the mutex of a block. 
-@return pointer to mutex protecting bpage */ +@return pointer to mutex protecting bpage */ UNIV_INLINE -ib_mutex_t* +BPageMutex* buf_page_get_mutex( /*===============*/ const buf_page_t* bpage) /*!< in: pointer to control block */ { + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + switch (buf_page_get_state(bpage)) { case BUF_BLOCK_POOL_WATCH: ut_error; return(NULL); case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: { - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - + case BUF_BLOCK_ZIP_DIRTY: return(&buf_pool->zip_mutex); - } default: return(&((buf_block_t*) bpage)->mutex); } @@ -408,7 +426,7 @@ buf_page_get_mutex( /*********************************************************************//** Get the flush type of a page. -@return flush type */ +@return flush type */ UNIV_INLINE buf_flush_t buf_page_get_flush_type( @@ -443,24 +461,22 @@ buf_page_set_flush_type( ut_ad(buf_page_get_flush_type(bpage) == flush_type); } -/*********************************************************************//** -Map a block to a file page. */ +/** Map a block to a file page. +@param[in,out] block pointer to control block +@param[in] page_id page id */ UNIV_INLINE void buf_block_set_file_page( -/*====================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - ulint space, /*!< in: tablespace id */ - ulint page_no)/*!< in: page number */ + buf_block_t* block, + const page_id_t& page_id) { buf_block_set_state(block, BUF_BLOCK_FILE_PAGE); - block->page.space = static_cast(space); - block->page.offset = static_cast(page_no); + block->page.id.copy_from(page_id); } /*********************************************************************//** Gets the io_fix state of a block. 
-@return io_fix state */ +@return io_fix state */ UNIV_INLINE enum buf_io_fix buf_page_get_io_fix( @@ -469,7 +485,8 @@ buf_page_get_io_fix( { ut_ad(bpage != NULL); - enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix; + enum buf_io_fix io_fix = bpage->io_fix; + #ifdef UNIV_DEBUG switch (io_fix) { case BUF_IO_NONE: @@ -485,7 +502,7 @@ buf_page_get_io_fix( /*********************************************************************//** Gets the io_fix state of a block. -@return io_fix state */ +@return io_fix state */ UNIV_INLINE enum buf_io_fix buf_block_get_io_fix( @@ -507,7 +524,7 @@ buf_page_set_io_fix( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); bpage->io_fix = io_fix; @@ -544,7 +561,7 @@ buf_page_set_sticky( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE); @@ -562,7 +579,7 @@ buf_page_unset_sticky( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN); @@ -581,7 +598,7 @@ buf_page_can_relocate( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); @@ -592,7 +609,7 @@ buf_page_can_relocate( /*********************************************************************//** Determine if a block has been flagged old. 
-@return TRUE if old */ +@return TRUE if old */ UNIV_INLINE ibool buf_page_is_old( @@ -602,7 +619,7 @@ buf_page_is_old( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(buf_page_in_file(bpage)); return(bpage->old); @@ -646,7 +663,7 @@ buf_page_set_old( /*********************************************************************//** Determine the time of first access of a block in the buffer pool. -@return ut_time_ms() at the time of first access, 0 if not accessed */ +@return ut_time_ms() at the time of first access, 0 if not accessed */ UNIV_INLINE unsigned buf_page_is_accessed( @@ -683,7 +700,7 @@ buf_page_set_accessed( /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed page frame exists, or NULL. -@return control block, or NULL */ +@return control block, or NULL */ UNIV_INLINE buf_block_t* buf_page_get_block( @@ -705,7 +722,7 @@ buf_page_get_block( #ifdef UNIV_DEBUG /*********************************************************************//** Gets a pointer to the memory frame of a block. -@return pointer to the frame */ +@return pointer to the frame */ UNIV_INLINE buf_frame_t* buf_block_get_frame( @@ -742,50 +759,6 @@ ok: } #endif /* UNIV_DEBUG */ -/*********************************************************************//** -Gets the space id of a block. -@return space id */ -UNIV_INLINE -ulint -buf_page_get_space( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - ut_ad(bpage); - ut_a(buf_page_in_file(bpage)); - - return(bpage->space); -} - -/*********************************************************************//** -Gets the space id of a block. 
-@return space id */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - - return(block->page.space); -} - -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_page_get_page_no( -/*=================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - ut_ad(bpage); - ut_a(buf_page_in_file(bpage)); - - return(bpage->offset); -} /*********************************************************************** FIXME_FTS Gets the frame the pointer is pointing to. */ UNIV_INLINE @@ -804,64 +777,6 @@ buf_frame_align( return(frame); } -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - - return(block->page.offset); -} - -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_page_get_zip_size( -/*==================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - return(bpage->zip.ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0); -} - -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_block_get_zip_size( -/*===================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - return(block->page.zip.ssize - ? 
(UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0); -} - -#ifndef UNIV_HOTBACKUP -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG -/*********************************************************************//** -Gets the compressed page descriptor corresponding to an uncompressed page -if applicable. -@return compressed page descriptor, or NULL */ -UNIV_INLINE -const page_zip_des_t* -buf_frame_get_page_zip( -/*===================*/ - const byte* ptr) /*!< in: pointer to the page */ -{ - return(buf_block_get_page_zip(buf_block_align(ptr))); -} -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - /**********************************************************************//** Gets the space id, page offset, and byte offset within page of a pointer pointing to a buffer frame containing a file page. */ @@ -885,7 +800,7 @@ buf_ptr_get_fsp_addr( /**********************************************************************//** Gets the hash value of the page the pointer is pointing to. This can be used in searches in the lock hash table. 
-@return lock hash value */ +@return lock hash value */ UNIV_INLINE ulint buf_block_get_lock_hash_val( @@ -894,10 +809,9 @@ buf_block_get_lock_hash_val( { ut_ad(block); ut_ad(buf_page_in_file(&block->page)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE) - || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_X) + || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_S)); + return(block->lock_hash_val); } @@ -912,8 +826,8 @@ buf_page_alloc_descriptor(void) { buf_page_t* bpage; - bpage = (buf_page_t*) ut_malloc(sizeof *bpage); - ut_d(memset(bpage, 0, sizeof *bpage)); + bpage = (buf_page_t*) ut_zalloc_nokey(sizeof *bpage); + ut_ad(bpage); UNIV_MEM_ALLOC(bpage, sizeof *bpage); return(bpage); @@ -942,13 +856,13 @@ buf_block_free( buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); buf_pool_mutex_exit(buf_pool); } @@ -956,7 +870,7 @@ buf_block_free( /*********************************************************************//** Copies contents of a buffer frame to a given buffer. -@return buf */ +@return buf */ UNIV_INLINE byte* buf_frame_copy( @@ -972,24 +886,10 @@ buf_frame_copy( } #ifndef UNIV_HOTBACKUP -/********************************************************************//** -Calculates a folded value of a file page address to use in the page hash -table. 
-@return the folded value */ -UNIV_INLINE -ulint -buf_page_address_fold( -/*==================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page within space */ -{ - return((space << 20) + space + offset); -} - /********************************************************************//** Gets the youngest modification log sequence number for a frame. Returns zero if not file page or no modification occurred yet. -@return newest modification to page */ +@return newest modification to page */ UNIV_INLINE lsn_t buf_page_get_newest_modification( @@ -998,7 +898,7 @@ buf_page_get_newest_modification( page frame */ { lsn_t lsn; - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); @@ -1023,13 +923,17 @@ buf_block_modify_clock_inc( /*=======================*/ buf_block_t* block) /*!< in: block */ { -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block); - ut_ad((buf_pool_mutex_own(buf_pool) - && (block->page.buf_fix_count == 0)) - || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ + /* No latch is acquired if block belongs to intrinsic table. */ + if (!fsp_is_system_temporary(block->page.id.space())) { + ut_ad((buf_pool_mutex_own(buf_pool) + && (block->page.buf_fix_count == 0)) + || rw_lock_own_flagged(&block->lock, + RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX)); + } +#endif /* UNIV_DEBUG */ block->modify_clock++; } @@ -1037,38 +941,45 @@ buf_block_modify_clock_inc( /********************************************************************//** Returns the value of the modify clock. The caller must have an s-lock or x-lock on the block. 
-@return value */ +@return value */ UNIV_INLINE ib_uint64_t buf_block_get_modify_clock( /*=======================*/ buf_block_t* block) /*!< in: block */ { -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ +#ifdef UNIV_DEBUG + /* No latch is acquired if block belongs to intrinsic table. */ + if (!fsp_is_system_temporary(block->page.id.space())) { + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S) + || rw_lock_own(&(block->lock), RW_LOCK_X) + || rw_lock_own(&(block->lock), RW_LOCK_SX)); + } +#endif /* UNIV_DEBUG */ return(block->modify_clock); } -/*******************************************************************//** -Increments the bufferfix count. */ +/** Increments the bufferfix count. +@param[in,out] bpage block to bufferfix +@return the count */ UNIV_INLINE -void +ulint buf_block_fix( -/*===========*/ - buf_block_t* block) /*!< in/out: block to bufferfix */ + buf_page_t* bpage) { -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&block->page.buf_fix_count, 1); -#else - ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); + return(os_atomic_increment_uint32(&bpage->buf_fix_count, 1)); +} - mutex_enter(block_mutex); - ++block->page.buf_fix_count; - mutex_exit(block_mutex); -#endif /* PAGE_ATOMIC_REF_COUNT */ +/** Increments the bufferfix count. 
+@param[in,out] block block to bufferfix +@return the count */ +UNIV_INLINE +ulint +buf_block_fix( + buf_block_t* block) +{ + return(buf_block_fix(&block->page)); } /*******************************************************************//** @@ -1077,47 +988,48 @@ UNIV_INLINE void buf_block_buf_fix_inc_func( /*=======================*/ -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG const char* file, /*!< in: file name */ ulint line, /*!< in: line */ -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ buf_block_t* block) /*!< in/out: block to bufferfix */ { -#ifdef UNIV_SYNC_DEBUG - ibool ret; +#ifdef UNIV_DEBUG + /* No debug latch is acquired if block belongs to system temporary. + Debug latch is not of much help if access to block is single + threaded. */ + if (!fsp_is_system_temporary(block->page.id.space())) { + ibool ret; + ret = rw_lock_s_lock_nowait(&block->debug_latch, file, line); + ut_a(ret); + } +#endif /* UNIV_DEBUG */ - ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); - ut_a(ret); -#endif /* UNIV_SYNC_DEBUG */ - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&block->page.buf_fix_count, 1); -#else - ut_ad(mutex_own(&block->mutex)); - - ++block->page.buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ + buf_block_fix(block); } -/*******************************************************************//** -Decrements the bufferfix count. */ +/** Decrements the bufferfix count. 
+@param[in,out] bpage block to bufferunfix +@return the remaining buffer-fix count */ UNIV_INLINE -void +ulint buf_block_unfix( -/*============*/ - buf_block_t* block) /*!< in/out: block to bufferunfix */ + buf_page_t* bpage) { - ut_ad(block->page.buf_fix_count > 0); + ulint count = os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); + ut_ad(count + 1 != 0); + return(count); +} -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); -#else - ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); - - mutex_enter(block_mutex); - --block->page.buf_fix_count; - mutex_exit(block_mutex); -#endif /* PAGE_ATOMIC_REF_COUNT */ +/** Decrements the bufferfix count. +@param[in,out] block block to bufferunfix +@return the remaining buffer-fix count */ +UNIV_INLINE +ulint +buf_block_unfix( + buf_block_t* block) +{ + return(buf_block_unfix(&block->page)); } /*******************************************************************//** @@ -1128,39 +1040,34 @@ buf_block_buf_fix_dec( /*==================*/ buf_block_t* block) /*!< in/out: block to bufferunfix */ { - ut_ad(block->page.buf_fix_count > 0); + buf_block_unfix(block); -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); -#else - mutex_enter(&block->mutex); - --block->page.buf_fix_count; - mutex_exit(&block->mutex); -#endif /* PAGE_ATOMIC_REF_COUNT */ - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&block->debug_latch); -#endif +#ifdef UNIV_DEBUG + /* No debug latch is acquired if block belongs to system temporary. + Debug latch is not of much help if access to block is single + threaded. */ + if (!fsp_is_system_temporary(block->page.id.space())) { + rw_lock_s_unlock(&block->debug_latch); + } +#endif /* UNIV_DEBUG */ } -/******************************************************************//** -Returns the buffer pool instance given space and offset of page +/** Returns the buffer pool instance given a page id. 
+@param[in] page_id page id @return buffer pool */ UNIV_INLINE buf_pool_t* buf_pool_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page within space */ + const page_id_t& page_id) { - ulint fold; - ulint index; - ulint ignored_offset; + /* 2log of BUF_READ_AHEAD_AREA (64) */ + ulint ignored_page_no = page_id.page_no() >> 6; - ignored_offset = offset >> 6; /* 2log of BUF_READ_AHEAD_AREA (64)*/ - fold = buf_page_address_fold(space, ignored_offset); - index = fold % srv_buf_pool_instances; - return(&buf_pool_ptr[index]); + page_id_t id(page_id.space(), ignored_page_no); + + ulint i = id.fold() % srv_buf_pool_instances; + + return(&buf_pool_ptr[i]); } /******************************************************************//** @@ -1178,103 +1085,98 @@ buf_pool_from_array( return(&buf_pool_ptr[index]); } -/******************************************************************//** -Returns the control block of a file page, NULL if not found. -@return block, NULL if not found */ +/** Returns the control block of a file page, NULL if not found. 
+@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@return block, NULL if not found */ UNIV_INLINE buf_page_t* buf_page_hash_get_low( -/*==================*/ - buf_pool_t* buf_pool,/*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space */ - ulint fold) /*!< in: buf_page_address_fold(space, offset) */ + buf_pool_t* buf_pool, + const page_id_t& page_id) { buf_page_t* bpage; -#ifdef UNIV_SYNC_DEBUG - ulint hash_fold; +#ifdef UNIV_DEBUG rw_lock_t* hash_lock; - hash_fold = buf_page_address_fold(space, offset); - ut_ad(hash_fold == fold); - - hash_lock = hash_get_lock(buf_pool->page_hash, fold); - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX) - || rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold()); + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X) + || rw_lock_own(hash_lock, RW_LOCK_S)); +#endif /* UNIV_DEBUG */ /* Look for the page in the hash table */ - HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage, + HASH_SEARCH(hash, buf_pool->page_hash, page_id.fold(), buf_page_t*, + bpage, ut_ad(bpage->in_page_hash && !bpage->in_zip_hash && buf_page_in_file(bpage)), - bpage->space == space && bpage->offset == offset); + page_id.equals_to(bpage->id)); if (bpage) { ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_page_hash); ut_ad(!bpage->in_zip_hash); + ut_ad(buf_pool_from_bpage(bpage) == buf_pool); } return(bpage); } -/******************************************************************//** -Returns the control block of a file page, NULL if not found. +/** Returns the control block of a file page, NULL if not found. If the block is found and lock is not NULL then the appropriate page_hash lock is acquired in the specified lock mode. Otherwise, mode value is ignored. It is up to the caller to release the lock. If the block is found and the lock is NULL then the page_hash lock is released by this function. 
-@return block, NULL if not found, or watch sentinel (if watch is true) */ +@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@param[in,out] lock lock of the page hash acquired if bpage is +found, NULL otherwise. If NULL is passed then the hash_lock is released by +this function. +@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if +lock == NULL +@param[in] watch if true, return watch sentinel also. +@return pointer to the bpage or NULL; if NULL, lock is also NULL or +a watch sentinel. */ UNIV_INLINE buf_page_t* buf_page_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode, /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ - bool watch) /*!< in: if true, return watch - sentinel also. 
*/ + buf_pool_t* buf_pool, + const page_id_t& page_id, + rw_lock_t** lock, + ulint lock_mode, + bool watch) { buf_page_t* bpage = NULL; - ulint fold; rw_lock_t* hash_lock; - ulint mode = RW_LOCK_SHARED; + ulint mode = RW_LOCK_S; if (lock != NULL) { *lock = NULL; - ut_ad(lock_mode == RW_LOCK_EX - || lock_mode == RW_LOCK_SHARED); + ut_ad(lock_mode == RW_LOCK_X + || lock_mode == RW_LOCK_S); mode = lock_mode; } - fold = buf_page_address_fold(space, offset); - hash_lock = hash_get_lock(buf_pool->page_hash, fold); + hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold()); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); - if (mode == RW_LOCK_SHARED) { + if (mode == RW_LOCK_S) { rw_lock_s_lock(hash_lock); + + /* If not own buf_pool_mutex, page_hash can be changed. */ + hash_lock = hash_lock_s_confirm( + hash_lock, buf_pool->page_hash, page_id.fold()); } else { rw_lock_x_lock(hash_lock); + /* If not own buf_pool_mutex, page_hash can be changed. 
*/ + hash_lock = hash_lock_x_confirm( + hash_lock, buf_pool->page_hash, page_id.fold()); } - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, page_id); if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) { if (!watch) { @@ -1284,8 +1186,7 @@ buf_page_hash_get_locked( } ut_ad(buf_page_in_file(bpage)); - ut_ad(offset == bpage->offset); - ut_ad(space == bpage->space); + ut_ad(page_id.equals_to(bpage->id)); if (lock == NULL) { /* The caller wants us to release the page_hash lock */ @@ -1297,7 +1198,7 @@ buf_page_hash_get_locked( } unlock_and_exit: - if (mode == RW_LOCK_SHARED) { + if (mode == RW_LOCK_S) { rw_lock_s_unlock(hash_lock); } else { rw_lock_x_unlock(hash_lock); @@ -1306,52 +1207,46 @@ exit: return(bpage); } -/******************************************************************//** -Returns the control block of a file page, NULL if not found. +/** Returns the control block of a file page, NULL if not found. If the block is found and lock is not NULL then the appropriate page_hash lock is acquired in the specified lock mode. Otherwise, mode value is ignored. It is up to the caller to release the lock. If the block is found and the lock is NULL then the page_hash lock is released by this function. -@return block, NULL if not found */ +@param[in] buf_pool buffer pool instance +@param[in] page_id page id +@param[in,out] lock lock of the page hash acquired if bpage is +found, NULL otherwise. If NULL is passed then the hash_lock is released by +this function. +@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if +lock == NULL +@return pointer to the block or NULL; if NULL, lock is also NULL. */ UNIV_INLINE buf_block_t* buf_block_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. 
*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode) /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ + buf_pool_t* buf_pool, + const page_id_t& page_id, + rw_lock_t** lock, + ulint lock_mode) { buf_page_t* bpage = buf_page_hash_get_locked(buf_pool, - space, - offset, + page_id, lock, lock_mode); buf_block_t* block = buf_page_get_block(bpage); - if (block) { + if (block != NULL) { + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); -#ifdef UNIV_SYNC_DEBUG ut_ad(!lock || rw_lock_own(*lock, lock_mode)); -#endif /* UNIV_SYNC_DEBUG */ + return(block); } else if (bpage) { /* It is not a block. Just a bpage */ ut_ad(buf_page_in_file(bpage)); if (lock) { - if (lock_mode == RW_LOCK_SHARED) { + if (lock_mode == RW_LOCK_S) { rw_lock_s_unlock(*lock); } else { rw_lock_x_unlock(*lock); @@ -1366,23 +1261,19 @@ buf_block_hash_get_locked( return(NULL); } -/********************************************************************//** -Returns TRUE if the page can be found in the buffer pool hash table. - +/** Returns TRUE if the page can be found in the buffer pool hash table. NOTE that it is possible that the page is not yet read from disk, though. 
- -@return TRUE if found in the page hash table */ +@param[in] page_id page id +@return TRUE if found in the page hash table */ UNIV_INLINE ibool buf_page_peek( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ + const page_id_t& page_id) { - buf_pool_t* buf_pool = buf_pool_get(space, offset); + buf_pool_t* buf_pool = buf_pool_get(page_id); - return(buf_page_hash_get(buf_pool, space, offset) != NULL); + return(buf_page_hash_get(buf_pool, page_id) != NULL); } /********************************************************************//** @@ -1393,19 +1284,27 @@ buf_page_release_zip( /*=================*/ buf_page_t* bpage) /*!< in: buffer block */ { - buf_block_t* block; - - block = (buf_block_t*) bpage; + ut_ad(bpage); + ut_a(bpage->buf_fix_count > 0); switch (buf_page_get_state(bpage)) { case BUF_BLOCK_FILE_PAGE: -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&block->debug_latch); -#endif /* UNUV_SYNC_DEBUG */ +#ifdef UNIV_DEBUG + { + /* No debug latch is acquired if block belongs to system + temporary. Debug latch is not of much help if access to block + is single threaded. */ + buf_block_t* block = reinterpret_cast(bpage); + if (!fsp_is_system_temporary(block->page.id.space())) { + rw_lock_s_unlock(&block->debug_latch); + } + } /* Fall through */ +#endif /* UNIV_DEBUG */ + case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: - buf_block_unfix(block); + buf_block_unfix(reinterpret_cast(bpage)); return; case BUF_BLOCK_POOL_WATCH: @@ -1420,31 +1319,34 @@ buf_page_release_zip( } /********************************************************************//** -Decrements the bufferfix count of a buffer control block and releases -a latch, if specified. */ +Releases a latch, if specified. 
*/ UNIV_INLINE void -buf_page_release( -/*=============*/ +buf_page_release_latch( +/*===================*/ buf_block_t* block, /*!< in: buffer block */ ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ { - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - if (rw_latch == RW_S_LATCH) { - rw_lock_s_unlock(&(block->lock)); - } else if (rw_latch == RW_X_LATCH) { - rw_lock_x_unlock(&(block->lock)); +#ifdef UNIV_DEBUG + /* No debug latch is acquired if block belongs to system + temporary. Debug latch is not of much help if access to block + is single threaded. */ + if (!fsp_is_system_temporary(block->page.id.space())) { + rw_lock_s_unlock(&block->debug_latch); } +#endif /* UNIV_DEBUG */ - buf_block_unfix(block); + if (rw_latch == RW_S_LATCH) { + rw_lock_s_unlock(&block->lock); + } else if (rw_latch == RW_SX_LATCH) { + rw_lock_sx_unlock(&block->lock); + } else if (rw_latch == RW_X_LATCH) { + rw_lock_x_unlock(&block->lock); + } } -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /*********************************************************************//** Adds latch level info for the rw-lock protecting the buffer frame. This should be called in the debug version after a successful latching of a @@ -1455,12 +1357,12 @@ buf_block_dbg_add_level( /*====================*/ buf_block_t* block, /*!< in: buffer page where we have acquired latch */ - ulint level) /*!< in: latching order level */ + latch_level_t level) /*!< in: latching order level */ { - sync_thread_add_level(&block->lock, level, FALSE); + sync_check_lock(&block->lock, level); } -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ /********************************************************************//** Acquire mutex on all buffer pool instances. 
*/ UNIV_INLINE @@ -1468,12 +1370,9 @@ void buf_pool_mutex_enter_all(void) /*==========================*/ { - ulint i; + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); buf_pool_mutex_enter(buf_pool); } } @@ -1531,4 +1430,35 @@ buf_page_get_frame( } } +/** Verify the possibility that a stored page is not in buffer pool. +@param[in] withdraw_clock withdraw clock when stored the page +@retval true if the page might be relocated */ +UNIV_INLINE +bool +buf_pool_is_obsolete( + ulint withdraw_clock) +{ + return(buf_pool_withdrawing + || buf_withdraw_clock != withdraw_clock); +} + +/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, +if needed. +@param[in] size size in bytes +@return aligned size */ +UNIV_INLINE +ulint +buf_pool_size_align( + ulint size) +{ + const ulint m = srv_buf_pool_instances * srv_buf_pool_chunk_unit; + size = ut_max(size, srv_buf_pool_min_size); + + if (size % m == 0) { + return(size); + } else { + return((size / m + 1) * m); + } +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h index 6818345f965..9405251dc74 100644 --- a/storage/innobase/include/buf0checksum.h +++ b/storage/innobase/include/buf0checksum.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,26 +30,25 @@ Created Aug 11, 2011 Vasil Dimov #include "buf0types.h" -/** Magic value to use instead of checksums when they are disabled */ -#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL - -/********************************************************************//** -Calculates a page CRC32 which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ib_uint32_t +/** Calculates the CRC32 checksum of a page. The value is stored to the page +when it is written to a file and also checked for a match when reading from +the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian +variants. Note that we must be careful to calculate the same value on 32-bit +and 64-bit architectures. +@param[in] page buffer page (UNIV_PAGE_SIZE bytes) +@param[in] use_legacy_big_endian if true then use big endian +byteorder when converting byte strings to integers +@return checksum */ +uint32_t buf_calc_page_crc32( -/*================*/ - const byte* page); /*!< in: buffer page */ + const byte* page, + bool use_legacy_big_endian = false); /********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value on 32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN +@return checksum */ ulint buf_calc_page_new_checksum( /*=======================*/ @@ -62,22 +61,22 @@ checksum. NOTE: we must first store the new formula checksum to FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum because this takes that field as an input! 
-@return checksum */ -UNIV_INTERN +@return checksum */ ulint buf_calc_page_old_checksum( /*=======================*/ const byte* page); /*!< in: buffer page */ + /********************************************************************//** Return a printable string describing the checksum algorithm. -@return algorithm name */ -UNIV_INTERN +@return algorithm name */ const char* buf_checksum_algorithm_name( /*========================*/ srv_checksum_algorithm_t algo); /*!< in: algorithm */ extern ulong srv_checksum_algorithm; +extern bool legacy_big_endian_checksum; #endif /* buf0checksum_h */ diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index a62a6400d97..eb13c3b35e5 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,6 +29,7 @@ Created 2011/12/19 Inaam Rana #include "univ.i" #include "ut0byte.h" #include "log0log.h" +#include "buf0types.h" #include "log0recv.h" #ifndef UNIV_HOTBACKUP @@ -40,9 +41,10 @@ extern ibool buf_dblwr_being_created; /****************************************************************//** Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ -UNIV_INTERN -void +doublewrite buffer is placed on the trx system header page. +@return true if successful, false if not. 
*/ +MY_ATTRIBUTE((warn_unused_result)) +bool buf_dblwr_create(void); /*==================*/ @@ -51,29 +53,24 @@ At a database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function loads the pages from double write buffer into memory. */ -void +recovery, this function loads the pages from double write buffer into memory. +@return DB_SUCCESS or error code */ +dberr_t buf_dblwr_init_or_load_pages( -/*=========================*/ os_file_t file, - char* path, - bool load_corrupt_pages); + const char* path); -/****************************************************************//** -Process the double write buffer pages. */ +/** Process and remove the double write buffer pages for all tablespaces. */ void buf_dblwr_process(void); -/*===================*/ /****************************************************************//** frees doublewrite buffer. */ -UNIV_INTERN void buf_dblwr_free(void); /*================*/ /********************************************************************//** Updates the doublewrite buffer when an IO request is completed. */ -UNIV_INTERN void buf_dblwr_update( /*=============*/ @@ -83,7 +80,6 @@ buf_dblwr_update( Determines if a page number is located inside the doublewrite buffer. @return TRUE if the location is inside the two blocks of the doublewrite buffer */ -UNIV_INTERN ibool buf_dblwr_page_inside( /*==================*/ @@ -92,18 +88,23 @@ buf_dblwr_page_inside( Posts a buffer page for writing. If the doublewrite memory buffer is full, calls buf_dblwr_flush_buffered_writes and waits for for free space to appear. 
*/ -UNIV_INTERN void buf_dblwr_add_to_batch( /*====================*/ buf_page_t* bpage); /*!< in: buffer block to write */ + +/********************************************************************//** +Flush a batch of writes to the datafiles that have already been +written to the dblwr buffer on disk. */ +void +buf_dblwr_sync_datafiles(); + /********************************************************************//** Flushes possible buffered writes from the doublewrite memory buffer to disk, and also wakes up the aio thread if simulated aio is used. It is very important to call this function after a batch of writes has been posted, and also when we may have to wait for a page latch! Otherwise a deadlock of threads can occur. */ -UNIV_INTERN void buf_dblwr_flush_buffered_writes(void); /*=================================*/ @@ -115,7 +116,6 @@ flushes in the doublewrite buffer are in use we wait here for one to become free. We are guaranteed that a slot will become free because any thread that is using a slot must also release the slot before leaving this function. */ -UNIV_INTERN void buf_dblwr_write_single_page( /*========================*/ diff --git a/storage/innobase/include/buf0dump.h b/storage/innobase/include/buf0dump.h index c704a8e97e0..3dbddfa6bf5 100644 --- a/storage/innobase/include/buf0dump.h +++ b/storage/innobase/include/buf0dump.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -33,7 +33,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start a dump. 
This function is called by MySQL code via buffer_pool_dump_now() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_dump_start(); /*============*/ @@ -43,7 +42,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start a load. This function is called by MySQL code via buffer_pool_load_now() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_load_start(); /*============*/ @@ -52,7 +50,6 @@ buf_load_start(); Aborts a currently running buffer pool load. This function is called by MySQL code via buffer_pool_load_abort() and it should return immediately because the whole MySQL is frozen during its execution. */ -UNIV_INTERN void buf_load_abort(); /*============*/ @@ -62,7 +59,7 @@ This is the main thread for buffer pool dump/load. It waits for an event and when waked up either performs a dump or load and sleeps again. @return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t DECLARE_THREAD(buf_dump_thread)( /*============================*/ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index f1ca1039ccb..40083798d48 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -31,15 +31,23 @@ Created 11/5/1995 Heikki Tuuri #include "ut0byte.h" #include "log0log.h" #ifndef UNIV_HOTBACKUP -#include "mtr0types.h" #include "buf0types.h" /** Flag indicating if the page_cleaner is in active state. */ -extern ibool buf_page_cleaner_is_active; +extern bool buf_page_cleaner_is_active; + +#ifdef UNIV_DEBUG + +/** Value of MySQL global variable used to disable page cleaner. */ +extern my_bool innodb_page_cleaner_disabled_debug; + +#endif /* UNIV_DEBUG */ /** Event to synchronise with the flushing. 
*/ extern os_event_t buf_flush_event; +class ut_stage_alter_t; + /** Handled page counters for a single flush */ struct flush_counters_t { ulint flushed; /*!< number of dirty pages flushed */ @@ -50,7 +58,6 @@ struct flush_counters_t { /********************************************************************//** Remove a block from the flush list of modified blocks. */ -UNIV_INTERN void buf_flush_remove( /*=============*/ @@ -59,7 +66,6 @@ buf_flush_remove( Relocates a buffer control block on the flush_list. Note that it is assumed that the contents of bpage has already been copied to dpage. */ -UNIV_INTERN void buf_flush_relocate_on_flush_list( /*=============================*/ @@ -67,22 +73,25 @@ buf_flush_relocate_on_flush_list( buf_page_t* dpage); /*!< in/out: destination block */ /********************************************************************//** Updates the flush system data structures when a write is completed. */ -UNIV_INTERN void buf_flush_write_complete( /*=====================*/ buf_page_t* bpage); /*!< in: pointer to the block in question */ #endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Initializes a page for writing to the tablespace. */ -UNIV_INTERN +/** Initialize a page for writing to the tablespace. 
+@param[in] block buffer block; NULL if bypassing the buffer pool +@param[in,out] page page frame +@param[in,out] page_zip_ compressed page, or NULL if uncompressed +@param[in] newest_lsn newest modification LSN to the page +@param[in] skip_checksum whether to disable the page checksum */ void buf_flush_init_for_writing( -/*=======================*/ - byte* page, /*!< in/out: page */ - void* page_zip_, /*!< in/out: compressed page, or NULL */ - lsn_t newest_lsn); /*!< in: newest modification lsn - to the page */ + const buf_block_t* block, + byte* page, + void* page_zip_, + lsn_t newest_lsn, + bool skip_checksum); + #ifndef UNIV_HOTBACKUP # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /********************************************************************//** @@ -91,36 +100,54 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this function, and they will be released by this function after flushing. This is loosely based on buf_flush_batch() and buf_flush_page(). @return TRUE if the page was flushed and the mutexes released */ -UNIV_INTERN ibool buf_flush_page_try( /*===============*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_block_t* block) /*!< in/out: buffer control block */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush list of -all buffer pool instances. +/** Do flushing batch of a given type. NOTE: The calling thread is not allowed to own any latches on pages! 
+@param[in,out] buf_pool buffer pool instance +@param[in] type flush type +@param[in] min_n wished minimum number of blocks flushed +(it is not guaranteed that the actual number is that big, though) +@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose +oldest_modification is smaller than this should be flushed (if their number +does not exceed min_n), otherwise ignored +@param[out] n the number of pages which were processed is +passed back to caller. Ignored if NULL +@retval true if a batch was queued successfully. +@retval false if another batch of same type was already running. */ +bool +buf_flush_do_batch( + buf_pool_t* buf_pool, + buf_flush_t type, + ulint min_n, + lsn_t lsn_limit, + flush_counters_t* n); + + +/** This utility flushes dirty blocks from the end of the flush list of all +buffer pool instances. +NOTE: The calling thread is not allowed to own any latches on pages! +@param[in] min_n wished minimum number of blocks flushed (it is +not guaranteed that the actual number is that big, though) +@param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose +oldest_modification is smaller than this should be flushed (if their number +does not exceed min_n), otherwise ignored +@param[out] n_processed the number of pages which were processed is +passed back to caller. Ignored if NULL. @return true if a batch was queued successfully for each buffer pool instance. false if another batch of same type was already running in at least one of the buffer pool instance */ -UNIV_INTERN bool -buf_flush_list( -/*===========*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed); /*!< out: the number of pages - which were processed is passed - back to caller. 
Ignored if NULL */ +buf_flush_lists( + ulint min_n, + lsn_t lsn_limit, + ulint* n_processed); + /******************************************************************//** This function picks up a single page from the tail of the LRU list, flushes it (if it is dirty), removes it from page_hash and LRU @@ -128,26 +155,31 @@ list and puts it on the free list. It is called from user threads when they are unable to find a replaceable page at the tail of the LRU list i.e.: when the background LRU flushing in the page_cleaner thread is not fast enough to keep pace with the workload. -@return TRUE if success. */ -UNIV_INTERN -ibool +@return true if success. */ +bool buf_flush_single_page_from_LRU( /*===========================*/ buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ /******************************************************************//** Waits until a flush batch of the given type ends */ -UNIV_INTERN void buf_flush_wait_batch_end( /*=====================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ buf_flush_t type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ +/** +Waits until a flush batch of the given lsn ends +@param[in] new_oldest target oldest_modified_lsn to wait for */ + +void +buf_flush_wait_flushed( + lsn_t new_oldest); + /******************************************************************//** Waits until a flush batch of the given type ends. This is called by a thread that only wants to wait for a flush to end but doesn't do any flushing itself. 
*/ -UNIV_INTERN void buf_flush_wait_batch_end_wait_only( /*===============================*/ @@ -162,8 +194,13 @@ UNIV_INLINE void buf_flush_note_modification( /*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - mtr_t* mtr); /*!< in: mtr */ + buf_block_t* block, /*!< in: block which is modified */ + lsn_t start_lsn, /*!< in: start lsn of the first mtr in a + set of mtr's */ + lsn_t end_lsn, /*!< in: end lsn of the last mtr in the + set of mtr's */ + FlushObserver* observer); /*!< in: flush observer */ + /********************************************************************//** This function should be called when recovery has modified a buffer page. */ UNIV_INLINE @@ -178,23 +215,52 @@ buf_flush_recv_note_modification( /********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, i.e., transition FILE_PAGE => NOT_USED allowed. -@return TRUE if can replace immediately */ -UNIV_INTERN +@return TRUE if can replace immediately */ ibool buf_flush_ready_for_replace( /*========================*/ buf_page_t* bpage); /*!< in: buffer control block, must be buf_page_in_file(bpage) and in the LRU list */ + +#ifdef UNIV_DEBUG +/** Disables page cleaner threads (coordinator and workers). +It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0). +@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ +void +buf_flush_page_cleaner_disabled_debug_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save); +#endif /* UNIV_DEBUG */ + /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer -pools. As of now we'll have only one instance of this thread. +pools. As of now we'll have only one coordinator of this thread. 
@return a dummy parameter */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t -DECLARE_THREAD(buf_flush_page_cleaner_thread)( +DECLARE_THREAD(buf_flush_page_cleaner_coordinator)( +/*===============================================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ +/******************************************************************//** +Worker thread of page_cleaner. +@return a dummy parameter */ +extern "C" +os_thread_ret_t +DECLARE_THREAD(buf_flush_page_cleaner_worker)( /*==========================================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/******************************************************************//** +Initialize page_cleaner. */ +void +buf_flush_page_cleaner_init(void); +/*=============================*/ /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -202,13 +268,11 @@ Clears up tail of the LRU lists: The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. @return total pages flushed */ -UNIV_INTERN ulint -buf_flush_LRU_tail(void); -/*====================*/ +buf_flush_LRU_lists(void); +/*=====================*/ /*********************************************************************//** Wait for any possible LRU flushes that are in progress to end. */ -UNIV_INTERN void buf_flush_wait_LRU_batch_end(void); /*==============================*/ @@ -216,8 +280,7 @@ buf_flush_wait_LRU_batch_end(void); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /******************************************************************//** Validates the flush list. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool buf_flush_validate( /*===============*/ @@ -228,14 +291,12 @@ buf_flush_validate( Initialize the red-black tree to speed up insertions into the flush_list during recovery process. 
Should be called at the start of recovery process before any page has been read/written. */ -UNIV_INTERN void buf_flush_init_flush_rbt(void); /*==========================*/ /********************************************************************//** Frees up the red-black tree. */ -UNIV_INTERN void buf_flush_free_flush_rbt(void); /*==========================*/ @@ -246,10 +307,9 @@ NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be held upon entering this function, and they will be released by this -function if it returns true. -@return TRUE if the page was flushed */ -UNIV_INTERN -bool +function. +@return TRUE if page was flushed */ +ibool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ @@ -258,8 +318,7 @@ buf_flush_page( bool sync); /*!< in: true if sync IO request */ /********************************************************************//** Returns true if the block is modified and ready for flushing. -@return true if can flush immediately */ -UNIV_INTERN +@return true if can flush immediately */ bool buf_flush_ready_for_flush( /*======================*/ @@ -268,26 +327,116 @@ buf_flush_ready_for_flush( buf_flush_t flush_type)/*!< in: type of flush */ MY_ATTRIBUTE((warn_unused_result)); -#ifdef UNIV_DEBUG /******************************************************************//** Check if there are any dirty pages that belong to a space id in the flush list in a particular buffer pool. 
-@return number of dirty pages present in a single buffer pool */ -UNIV_INTERN +@return number of dirty pages present in a single buffer pool */ ulint buf_pool_get_dirty_pages_count( /*===========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint id); /*!< in: space id to check */ + ulint id, /*!< in: space id to check */ + FlushObserver* observer); /*!< in: flush observer to check */ /******************************************************************//** Check if there are any dirty pages that belong to a space id in the flush list. -@return count of dirty pages present in all the buffer pools */ -UNIV_INTERN +@return count of dirty pages present in all the buffer pools */ ulint buf_flush_get_dirty_pages_count( /*============================*/ - ulint id); /*!< in: space id to check */ -#endif /* UNIV_DEBUG */ + ulint id, /*!< in: space id to check */ + FlushObserver* observer); /*!< in: flush observer to check */ + +/*******************************************************************//** +Synchronously flush dirty blocks from the end of the flush list of all buffer +pool instances. +NOTE: The calling thread is not allowed to own any latches on pages! */ +void +buf_flush_sync_all_buf_pools(void); +/*==============================*/ + +/** Request IO burst and wake page_cleaner up. +@param[in] lsn_limit upper limit of LSN to be flushed */ +void +buf_flush_request_force( + lsn_t lsn_limit); + +/** We use FlushObserver to track flushing of non-redo logged pages in bulk +create index (BtrBulk.cc). Since we disable redo logging during an index build, +we need to make sure that all dirty pages modified by the index build are +flushed to disk before any redo logged operations go to the index. */ + +class FlushObserver { +public: + /** Constructor + @param[in] space_id table space id + @param[in] trx trx instance + @param[in] stage performance schema accounting object, + used by ALTER TABLE. 
It is passed to log_preflush_pool_modified_pages() + for accounting. */ + FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage); + + /** Destructor */ + ~FlushObserver(); + + /** Check pages have been flushed and removed from the flush list + in a buffer pool instance. + @param[in] instance_no buffer pool instance no + @return true if the pages were removed from the flush list */ + bool is_complete(ulint instance_no) + { + return(m_flushed->at(instance_no) == m_removed->at(instance_no) + || m_interrupted); + } + + /** Interrupt observer not to wait. */ + void interrupted() + { + m_interrupted = true; + } + + /** Check whether trx is interrupted + @return true if trx is interrupted */ + bool check_interrupted(); + + /** Flush dirty pages. */ + void flush(); + + /** Notify observer of flushing a page + @param[in] buf_pool buffer pool instance + @param[in] bpage buffer page to flush */ + void notify_flush( + buf_pool_t* buf_pool, + buf_page_t* bpage); + + /** Notify observer of removing a page from flush list + @param[in] buf_pool buffer pool instance + @param[in] bpage buffer page flushed */ + void notify_remove( + buf_pool_t* buf_pool, + buf_page_t* bpage); +private: + /** Table space id */ + ulint m_space_id; + + /** Trx instance */ + trx_t* m_trx; + + /** Performance schema accounting object, used by ALTER TABLE. + If not NULL, then stage->begin_phase_flush() will be called initially, + specifying the number of pages to be attempted to be flushed and + subsequently, stage->inc() will be called for each page we attempt to + flush. */ + ut_stage_alter_t* m_stage; + + /* Flush request sent */ + std::vector* m_flushed; + + /* Flush request finished */ + std::vector* m_removed; + + /* True if the operation was interrupted. 
*/ + bool m_interrupted; +}; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic index a763cd115fe..ecb98e32619 100644 --- a/storage/innobase/include/buf0flu.ic +++ b/storage/innobase/include/buf0flu.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,21 +27,21 @@ Created 11/5/1995 Heikki Tuuri #include "buf0buf.h" #include "mtr0mtr.h" #include "srv0srv.h" +#include "fsp0types.h" /********************************************************************//** Inserts a modified block into the flush list. */ -UNIV_INTERN void buf_flush_insert_into_flush_list( /*=============================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ buf_block_t* block, /*!< in/out: block which is modified */ lsn_t lsn); /*!< in: oldest modification */ + /********************************************************************//** Inserts a modified block into the flush list in the right sorted position. This function is used by recovery, because there the modifications do not necessarily come in the order of lsn's. 
*/ -UNIV_INTERN void buf_flush_insert_sorted_into_flush_list( /*====================================*/ @@ -57,40 +57,49 @@ UNIV_INLINE void buf_flush_note_modification( /*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - mtr_t* mtr) /*!< in: mtr */ + buf_block_t* block, /*!< in: block which is modified */ + lsn_t start_lsn, /*!< in: start lsn of the mtr that + modified this block */ + lsn_t end_lsn, /*!< in: end lsn of the mtr that + modified this block */ + FlushObserver* observer) /*!< in: flush observer */ { - buf_pool_t* buf_pool = buf_pool_from_block(block); +#ifdef UNIV_DEBUG + { + /* Allow write to proceed to shared temporary tablespace + in read-only mode. */ + ut_ad(!srv_read_only_mode + || fsp_is_system_temporary(block->page.id.space())); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.buf_fix_count > 0); - ut_ad(!srv_read_only_mode); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(!buf_pool_mutex_own(buf_pool)); - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(!mtr->made_dirty || log_flush_order_mutex_own()); - - ut_ad(mtr->start_lsn != 0); - ut_ad(mtr->modifications); + ut_ad(!buf_pool_mutex_own(buf_pool)); + ut_ad(!buf_flush_list_mutex_own(buf_pool)); + } +#endif /* UNIV_DEBUG */ mutex_enter(&block->mutex); - ut_ad(block->page.newest_modification <= mtr->end_lsn); - block->page.newest_modification = mtr->end_lsn; + ut_ad(block->page.newest_modification <= end_lsn); + block->page.newest_modification = end_lsn; - if (!block->page.oldest_modification) { - ut_a(mtr->made_dirty); - ut_ad(log_flush_order_mutex_own()); - buf_flush_insert_into_flush_list( - buf_pool, block, mtr->start_lsn); + /* Don't allow to set flush observer from non-null to null, + or from one observer 
to another. */ + ut_ad(block->page.flush_observer == NULL + || block->page.flush_observer == observer); + block->page.flush_observer = observer; + + if (block->page.oldest_modification == 0) { + buf_pool_t* buf_pool = buf_pool_from_block(block); + + buf_flush_insert_into_flush_list(buf_pool, block, start_lsn); } else { - ut_ad(block->page.oldest_modification <= mtr->start_lsn); + ut_ad(block->page.oldest_modification <= start_lsn); } - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); srv_stats.buf_pool_write_requests.inc(); } @@ -107,33 +116,36 @@ buf_flush_recv_note_modification( lsn_t end_lsn) /*!< in: end lsn of the last mtr in the set of mtr's */ { - buf_pool_t* buf_pool = buf_pool_from_block(block); +#ifdef UNIV_DEBUG + { + ut_ad(!srv_read_only_mode); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.buf_fix_count > 0); - ut_ad(!srv_read_only_mode); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(!buf_pool_mutex_own(buf_pool)); - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(log_flush_order_mutex_own()); + ut_ad(!buf_pool_mutex_own(buf_pool)); + ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(start_lsn != 0); - ut_ad(block->page.newest_modification <= end_lsn); + ut_ad(start_lsn != 0); + ut_ad(block->page.newest_modification <= end_lsn); + } +#endif /* UNIV_DEBUG */ + + buf_page_mutex_enter(block); - mutex_enter(&block->mutex); block->page.newest_modification = end_lsn; if (!block->page.oldest_modification) { + buf_pool_t* buf_pool = buf_pool_from_block(block); + buf_flush_insert_sorted_into_flush_list( buf_pool, block, start_lsn); } else { ut_ad(block->page.oldest_modification <= start_lsn); } - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } #endif /* !UNIV_HOTBACKUP */ diff --git 
a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index a7a65df33aa..0cbd77878ec 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -38,8 +38,7 @@ struct trx_t; Returns TRUE if less than 25 % of the buffer pool is available. This can be used in heuristics to prevent huge transactions eating up the whole buffer pool for their locks. -@return TRUE if less than 25 % of buffer pool left */ -UNIV_INTERN +@return TRUE if less than 25 % of buffer pool left */ ibool buf_LRU_buf_pool_running_out(void); /*==============================*/ @@ -56,7 +55,6 @@ Flushes all dirty pages or removes all pages belonging to a given tablespace. A PROBLEM: if readahead is being started, what guarantees that it will not try to read in pages after this operation has completed? */ -UNIV_INTERN void buf_LRU_flush_or_remove_pages( /*==========================*/ @@ -68,7 +66,6 @@ buf_LRU_flush_or_remove_pages( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ -UNIV_INTERN void buf_LRU_insert_zip_clean( /*=====================*/ @@ -86,7 +83,6 @@ accessible via bpage. The caller must hold buf_pool->mutex and must not hold any buf_page_get_mutex() when calling this function. @return true if freed, false otherwise. */ -UNIV_INTERN bool buf_LRU_free_page( /*==============*/ @@ -96,21 +92,19 @@ buf_LRU_free_page( MY_ATTRIBUTE((nonnull)); /******************************************************************//** Try to free a replaceable block. 
-@return TRUE if found and freed */ -UNIV_INTERN -ibool +@return true if found and freed */ +bool buf_LRU_scan_and_free_block( /*========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only 'old' blocks. */ MY_ATTRIBUTE((nonnull,warn_unused_result)); /******************************************************************//** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, returns NULL. -@return a free control block, or NULL if the buf_block->free list is empty */ -UNIV_INTERN +@return a free control block, or NULL if the buf_block->free list is empty */ buf_block_t* buf_LRU_get_free_only( /*==================*/ @@ -138,8 +132,7 @@ we put it to free list to be used. * scan LRU list even if buf_pool->try_LRU_scan is not set * iteration > 1: * same as iteration 1 but sleep 10ms -@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ -UNIV_INTERN +@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ buf_block_t* buf_LRU_get_free_block( /*===================*/ @@ -148,25 +141,21 @@ buf_LRU_get_free_block( /******************************************************************//** Determines if the unzip_LRU list should be used for evicting a victim instead of the general LRU list. -@return TRUE if should use unzip_LRU */ -UNIV_INTERN +@return TRUE if should use unzip_LRU */ ibool buf_LRU_evict_from_unzip_LRU( /*=========================*/ buf_pool_t* buf_pool); /******************************************************************//** Puts a block back to the free list. */ -UNIV_INTERN void buf_LRU_block_free_non_file_page( /*=============================*/ buf_block_t* block); /*!< in: block, must not contain a file page */ /******************************************************************//** -Adds a block to the LRU list. 
Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INTERN +Adds a block to the LRU list. Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ void buf_LRU_add_block( /*==============*/ @@ -177,7 +166,6 @@ buf_LRU_add_block( the start regardless of this parameter */ /******************************************************************//** Adds a block to the LRU list of decompressed zip pages. */ -UNIV_INTERN void buf_unzip_LRU_add_block( /*====================*/ @@ -186,23 +174,20 @@ buf_unzip_LRU_add_block( of the list, else put to the start */ /******************************************************************//** Moves a block to the start of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_young( /*=====================*/ buf_page_t* bpage); /*!< in: control block */ /******************************************************************//** Moves a block to the end of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_old( /*===================*/ buf_page_t* bpage); /*!< in: control block */ /**********************************************************************//** Updates buf_pool->LRU_old_ratio. -@return updated old_pct */ -UNIV_INTERN -ulint +@return updated old_pct */ +uint buf_LRU_old_ratio_update( /*=====================*/ uint old_pct,/*!< in: Reserve this percentage of @@ -213,14 +198,12 @@ buf_LRU_old_ratio_update( /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. 
*/ -UNIV_INTERN void buf_LRU_stat_update(void); /*=====================*/ /******************************************************************//** Remove one page from LRU list and put it to free list */ -UNIV_INTERN void buf_LRU_free_one_page( /*==================*/ @@ -231,7 +214,6 @@ buf_LRU_free_one_page( /******************************************************************//** Adjust LRU hazard pointers if needed. */ - void buf_LRU_adjust_hp( /*==============*/ @@ -241,8 +223,7 @@ buf_LRU_adjust_hp( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Validates the LRU list. -@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_LRU_validate(void); /*==================*/ @@ -250,7 +231,6 @@ buf_LRU_validate(void); #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Prints the LRU list. */ -UNIV_INTERN void buf_LRU_print(void); /*===============*/ diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h index 10714031710..9c97a5147c1 100644 --- a/storage/innobase/include/buf0rea.h +++ b/storage/innobase/include/buf0rea.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -28,36 +28,38 @@ Created 11/5/1995 Heikki Tuuri #define buf0rea_h #include "univ.i" +#include "buf0buf.h" #include "buf0types.h" +/** High-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there. Sets the io_fix flag and sets +an exclusive lock on the buffer frame. 
The flag is cleared and the x-lock +released by the i/o-handler thread. +@param[in] page_id page id +@param[in] page_size page size +@return TRUE if page has been read in, FALSE in case of failure */ +ibool +buf_read_page( + const page_id_t& page_id, + const page_size_t& page_size, + buf_page_t** bpage); + /********************************************************************//** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by the i/o-handler thread. +@param[in] page_id page id +@param[in] page_size page size +@param[in] sync true if synchronous aio is desired @return TRUE if page has been read in, FALSE in case of failure */ -UNIV_INTERN ibool -buf_read_page( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number */ - buf_page_t** bpage);/*!< out: page */ -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. -@return TRUE if page has been read in, FALSE in case of failure */ -UNIV_INTERN -ibool -buf_read_page_async( -/*================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -/********************************************************************//** -Applies a random read-ahead in buf_pool if there are at least a threshold +buf_read_page_background( + const page_id_t& page_id, + const page_size_t& page_size, + bool sync); + +/** Applies a random read-ahead in buf_pool if there are at least a threshold value of accessed pages from the random read-ahead area. 
Does not read any page, not even the one at the position (space, offset), if the read-ahead mechanism is not activated. NOTE 1: the calling thread may own latches on @@ -66,23 +68,20 @@ end up waiting for these latches! NOTE 2: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation which could result in a deadlock if the OS does not support asynchronous i/o. +@param[in] page_id page id of a page which the current thread +wants to access +@param[in] page_size page size +@param[in] inside_ibuf TRUE if we are inside ibuf routine @return number of page read requests issued; NOTE that if we read ibuf pages, it may happen that the page at the given page number does not -get read even if we return a positive value! -@return number of page read requests issued */ -UNIV_INTERN +get read even if we return a positive value! */ ulint buf_read_ahead_random( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, - or 0 */ - ulint offset, /*!< in: page number of a page which - the current thread wants to access */ - ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf - routine */ -/********************************************************************//** -Applies linear read-ahead if in the buf_pool the page is a border page of + const page_id_t& page_id, + const page_size_t& page_size, + ibool inside_ibuf); + +/** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. Does not read any page if the read-ahead mechanism is not activated. Note that the algorithm looks at the 'natural' adjacent successor and @@ -104,20 +103,20 @@ latches! 
NOTE 3: the calling thread must want access to the page given: this rule is set to prevent unintended read-aheads performed by ibuf routines, a situation which could result in a deadlock if the OS does not support asynchronous io. -@return number of page read requests issued */ -UNIV_INTERN +@param[in] page_id page id; see NOTE 3 above +@param[in] page_size page size +@param[in] inside_ibuf TRUE if we are inside ibuf routine +@return number of page read requests issued */ ulint buf_read_ahead_linear( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf routine */ + const page_id_t& page_id, + const page_size_t& page_size, + ibool inside_ibuf); + /********************************************************************//** Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like a read-ahead function. */ -UNIV_INTERN void buf_read_ibuf_merge_pages( /*======================*/ @@ -127,53 +126,37 @@ buf_read_ibuf_merge_pages( to get read in, before this function returns */ const ulint* space_ids, /*!< in: array of space ids */ - const ib_int64_t* space_versions,/*!< in: the spaces must have - this version number - (timestamp), otherwise we - discard the read; we use this - to cancel reads if DISCARD + - IMPORT may have changed the - tablespace size */ const ulint* page_nos, /*!< in: array of page numbers to read, with the highest page number the last in the array */ ulint n_stored); /*!< in: number of elements in the arrays */ -/********************************************************************//** -Issues read requests for pages which recovery wants to read in. */ -UNIV_INTERN + +/** Issues read requests for pages which recovery wants to read in. 
+@param[in] sync true if the caller wants this function to wait +for the highest address page to get read in, before this function returns +@param[in] space_id tablespace id +@param[in] page_nos array of page numbers to read, with the +highest page number the last in the array +@param[in] n_stored number of page numbers in the array */ + void buf_read_recv_pages( -/*================*/ - ibool sync, /*!< in: TRUE if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in - bytes, or 0 */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored); /*!< in: number of page numbers - in the array */ + bool sync, + ulint space_id, + const ulint* page_nos, + ulint n_stored); /** The size in pages of the area which the read-ahead algorithms read if invoked */ -#define BUF_READ_AHEAD_AREA(b) \ - ut_min(64, ut_2_power_up((b)->curr_size / 32)) +#define BUF_READ_AHEAD_AREA(b) ((b)->read_ahead_area) /** @name Modes used in read-ahead @{ */ /** read only pages belonging to the insert buffer tree */ #define BUF_READ_IBUF_PAGES_ONLY 131 /** read any page */ #define BUF_READ_ANY_PAGE 132 -/** read any page, but ignore (return an error) if a page does not exist -instead of crashing like BUF_READ_ANY_PAGE does */ -#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024 /* @} */ #endif diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 11bbc9b5c8a..102b831ec61 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,12 +26,11 @@ Created 11/17/1995 Heikki Tuuri #ifndef buf0types_h #define buf0types_h -#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS) -#define PAGE_ATOMIC_REF_COUNT -#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */ +#include "os0event.h" +#include "ut0ut.h" /** Buffer page (uncompressed or compressed) */ -struct buf_page_t; +class buf_page_t; /** Buffer block for which an uncompressed page exists */ struct buf_block_t; /** Buffer pool chunk comprising buf_block_t */ @@ -44,6 +43,8 @@ struct buf_pool_stat_t; struct buf_buddy_stat_t; /** Doublewrite memory struct */ struct buf_dblwr_t; +/** Flush observer for bulk create index */ +class FlushObserver; /** A buffer frame. @see page_t */ typedef byte buf_frame_t; @@ -96,6 +97,24 @@ enum srv_checksum_algorithm_t { when reading */ }; +inline +bool +is_checksum_strict(srv_checksum_algorithm_t algo) +{ + return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 + || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB + || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE); +} + +inline +bool +is_checksum_strict(ulint algo) +{ + return(algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 + || algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB + || algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE); +} + /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ /* @{ */ /** Zip shift value for the smallest page size */ @@ -117,4 +136,16 @@ this must be equal to UNIV_PAGE_SIZE */ #define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) /* @} */ +#ifndef UNIV_INNOCHECKSUM + +#include "ut0mutex.h" +#include "sync0rw.h" + +typedef ib_bpmutex_t BPageMutex; +typedef ib_mutex_t BufPoolMutex; +typedef ib_mutex_t FlushListMutex; +typedef BPageMutex BufPoolZipMutex; +typedef rw_lock_t BPageLock; +#endif /* !UNIV_INNOCHECKSUM */ + #endif /* buf0types.h */ 
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h index 1d954bfc07c..5537d70548a 100644 --- a/storage/innobase/include/data0data.h +++ b/storage/innobase/include/data0data.h @@ -33,14 +33,17 @@ Created 5/30/1994 Heikki Tuuri #include "mem0mem.h" #include "dict0types.h" +#include + /** Storage for overflow data in a big record, that is, a clustered index record which needs external storage of data fields */ struct big_rec_t; +struct upd_t; #ifdef UNIV_DEBUG /*********************************************************************//** Gets pointer to the type struct of SQL data field. -@return pointer to the type struct */ +@return pointer to the type struct */ UNIV_INLINE dtype_t* dfield_get_type( @@ -49,7 +52,7 @@ dfield_get_type( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Gets pointer to the data in a field. -@return pointer to data */ +@return pointer to data */ UNIV_INLINE void* dfield_get_data( @@ -67,11 +70,11 @@ void dfield_set_type( /*============*/ dfield_t* field, /*!< in: SQL data field */ - const dtype_t* type) /*!< in: pointer to data type struct */ - MY_ATTRIBUTE((nonnull)); + const dtype_t* type); /*!< in: pointer to data type struct */ + /*********************************************************************//** Gets length of field data. 
-@return length of data; UNIV_SQL_NULL if SQL null data */ +@return length of data; UNIV_SQL_NULL if SQL null data */ UNIV_INLINE ulint dfield_get_len( @@ -89,7 +92,7 @@ dfield_set_len( MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Determines if a field is SQL NULL -@return nonzero if SQL null data */ +@return nonzero if SQL null data */ UNIV_INLINE ulint dfield_is_null( @@ -98,7 +101,7 @@ dfield_is_null( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Determines if a field is externally stored -@return nonzero if externally stored */ +@return nonzero if externally stored */ UNIV_INLINE ulint dfield_is_ext( @@ -113,6 +116,23 @@ dfield_set_ext( /*===========*/ dfield_t* field) /*!< in/out: field */ MY_ATTRIBUTE((nonnull)); + +/** Gets spatial status for "external storage" +@param[in,out] field field */ +UNIV_INLINE +spatial_status_t +dfield_get_spatial_status( + const dfield_t* field); + +/** Sets spatial status for "external storage" +@param[in,out] field field +@param[in] spatial_status spatial status */ +UNIV_INLINE +void +dfield_set_spatial_status( + dfield_t* field, + spatial_status_t spatial_status); + /*********************************************************************//** Sets pointer to the data and length in a field. */ UNIV_INLINE @@ -124,6 +144,15 @@ dfield_set_data( ulint len) /*!< in: length or UNIV_SQL_NULL */ MY_ATTRIBUTE((nonnull(1))); /*********************************************************************//** +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +dfield_write_mbr( +/*=============*/ + dfield_t* field, /*!< in: field */ + const double* mbr) /*!< in: data */ + MY_ATTRIBUTE((nonnull(1))); +/*********************************************************************//** Sets a data field to SQL NULL. 
*/ UNIV_INLINE void @@ -146,9 +175,9 @@ UNIV_INLINE void dfield_copy_data( /*=============*/ - dfield_t* field1, /*!< out: field to copy to */ - const dfield_t* field2) /*!< in: field to copy from */ - MY_ATTRIBUTE((nonnull)); + dfield_t* field1, /*!< out: field to copy to */ + const dfield_t* field2); /*!< in: field to copy from */ + /*********************************************************************//** Copies a data field to another. */ UNIV_INLINE @@ -172,7 +201,7 @@ dfield_dup( Tests if two data fields are equal. If len==0, tests the data length and content for equality. If len>0, tests the first len bytes of the content for equality. -@return TRUE if both fields are NULL or if they are equal */ +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( @@ -184,7 +213,7 @@ dfield_datas_are_binary_equal( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Tests if dfield data length and content is equal to the given. -@return TRUE if equal */ +@return TRUE if equal */ UNIV_INLINE ibool dfield_data_is_binary_equal( @@ -196,29 +225,47 @@ dfield_data_is_binary_equal( #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Gets number of fields in a data tuple. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dtuple_get_n_fields( /*================*/ const dtuple_t* tuple) /*!< in: tuple */ MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Gets number of virtual fields in a data tuple. +@param[in] tuple dtuple to check +@return number of fields */ +UNIV_INLINE +ulint +dtuple_get_n_v_fields( + const dtuple_t* tuple); + #ifdef UNIV_DEBUG -/*********************************************************************//** -Gets nth field of a tuple. -@return nth field */ +/** Gets nth field of a tuple. 
+@param[in] tuple tuple +@param[in] n index of field +@return nth field */ UNIV_INLINE dfield_t* dtuple_get_nth_field( -/*=================*/ - const dtuple_t* tuple, /*!< in: tuple */ - ulint n); /*!< in: index of field */ + const dtuple_t* tuple, + ulint n); +/** Gets nth virtual field of a tuple. +@param[in] tuple tuple +@param[in] n the nth field to get +@return nth field */ +UNIV_INLINE +dfield_t* +dtuple_get_nth_v_field( + const dtuple_t* tuple, + ulint n); #else /* UNIV_DEBUG */ # define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n)) +# define dtuple_get_nth_v_field(tuple, n) ((tuple)->fields + (tuple)->n_fields + (n)) #endif /* UNIV_DEBUG */ /*********************************************************************//** Gets info bits in a data tuple. -@return info bits */ +@return info bits */ UNIV_INLINE ulint dtuple_get_info_bits( @@ -236,7 +283,7 @@ dtuple_set_info_bits( MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Gets number of fields used in record comparisons. -@return number of fields used in comparisons in rem0cmp.* */ +@return number of fields used in comparisons in rem0cmp.* */ UNIV_INLINE ulint dtuple_get_n_fields_cmp( @@ -259,25 +306,28 @@ creating a new dtuple_t object */ #define DTUPLE_EST_ALLOC(n_fields) \ (sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t)) -/**********************************************************//** -Creates a data tuple from an already allocated chunk of memory. +/** Creates a data tuple from an already allocated chunk of memory. The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). The default value for number of fields used in record comparisons for this tuple is n_fields. 
-@return created tuple (inside buf) */ +@param[in,out] buf buffer to use +@param[in] buf_size buffer size +@param[in] n_fields number of field +@param[in] n_v_fields number of fields on virtual columns +@return created tuple (inside buf) */ UNIV_INLINE dtuple_t* dtuple_create_from_mem( -/*===================*/ - void* buf, /*!< in, out: buffer to use */ - ulint buf_size, /*!< in: buffer size */ - ulint n_fields) /*!< in: number of fields */ + void* buf, + ulint buf_size, + ulint n_fields, + ulint n_v_fields) MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************//** Creates a data tuple to a memory heap. The default value for number of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ +@return own: created tuple */ UNIV_INLINE dtuple_t* dtuple_create( @@ -288,20 +338,56 @@ dtuple_create( ulint n_fields)/*!< in: number of fields */ MY_ATTRIBUTE((nonnull, malloc)); + +/** Initialize the virtual field data in a dtuple_t +@param[in,out] vrow dtuple contains the virtual fields */ +UNIV_INLINE +void +dtuple_init_v_fld( + const dtuple_t* vrow); + +/** Duplicate the virtual field data in a dtuple_t +@param[in,out] vrow dtuple contains the virtual fields +@param[in] heap heap memory to use */ +UNIV_INLINE +void +dtuple_dup_v_fld( + const dtuple_t* vrow, + mem_heap_t* heap); + +/** Creates a data tuple with possible virtual columns to a memory heap. +@param[in] heap memory heap where the tuple is created +@param[in] n_fields number of fields +@param[in] n_v_fields number of fields on virtual col +@return own: created tuple */ +UNIV_INLINE +dtuple_t* +dtuple_create_with_vcol( + mem_heap_t* heap, + ulint n_fields, + ulint n_v_fields); + /*********************************************************************//** Sets number of fields used in a tuple. Normally this is set in dtuple_create, but if you want later to set it smaller, you can use this. 
*/ -UNIV_INTERN void dtuple_set_n_fields( /*================*/ dtuple_t* tuple, /*!< in: tuple */ ulint n_fields) /*!< in: number of fields */ MY_ATTRIBUTE((nonnull)); +/** Copies a data tuple's virtual fields to another. This is a shallow copy; +@param[in,out] d_tuple destination tuple +@param[in] s_tuple source tuple */ +UNIV_INLINE +void +dtuple_copy_v_fields( + dtuple_t* d_tuple, + const dtuple_t* s_tuple); /*********************************************************************//** Copies a data tuple to another. This is a shallow copy; if a deep copy is desired, dfield_dup() will have to be invoked on each field. -@return own: copy of tuple */ +@return own: copy of tuple */ UNIV_INLINE dtuple_t* dtuple_copy( @@ -313,7 +399,7 @@ dtuple_copy( /**********************************************************//** The following function returns the sum of data lengths of a tuple. The space occupied by the field structs or the tuple struct is not counted. -@return sum of data lens */ +@return sum of data lens */ UNIV_INLINE ulint dtuple_get_data_size( @@ -323,37 +409,37 @@ dtuple_get_data_size( MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Computes the number of externally stored fields in a data tuple. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dtuple_get_n_ext( /*=============*/ const dtuple_t* tuple) /*!< in: tuple */ MY_ATTRIBUTE((nonnull)); -/************************************************************//** -Compare two data tuples, respecting the collation of character fields. -@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively, -than tuple2 */ -UNIV_INTERN
+@param[in] tuple1 first data tuple +@param[in] tuple2 second data tuple +@return positive, 0, negative if tuple1 is greater, equal, less, than tuple2, +respectively */ int dtuple_coll_cmp( -/*============*/ - const dtuple_t* tuple1, /*!< in: tuple 1 */ - const dtuple_t* tuple2) /*!< in: tuple 2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/************************************************************//** -Folds a prefix given as the number of fields of a tuple. -@return the folded value */ + const dtuple_t* tuple1, + const dtuple_t* tuple2) + MY_ATTRIBUTE((warn_unused_result)); +/** Fold a prefix given as the number of fields of a tuple. +@param[in] tuple index record +@param[in] n_fields number of complete fields to fold +@param[in] n_bytes number of bytes to fold in the last field +@param[in] index_id index tree ID +@return the folded value */ UNIV_INLINE ulint dtuple_fold( -/*========*/ - const dtuple_t* tuple, /*!< in: the tuple */ - ulint n_fields,/*!< in: number of complete fields to fold */ - ulint n_bytes,/*!< in: number of bytes to fold in an - incomplete last field */ - index_id_t tree_id)/*!< in: index tree id */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + const dtuple_t* tuple, + ulint n_fields, + ulint n_bytes, + index_id_t tree_id) + MY_ATTRIBUTE((warn_unused_result)); /*******************************************************************//** Sets types of fields binary in a tuple. */ UNIV_INLINE @@ -365,7 +451,7 @@ dtuple_set_types_binary( MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Checks if a dtuple contains an SQL null value. -@return TRUE if some field is SQL null */ +@return TRUE if some field is SQL null */ UNIV_INLINE ibool dtuple_contains_null( @@ -374,8 +460,7 @@ dtuple_contains_null( MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************//** Checks that a data field is typed. Asserts an error if not. 
-@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dfield_check_typed( /*===============*/ @@ -383,8 +468,7 @@ dfield_check_typed( MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************//** Checks that a data tuple is typed. Asserts an error if not. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_check_typed( /*===============*/ @@ -392,8 +476,7 @@ dtuple_check_typed( MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************//** Checks that a data tuple is typed. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_check_typed_no_assert( /*=========================*/ @@ -403,8 +486,7 @@ dtuple_check_typed_no_assert( /**********************************************************//** Validates the consistency of a tuple which must be complete, i.e, all fields must have been set. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtuple_validate( /*============*/ @@ -413,7 +495,6 @@ dtuple_validate( #endif /* UNIV_DEBUG */ /*************************************************************//** Pretty prints a dfield value according to its data type. */ -UNIV_INTERN void dfield_print( /*=========*/ @@ -422,7 +503,6 @@ dfield_print( /*************************************************************//** Pretty prints a dfield value according to its data type. Also the hex string is printed if a string contains non-printable characters. */ -UNIV_INTERN void dfield_print_also_hex( /*==================*/ @@ -430,13 +510,41 @@ dfield_print_also_hex( MY_ATTRIBUTE((nonnull)); /**********************************************************//** The following function prints the contents of a tuple. */ -UNIV_INTERN void dtuple_print( /*=========*/ FILE* f, /*!< in: output stream */ const dtuple_t* tuple) /*!< in: tuple */ MY_ATTRIBUTE((nonnull)); + +/** Print the contents of a tuple. 
+@param[out] o output stream +@param[in] field array of data fields +@param[in] n number of data fields */ +void +dfield_print( + std::ostream& o, + const dfield_t* field, + ulint n); +/** Print the contents of a tuple. +@param[out] o output stream +@param[in] tuple data tuple */ +void +dtuple_print( + std::ostream& o, + const dtuple_t* tuple); + +/** Print the contents of a tuple. +@param[out] o output stream +@param[in] tuple data tuple */ +inline +std::ostream& +operator<<(std::ostream& o, const dtuple_t& tuple) +{ + dtuple_print(o, &tuple); + return(o); +} + /**************************************************************//** Moves parts of long fields in entry to the big record vector so that the size of tuple drops below the maximum record size allowed in the @@ -445,20 +553,19 @@ to determine uniquely the insertion place of the tuple in the index. @return own: created big record vector, NULL if we are not able to shorten the entry enough, i.e., if there are too many fixed-length or short fields in entry or the index is clustered */ -UNIV_INTERN big_rec_t* dtuple_convert_big_rec( /*===================*/ dict_index_t* index, /*!< in: index */ + upd_t* upd, /*!< in/out: update vector */ dtuple_t* entry, /*!< in/out: index entry */ ulint* n_ext) /*!< in/out: number of externally stored columns */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); + MY_ATTRIBUTE((malloc, warn_unused_result)); /**************************************************************//** Puts back to entry the data stored in vector. Note that to ensure the fields in entry can accommodate the data, vector must have been created from entry with dtuple_convert_big_rec. 
*/ -UNIV_INTERN void dtuple_convert_back_big_rec( /*========================*/ @@ -483,8 +590,17 @@ dtuple_big_rec_free( struct dfield_t{ void* data; /*!< pointer to data */ unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */ - unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */ + unsigned spatial_status:2; + /*!< spatial status of externally stored field + in undo log for purge */ + unsigned len; /*!< data length; UNIV_SQL_NULL if SQL null */ dtype_t type; /*!< type of data */ + + /** Create a deep copy of this object + @param[in] heap the memory heap in which the clone will be + created. + @return the cloned object. */ + dfield_t* clone(mem_heap_t* heap); }; /** Structure for an SQL data tuple of fields (logical record) */ @@ -502,6 +618,8 @@ struct dtuple_t { default value in dtuple creation is the same value as n_fields */ dfield_t* fields; /*!< fields */ + ulint n_v_fields; /*!< number of virtual fields */ + dfield_t* v_fields; /*!< fields on virtual column */ UT_LIST_NODE_T(dtuple_t) tuple_list; /*!< data tuples can be linked into a list using this field */ @@ -513,8 +631,20 @@ struct dtuple_t { #endif /* UNIV_DEBUG */ }; + /** A slot for a field in a big rec vector */ struct big_rec_field_t { + + /** Constructor. + @param[in] field_no_ the field number + @param[in] len_ the data length + @param[in] data_ the data */ + big_rec_field_t(ulint field_no_, ulint len_, const void* data_) + : field_no(field_no_), + len(len_), + data(data_) + {} + ulint field_no; /*!< field number in record */ ulint len; /*!< stored data length, in bytes */ const void* data; /*!< stored data */ @@ -525,8 +655,36 @@ clustered index record which needs external storage of data fields */ struct big_rec_t { mem_heap_t* heap; /*!< memory heap from which allocated */ + const ulint capacity; /*!< fields array size */ ulint n_fields; /*!< number of stored fields */ big_rec_field_t*fields; /*!< stored fields */ + + /** Constructor. 
+ @param[in] max the capacity of the array of fields. */ + explicit big_rec_t(const ulint max) + : heap(0), + capacity(max), + n_fields(0), + fields(0) + {} + + /** Append one big_rec_field_t object to the end of array of fields */ + void append(const big_rec_field_t& field) + { + ut_ad(n_fields < capacity); + fields[n_fields] = field; + n_fields++; + } + + /** Allocate a big_rec_t object in the given memory heap, and for + storing n_fld number of fields. + @param[in] heap memory heap in which this object is allocated + @param[in] n_fld maximum number of fields that can be stored in + this object + @return the allocated object */ + static big_rec_t* alloc( + mem_heap_t* heap, + ulint n_fld); }; #ifndef UNIV_NONINL diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic index 11499ab928c..dc51735d340 100644 --- a/storage/innobase/include/data0data.ic +++ b/storage/innobase/include/data0data.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,6 +25,7 @@ Created 5/30/1994 Heikki Tuuri #include "mem0mem.h" #include "ut0rnd.h" +#include "btr0types.h" #ifdef UNIV_DEBUG /** Dummy variable to catch access to uninitialized fields. In the @@ -34,7 +35,7 @@ extern byte data_error; /*********************************************************************//** Gets pointer to the type struct of SQL data field. -@return pointer to the type struct */ +@return pointer to the type struct */ UNIV_INLINE dtype_t* dfield_get_type( @@ -65,7 +66,7 @@ dfield_set_type( #ifdef UNIV_DEBUG /*********************************************************************//** Gets pointer to the data in a field. 
-@return pointer to data */ +@return pointer to data */ UNIV_INLINE void* dfield_get_data( @@ -82,7 +83,7 @@ dfield_get_data( /*********************************************************************//** Gets length of field data. -@return length of data; UNIV_SQL_NULL if SQL null data */ +@return length of data; UNIV_SQL_NULL if SQL null data */ UNIV_INLINE ulint dfield_get_len( @@ -111,12 +112,12 @@ dfield_set_len( #endif /* UNIV_VALGRIND_DEBUG */ field->ext = 0; - field->len = len; + field->len = static_cast<unsigned int>(len); } /*********************************************************************//** Determines if a field is SQL NULL -@return nonzero if SQL null data */ +@return nonzero if SQL null data */ UNIV_INLINE ulint dfield_is_null( @@ -130,7 +131,7 @@ dfield_is_null( /*********************************************************************//** Determines if a field is externally stored -@return nonzero if externally stored */ +@return nonzero if externally stored */ UNIV_INLINE ulint dfield_is_ext( @@ -138,6 +139,7 @@ dfield_is_ext( const dfield_t* field) /*!< in: field */ { ut_ad(field); + ut_ad(!field->ext || field->len >= BTR_EXTERN_FIELD_REF_SIZE); return(field->ext); } @@ -155,6 +157,34 @@ dfield_set_ext( field->ext = 1; } +/** Gets spatial status for "external storage" +@param[in,out] field field */ +UNIV_INLINE +spatial_status_t +dfield_get_spatial_status( + const dfield_t* field) +{ + ut_ad(field); + ut_ad(dfield_is_ext(field)); + + return(static_cast<spatial_status_t>(field->spatial_status)); +} + +/** Sets spatial status for "external storage" +@param[in,out] field field +@param[in] spatial_status spatial status */ +UNIV_INLINE +void +dfield_set_spatial_status( + dfield_t* field, + spatial_status_t spatial_status) +{ + ut_ad(field); + ut_ad(dfield_is_ext(field)); + + field->spatial_status = spatial_status; +} + /*********************************************************************//** Sets pointer to the data and length in a field.
*/ UNIV_INLINE @@ -172,7 +202,31 @@ dfield_set_data( #endif /* UNIV_VALGRIND_DEBUG */ field->data = (void*) data; field->ext = 0; - field->len = len; + field->len = static_cast<unsigned int>(len); +} + +/*********************************************************************//** +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +dfield_write_mbr( +/*=============*/ + dfield_t* field, /*!< in: field */ + const double* mbr) /*!< in: data */ +{ + ut_ad(field); + +#ifdef UNIV_VALGRIND_DEBUG + if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len); +#endif /* UNIV_VALGRIND_DEBUG */ + field->ext = 0; + + for (int i = 0; i < SPDIMS * 2; i++) { + mach_double_write(static_cast<byte*>(field->data) + + i * sizeof(double), mbr[i]); + } + + field->len = DATA_MBR_LEN; } /*********************************************************************//** @@ -201,6 +255,7 @@ dfield_copy_data( field1->data = field2->data; field1->len = field2->len; field1->ext = field2->ext; + field1->spatial_status = field2->spatial_status; } /*********************************************************************//** @@ -235,7 +290,7 @@ dfield_dup( Tests if two data fields are equal. If len==0, tests the data length and content for equality. If len>0, tests the first len bytes of the content for equality. -@return TRUE if both fields are NULL or if they are equal */ +@return TRUE if both fields are NULL or if they are equal */ UNIV_INLINE ibool dfield_datas_are_binary_equal( @@ -262,7 +317,7 @@ dfield_datas_are_binary_equal( /*********************************************************************//** Tests if dfield data length and content is equal to the given. -@return TRUE if equal */ +@return TRUE if equal */ UNIV_INLINE ibool dfield_data_is_binary_equal( @@ -279,7 +334,7 @@ dfield_data_is_binary_equal( /*********************************************************************//** Gets info bits in a data tuple.
-@return info bits */ +@return info bits */ UNIV_INLINE ulint dtuple_get_info_bits( @@ -307,7 +362,7 @@ dtuple_set_info_bits( /*********************************************************************//** Gets number of fields used in record comparisons. -@return number of fields used in comparisons in rem0cmp.* */ +@return number of fields used in comparisons in rem0cmp.* */ UNIV_INLINE ulint dtuple_get_n_fields_cmp( @@ -337,7 +392,7 @@ dtuple_set_n_fields_cmp( /*********************************************************************//** Gets number of fields in a data tuple. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dtuple_get_n_fields( @@ -349,48 +404,85 @@ dtuple_get_n_fields( return(tuple->n_fields); } +/** Gets the number of virtual fields in a data tuple. +@param[in] tuple dtuple to check +@return number of fields */ +UNIV_INLINE +ulint +dtuple_get_n_v_fields( + const dtuple_t* tuple) +{ + ut_ad(tuple); + + return(tuple->n_v_fields); } #ifdef UNIV_DEBUG -/*********************************************************************//** -Gets nth field of a tuple. -@return nth field */ +/** Gets nth field of a tuple. +@param[in] tuple tuple +@param[in] n index of field +@return nth field */ UNIV_INLINE dfield_t* dtuple_get_nth_field( -/*=================*/ - const dtuple_t* tuple, /*!< in: tuple */ - ulint n) /*!< in: index of field */ + const dtuple_t* tuple, + ulint n) { ut_ad(tuple); ut_ad(n < tuple->n_fields); return((dfield_t*) tuple->fields + n); } +/** Gets nth virtual field of a tuple. +@param[in] tuple tuple +@param[in] n the nth field to get +@return nth field */ +UNIV_INLINE +dfield_t* +dtuple_get_nth_v_field( + const dtuple_t* tuple, + ulint n) +{ + ut_ad(tuple); + ut_ad(n < tuple->n_v_fields); + + return(static_cast<dfield_t*>(tuple->v_fields + n)); +} #endif /* UNIV_DEBUG */ -/**********************************************************//** -Creates a data tuple from an already allocated chunk of memory.
+/** Creates a data tuple from an already allocated chunk of memory. The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). The default value for number of fields used in record comparisons for this tuple is n_fields. -@return created tuple (inside buf) */ +@param[in,out] buf buffer to use +@param[in] buf_size buffer size +@param[in] n_fields number of field +@param[in] n_v_fields number of fields on virtual columns +@return created tuple (inside buf) */ UNIV_INLINE dtuple_t* dtuple_create_from_mem( -/*===================*/ - void* buf, /*!< in, out: buffer to use */ - ulint buf_size, /*!< in: buffer size */ - ulint n_fields) /*!< in: number of fields */ + void* buf, + ulint buf_size, + ulint n_fields, + ulint n_v_fields) { dtuple_t* tuple; + ulint n_t_fields = n_fields + n_v_fields; ut_ad(buf != NULL); - ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields)); + ut_a(buf_size >= DTUPLE_EST_ALLOC(n_t_fields)); tuple = (dtuple_t*) buf; tuple->info_bits = 0; tuple->n_fields = n_fields; + tuple->n_v_fields = n_v_fields; tuple->n_fields_cmp = n_fields; tuple->fields = (dfield_t*) &tuple[1]; + if (n_v_fields > 0) { + tuple->v_fields = &tuple->fields[n_fields]; + } else { + tuple->v_fields = NULL; + } #ifdef UNIV_DEBUG tuple->magic_n = DATA_TUPLE_MAGIC_N; @@ -398,26 +490,61 @@ dtuple_create_from_mem( { /* In the debug version, initialize fields to an error value */ ulint i; - for (i = 0; i < n_fields; i++) { + for (i = 0; i < n_t_fields; i++) { dfield_t* field; - field = dtuple_get_nth_field(tuple, i); + if (i >= n_fields) { + field = dtuple_get_nth_v_field( + tuple, i - n_fields); + } else { + field = dtuple_get_nth_field(tuple, i); + } dfield_set_len(field, UNIV_SQL_NULL); field->data = &data_error; dfield_get_type(field)->mtype = DATA_ERROR; + dfield_get_type(field)->prtype = DATA_ERROR; } } #endif - UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields); - UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields); + 
UNIV_MEM_ASSERT_W(tuple->fields, n_t_fields * sizeof *tuple->fields); + UNIV_MEM_INVALID(tuple->fields, n_t_fields * sizeof *tuple->fields); return(tuple); } +/** Duplicate the virtual field data in a dtuple_t +@param[in,out] vrow dtuple contains the virtual fields +@param[in] heap heap memory to use */ +UNIV_INLINE +void +dtuple_dup_v_fld( + const dtuple_t* vrow, + mem_heap_t* heap) +{ + for (ulint i = 0; i < vrow->n_v_fields; i++) { + dfield_t* dfield = dtuple_get_nth_v_field(vrow, i); + dfield_dup(dfield, heap); + } +} + +/** Initialize the virtual field data in a dtuple_t +@param[in,out] vrow dtuple contains the virtual fields */ +UNIV_INLINE +void +dtuple_init_v_fld( + const dtuple_t* vrow) +{ + for (ulint i = 0; i < vrow->n_v_fields; i++) { + dfield_t* dfield = dtuple_get_nth_v_field(vrow, i); + dfield_get_type(dfield)->mtype = DATA_MISSING; + dfield_set_len(dfield, UNIV_SQL_NULL); + } +} + /**********************************************************//** Creates a data tuple to a memory heap. The default value for number of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ +@return own: created tuple */ UNIV_INLINE dtuple_t* dtuple_create( @@ -426,6 +553,21 @@ dtuple_create( is created, DTUPLE_EST_ALLOC(n_fields) bytes will be allocated from this heap */ ulint n_fields) /*!< in: number of fields */ +{ + return(dtuple_create_with_vcol(heap, n_fields, 0)); +} + +/** Creates a data tuple with virtual columns to a memory heap. 
+@param[in] heap memory heap where the tuple is created +@param[in] n_fields number of fields +@param[in] n_v_fields number of fields on virtual col +@return own: created tuple */ +UNIV_INLINE +dtuple_t* +dtuple_create_with_vcol( + mem_heap_t* heap, + ulint n_fields, + ulint n_v_fields) { void* buf; ulint buf_size; @@ -433,18 +575,37 @@ dtuple_create( ut_ad(heap); - buf_size = DTUPLE_EST_ALLOC(n_fields); + buf_size = DTUPLE_EST_ALLOC(n_fields + n_v_fields); buf = mem_heap_alloc(heap, buf_size); - tuple = dtuple_create_from_mem(buf, buf_size, n_fields); + tuple = dtuple_create_from_mem(buf, buf_size, n_fields, n_v_fields); return(tuple); } +/** Copies a data tuple's virtual fields to another. This is a shallow copy; +@param[in,out] d_tuple destination tuple +@param[in] s_tuple source tuple */ +UNIV_INLINE +void +dtuple_copy_v_fields( + dtuple_t* d_tuple, + const dtuple_t* s_tuple) +{ + + ulint n_v_fields = dtuple_get_n_v_fields(d_tuple); + ut_ad(n_v_fields == dtuple_get_n_v_fields(s_tuple)); + + for (ulint i = 0; i < n_v_fields; i++) { + dfield_copy(dtuple_get_nth_v_field(d_tuple, i), + dtuple_get_nth_v_field(s_tuple, i)); + } +} + /*********************************************************************//** Copies a data tuple to another. This is a shallow copy; if a deep copy is desired, dfield_dup() will have to be invoked on each field. 
-@return own: copy of tuple */ +@return own: copy of tuple */ UNIV_INLINE dtuple_t* dtuple_copy( @@ -454,7 +615,9 @@ dtuple_copy( where the tuple is created */ { ulint n_fields = dtuple_get_n_fields(tuple); - dtuple_t* new_tuple = dtuple_create(heap, n_fields); + ulint n_v_fields = dtuple_get_n_v_fields(tuple); + dtuple_t* new_tuple = dtuple_create_with_vcol( + heap, n_fields, n_v_fields); ulint i; for (i = 0; i < n_fields; i++) { @@ -462,6 +625,11 @@ dtuple_copy( dtuple_get_nth_field(tuple, i)); } + for (i = 0; i < n_v_fields; i++) { + dfield_copy(dtuple_get_nth_v_field(new_tuple, i), + dtuple_get_nth_v_field(tuple, i)); + } + return(new_tuple); } @@ -469,7 +637,7 @@ dtuple_copy( The following function returns the sum of data lengths of a tuple. The space occupied by the field structs or the tuple struct is not counted. Neither is possible space in externally stored parts of the field. -@return sum of data lengths */ +@return sum of data lengths */ UNIV_INLINE ulint dtuple_get_data_size( @@ -506,7 +674,7 @@ dtuple_get_data_size( /*********************************************************************//** Computes the number of externally stored fields in a data tuple. -@return number of externally stored fields */ +@return number of externally stored fields */ UNIV_INLINE ulint dtuple_get_n_ext( @@ -546,18 +714,19 @@ dtuple_set_types_binary( } } -/************************************************************//** -Folds a prefix given as the number of fields of a tuple. -@return the folded value */ +/** Fold a prefix given as the number of fields of a tuple. 
+@param[in] tuple index record +@param[in] n_fields number of complete fields to fold +@param[in] n_bytes number of bytes to fold in the last field +@param[in] index_id index tree ID +@return the folded value */ UNIV_INLINE ulint dtuple_fold( -/*========*/ - const dtuple_t* tuple, /*!< in: the tuple */ - ulint n_fields,/*!< in: number of complete fields to fold */ - ulint n_bytes,/*!< in: number of bytes to fold in an - incomplete last field */ - index_id_t tree_id)/*!< in: index tree id */ + const dtuple_t* tuple, + ulint n_fields, + ulint n_bytes, + index_id_t tree_id) { const dfield_t* field; ulint i; @@ -616,7 +785,7 @@ data_write_sql_null( /**********************************************************************//** Checks if a dtuple contains an SQL null value. -@return TRUE if some field is SQL null */ +@return TRUE if some field is SQL null */ UNIV_INLINE ibool dtuple_contains_null( diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h index 111664b0b52..00073dfca2c 100644 --- a/storage/innobase/include/data0type.h +++ b/storage/innobase/include/data0type.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,19 +29,15 @@ Created 1/16/1996 Heikki Tuuri #include "univ.i" extern ulint data_mysql_default_charset_coll; -#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8 #define DATA_MYSQL_BINARY_CHARSET_COLL 63 /* SQL data type struct */ struct dtype_t; -/* SQL Like operator comparison types */ +/** SQL Like operator comparison types */ enum ib_like_t { - IB_LIKE_EXACT, /* e.g. 
STRING */ - IB_LIKE_PREFIX, /* e.g., STRING% */ - IB_LIKE_SUFFIX, /* e.g., %STRING */ - IB_LIKE_SUBSTR, /* e.g., %STRING% */ - IB_LIKE_REGEXP /* Future */ + IB_LIKE_EXACT, /**< e.g. STRING */ + IB_LIKE_PREFIX /**< e.g., STRING% */ }; /*-------------------------------------------*/ @@ -79,8 +75,29 @@ binary strings */ DATA_VARMYSQL for all character sets, and the charset-collation for tables created with it can also be latin1_swedish_ci */ + +/* DATA_POINT&DATA_VAR_POINT are for standard geometry datatype 'point' and +DATA_GEOMETRY include all other standard geometry datatypes as described in +OGC standard(line_string, polygon, multi_point, multi_polygon, +multi_line_string, geometry_collection, geometry). +Currently, geometry data is stored in the standard Well-Known Binary(WKB) +format (http://www.opengeospatial.org/standards/sfa). +We use BLOB as underlying datatype for DATA_GEOMETRY and DATA_VAR_POINT +while CHAR for DATA_POINT */ +#define DATA_GEOMETRY 14 /* geometry datatype of variable length */ +/* The following two are disabled temporarily, we won't create them in +get_innobase_type_from_mysql_type(). +TODO: We will enable DATA_POINT/them when we come to the fixed-length POINT +again. 
*/ +#define DATA_POINT 15 /* geometry datatype of fixed length POINT */ +#define DATA_VAR_POINT 16 /* geometry datatype of variable length + POINT, used when we want to store POINT + as BLOB internally */ #define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() requires the values are <= 63 */ + +#define DATA_MTYPE_CURRENT_MIN DATA_VARCHAR /* minimum value of mtype */ +#define DATA_MTYPE_CURRENT_MAX DATA_VAR_POINT /* maximum value of mtype */ /*-------------------------------------------*/ /* The 'PRECISE TYPE' of a column */ /* @@ -149,6 +166,10 @@ be less than 256 */ #define DATA_N_SYS_COLS 3 /* number of system columns defined above */ +#define DATA_ITT_N_SYS_COLS 2 + /* number of system columns for intrinsic + temporary table */ + #define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */ #define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */ @@ -166,10 +187,15 @@ be less than 256 */ In earlier versions this was set for some BLOB columns. */ +#define DATA_GIS_MBR 2048 /* Used as GIS MBR column */ +#define DATA_MBR_LEN SPDIMS * 2 * sizeof(double) /* GIS MBR length*/ + #define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data type when the column is true VARCHAR where MySQL uses 2 bytes to store the data len; for shorter VARCHARs MySQL uses only 1 byte */ +#define DATA_VIRTUAL 8192 /* Virtual column */ + /*-------------------------------------------*/ /* This many bytes we need to store the type information affecting the @@ -183,6 +209,15 @@ store the charset-collation number; one byte is left unused, though */ /* Maximum multi-byte character length in bytes, plus 1 */ #define DATA_MBMAX 5 +/* For DATA_POINT of dimension 2, the length of value in btree is always 25, +which is the summary of: +SRID_SIZE(4) + WKB_HEADER_SIZE(1+4) + POINT_DATA_SIZE(8*2). +So the length of physical record or POINT KEYs on btree are 25. 
+GIS_TODO: When we support multi-dimensions DATA_POINT, we should get the +length from corresponding column or index definition, instead of this MACRO +*/ +#define DATA_POINT_LEN 25 + /* Pack mbminlen, mbmaxlen to mbminmaxlen. */ #define DATA_MBMINMAXLEN(mbminlen, mbmaxlen) \ ((mbmaxlen) * DATA_MBMAX + (mbminlen)) @@ -194,6 +229,30 @@ because in GCC it returns a long. */ /* Get mbmaxlen from mbminmaxlen. */ #define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX)) +/* For checking if a geom_type is POINT */ +#define DATA_POINT_MTYPE(mtype) ((mtype) == DATA_POINT \ + || (mtype) == DATA_VAR_POINT) + +/* For checking if mtype is GEOMETRY datatype */ +#define DATA_GEOMETRY_MTYPE(mtype) (DATA_POINT_MTYPE(mtype) \ + || (mtype) == DATA_GEOMETRY) + +/* For checking if mtype is BLOB or GEOMETRY, since we use BLOB as +the underling datatype of GEOMETRY(not DATA_POINT) data. */ +#define DATA_LARGE_MTYPE(mtype) ((mtype) == DATA_BLOB \ + || (mtype) == DATA_VAR_POINT \ + || (mtype) == DATA_GEOMETRY) + +/* For checking if data type is big length data type. */ +#define DATA_BIG_LEN_MTYPE(len, mtype) ((len) > 255 || DATA_LARGE_MTYPE(mtype)) + +/* For checking if the column is a big length column. */ +#define DATA_BIG_COL(col) DATA_BIG_LEN_MTYPE((col)->len, (col)->mtype) + +/* For checking if data type is large binary data type. */ +#define DATA_LARGE_BINARY(mtype,prtype) ((mtype) == DATA_GEOMETRY || \ + ((mtype) == DATA_BLOB && !((prtype) & DATA_BINARY_TYPE))) + /* We now support 15 bits (up to 32767) collation number */ #define MAX_CHAR_COLL_NUM 32767 @@ -203,7 +262,7 @@ because in GCC it returns a long. */ #ifndef UNIV_HOTBACKUP /*********************************************************************//** Gets the MySQL type code from a dtype. -@return MySQL type code; this is NOT an InnoDB type code! */ +@return MySQL type code; this is NOT an InnoDB type code! 
*/ UNIV_INLINE ulint dtype_get_mysql_type( @@ -213,8 +272,7 @@ dtype_get_mysql_type( Determine how many bytes the first n characters of the given string occupy. If the string is shorter than n characters, returns the number of bytes the characters in the string occupy. -@return length of the prefix, in bytes */ -UNIV_INTERN +@return length of the prefix, in bytes */ ulint dtype_get_at_most_n_mbchars( /*========================*/ @@ -231,8 +289,7 @@ dtype_get_at_most_n_mbchars( /*********************************************************************//** Checks if a data main type is a string type. Also a BLOB is considered a string type. -@return TRUE if string type */ -UNIV_INTERN +@return TRUE if string type */ ibool dtype_is_string_type( /*=================*/ @@ -241,8 +298,7 @@ dtype_is_string_type( Checks if a type is a binary string type. Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For those DATA_BLOB columns this function currently returns FALSE. -@return TRUE if binary string type */ -UNIV_INTERN +@return TRUE if binary string type */ ibool dtype_is_binary_string_type( /*========================*/ @@ -253,8 +309,7 @@ Checks if a type is a non-binary string type. That is, dtype_is_string_type is TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For those DATA_BLOB columns this function currently returns TRUE. -@return TRUE if non-binary string type */ -UNIV_INTERN +@return TRUE if non-binary string type */ ibool dtype_is_non_binary_string_type( /*============================*/ @@ -280,7 +335,7 @@ dtype_copy( const dtype_t* type2); /*!< in: type struct to copy from */ /*********************************************************************//** Gets the SQL main data type. 
-@return SQL main data type */ +@return SQL main data type */ UNIV_INLINE ulint dtype_get_mtype( @@ -288,7 +343,7 @@ dtype_get_mtype( const dtype_t* type); /*!< in: data type */ /*********************************************************************//** Gets the precise data type. -@return precise data type */ +@return precise data type */ UNIV_INLINE ulint dtype_get_prtype( @@ -309,7 +364,7 @@ dtype_get_mblen( multi-byte character */ /*********************************************************************//** Gets the MySQL charset-collation code for MySQL string types. -@return MySQL charset-collation code */ +@return MySQL charset-collation code */ UNIV_INLINE ulint dtype_get_charset_coll( @@ -319,7 +374,6 @@ dtype_get_charset_coll( Forms a precise type from the < 4.1.2 format precise type plus the charset-collation code. @return precise type, including the charset-collation code */ -UNIV_INTERN ulint dtype_form_prtype( /*==============*/ @@ -330,7 +384,7 @@ dtype_form_prtype( Determines if a MySQL string type is a subset of UTF-8. This function may return false negatives, in case further character-set collation codes are introduced in MySQL later. -@return TRUE if a subset of UTF-8 */ +@return TRUE if a subset of UTF-8 */ UNIV_INLINE ibool dtype_is_utf8( @@ -339,7 +393,7 @@ dtype_is_utf8( #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Gets the type length. -@return fixed length of the type, in bytes, or 0 if variable-length */ +@return fixed length of the type, in bytes, or 0 if variable-length */ UNIV_INLINE ulint dtype_get_len( @@ -377,19 +431,10 @@ dtype_set_mbminmaxlen( ulint mbmaxlen); /*!< in: maximum length of a char, in bytes, or 0 if this is not a character type */ -/*********************************************************************//** -Gets the padding character code for the type. 
-@return padding character code, or ULINT_UNDEFINED if no padding specified */ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype); /*!< in: precise type */ #endif /* !UNIV_HOTBACKUP */ /***********************************************************************//** Returns the size of a fixed size data type, 0 if not a fixed size type. -@return fixed size, or 0 */ +@return fixed size, or 0 */ UNIV_INLINE ulint dtype_get_fixed_size_low( @@ -403,7 +448,7 @@ dtype_get_fixed_size_low( #ifndef UNIV_HOTBACKUP /***********************************************************************//** Returns the minimum size of a data type. -@return minimum size */ +@return minimum size */ UNIV_INLINE ulint dtype_get_min_size_low( @@ -416,7 +461,7 @@ dtype_get_min_size_low( /***********************************************************************//** Returns the maximum size of a data type. Note: types in system tables may be incomplete and return incorrect information. -@return maximum size */ +@return maximum size */ UNIV_INLINE ulint dtype_get_max_size_low( @@ -427,7 +472,7 @@ dtype_get_max_size_low( /***********************************************************************//** Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ +@return SQL null storage size in ROW_FORMAT=REDUNDANT */ UNIV_INLINE ulint dtype_get_sql_null_size( @@ -486,15 +531,13 @@ dtype_sql_name( /*********************************************************************//** Validates a data type structure. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dtype_validate( /*===========*/ const dtype_t* type); /*!< in: type struct to validate */ /*********************************************************************//** Prints a data type structure. 
*/ -UNIV_INTERN void dtype_print( /*========*/ diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic index d489bef89a8..57770ec0e17 100644 --- a/storage/innobase/include/data0type.ic +++ b/storage/innobase/include/data0type.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,15 +23,13 @@ Data types Created 1/16/1996 Heikki Tuuri *******************************************************/ -#include /* strlen() */ - #include "mach0data.h" #ifndef UNIV_HOTBACKUP # include "ha_prototypes.h" /*********************************************************************//** Gets the MySQL charset-collation code for MySQL string types. -@return MySQL charset-collation code */ +@return MySQL charset-collation code */ UNIV_INLINE ulint dtype_get_charset_coll( @@ -45,7 +43,7 @@ dtype_get_charset_coll( Determines if a MySQL string type is a subset of UTF-8. This function may return false negatives, in case further character-set collation codes are introduced in MySQL later. -@return TRUE if a subset of UTF-8 */ +@return TRUE if a subset of UTF-8 */ UNIV_INLINE ibool dtype_is_utf8( @@ -68,7 +66,7 @@ dtype_is_utf8( /*********************************************************************//** Gets the MySQL type code from a dtype. -@return MySQL type code; this is NOT an InnoDB type code! */ +@return MySQL type code; this is NOT an InnoDB type code! */ UNIV_INLINE ulint dtype_get_mysql_type( @@ -180,7 +178,7 @@ dtype_copy( /*********************************************************************//** Gets the SQL main data type. 
-@return SQL main data type */ +@return SQL main data type */ UNIV_INLINE ulint dtype_get_mtype( @@ -194,7 +192,7 @@ dtype_get_mtype( /*********************************************************************//** Gets the precise data type. -@return precise data type */ +@return precise data type */ UNIV_INLINE ulint dtype_get_prtype( @@ -208,7 +206,7 @@ dtype_get_prtype( /*********************************************************************//** Gets the type length. -@return fixed length of the type, in bytes, or 0 if variable-length */ +@return fixed length of the type, in bytes, or 0 if variable-length */ UNIV_INLINE ulint dtype_get_len( @@ -248,45 +246,6 @@ dtype_get_mbmaxlen( return(DATA_MBMAXLEN(type->mbminmaxlen)); } -/*********************************************************************//** -Gets the padding character code for a type. -@return padding character code, or ULINT_UNDEFINED if no padding specified */ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype) /*!< in: precise type */ -{ - switch (mtype) { - case DATA_FIXBINARY: - case DATA_BINARY: - if (dtype_get_charset_coll(prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL) { - /* Starting from 5.0.18, do not pad - VARBINARY or BINARY columns. */ - return(ULINT_UNDEFINED); - } - /* Fall through */ - case DATA_CHAR: - case DATA_VARCHAR: - case DATA_MYSQL: - case DATA_VARMYSQL: - /* Space is the padding character for all char and binary - strings, and starting from 5.0.3, also for TEXT strings. */ - - return(0x20); - case DATA_BLOB: - if (!(prtype & DATA_BINARY_TYPE)) { - return(0x20); - } - /* Fall through */ - default: - /* No padding specified */ - return(ULINT_UNDEFINED); - } -} - /**********************************************************************//** Stores for a type the information which determines its alphabetical ordering and the storage size of an SQL NULL value. 
This is the >= 4.1.x storage @@ -309,7 +268,7 @@ dtype_new_store_for_order_and_null_size( ut_ad(type); ut_ad(type->mtype >= DATA_VARCHAR); - ut_ad(type->mtype <= DATA_MYSQL); + ut_ad(type->mtype <= DATA_MTYPE_MAX); buf[0] = (byte)(type->mtype & 0xFFUL); @@ -483,6 +442,9 @@ dtype_sql_name( case DATA_BINARY: ut_snprintf(name, name_sz, "VARBINARY(%u)", len); break; + case DATA_GEOMETRY: + ut_snprintf(name, name_sz, "GEOMETRY"); + break; case DATA_BLOB: switch (len) { case 9: @@ -513,7 +475,7 @@ dtype_sql_name( /***********************************************************************//** Returns the size of a fixed size data type, 0 if not a fixed size type. -@return fixed size, or 0 */ +@return fixed size, or 0 */ UNIV_INLINE ulint dtype_get_fixed_size_low( @@ -548,6 +510,7 @@ dtype_get_fixed_size_low( case DATA_INT: case DATA_FLOAT: case DATA_DOUBLE: + case DATA_POINT: return(len); case DATA_MYSQL: #ifndef UNIV_HOTBACKUP @@ -579,6 +542,8 @@ dtype_get_fixed_size_low( case DATA_BINARY: case DATA_DECIMAL: case DATA_VARMYSQL: + case DATA_VAR_POINT: + case DATA_GEOMETRY: case DATA_BLOB: return(0); default: @@ -591,7 +556,7 @@ dtype_get_fixed_size_low( #ifndef UNIV_HOTBACKUP /***********************************************************************//** Returns the minimum size of a data type. -@return minimum size */ +@return minimum size */ UNIV_INLINE ulint dtype_get_min_size_low( @@ -625,6 +590,7 @@ dtype_get_min_size_low( case DATA_INT: case DATA_FLOAT: case DATA_DOUBLE: + case DATA_POINT: return(len); case DATA_MYSQL: if (prtype & DATA_BINARY_TYPE) { @@ -647,6 +613,8 @@ dtype_get_min_size_low( case DATA_BINARY: case DATA_DECIMAL: case DATA_VARMYSQL: + case DATA_VAR_POINT: + case DATA_GEOMETRY: case DATA_BLOB: return(0); default: @@ -659,7 +627,7 @@ dtype_get_min_size_low( /***********************************************************************//** Returns the maximum size of a data type. Note: types in system tables may be incomplete and return incorrect information. 
-@return maximum size */ +@return maximum size */ UNIV_INLINE ulint dtype_get_max_size_low( @@ -679,7 +647,10 @@ dtype_get_max_size_low( case DATA_BINARY: case DATA_DECIMAL: case DATA_VARMYSQL: + case DATA_POINT: return(len); + case DATA_VAR_POINT: + case DATA_GEOMETRY: case DATA_BLOB: break; default: @@ -693,7 +664,7 @@ dtype_get_max_size_low( /***********************************************************************//** Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ +@return SQL null storage size in ROW_FORMAT=REDUNDANT */ UNIV_INLINE ulint dtype_get_sql_null_size( diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index feac81af98e..32f9117af84 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -27,6 +27,7 @@ Created 5/24/1996 Heikki Tuuri #ifndef db0err_h #define db0err_h +/* Do not include univ.i because univ.i includes this. 
*/ enum dberr_t { DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new @@ -64,7 +65,8 @@ enum dberr_t { which is referenced */ DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint to a table failed */ - DB_CORRUPTION, /*!< data structure corruption noticed */ + DB_CORRUPTION, /*!< data structure corruption + noticed */ DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint from a table failed */ DB_NO_SAVEPOINT, /*!< no savepoint exists with the given @@ -124,20 +126,60 @@ enum dberr_t { DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big during online index creation */ - DB_IO_ERROR, /*!< Generic IO error */ DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */ DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory exceeds result cache limit */ - DB_TEMP_FILE_WRITE_FAILURE, /*!< Temp file write failure */ + DB_TEMP_FILE_WRITE_FAIL, /*!< Temp file write failure */ + DB_CANT_CREATE_GEOMETRY_OBJECT, /*!< Cannot create specified Geometry + data object */ + DB_CANNOT_OPEN_FILE, /*!< Cannot open a file */ DB_FTS_TOO_MANY_WORDS_IN_PHRASE, /*< Too many words in a phrase */ - DB_TOO_BIG_FOR_REDO, /* Record length greater than 10% - of redo log */ + + DB_TABLESPACE_TRUNCATED, /*!< tablespace was truncated */ + DB_DECRYPTION_FAILED, /* Tablespace encrypted and decrypt operation failed because of missing key management plugin, or missing or incorrect key or incorret AES method or algorithm. 
*/ + + DB_IO_ERROR = 100, /*!< Generic IO error */ + + DB_IO_DECOMPRESS_FAIL, /*!< Failure to decompress a page + after reading it from disk */ + + DB_IO_NO_PUNCH_HOLE, /*!< Punch hole not supported by + InnoDB */ + + DB_IO_NO_PUNCH_HOLE_FS, /*!< The file system doesn't support + punch hole */ + + DB_IO_NO_PUNCH_HOLE_TABLESPACE, /*!< The tablespace doesn't support + punch hole */ + + DB_IO_DECRYPT_FAIL, /*!< Failure to decrypt a page + after reading it from disk */ + + DB_IO_NO_ENCRYPT_TABLESPACE, /*!< The tablespace doesn't support + encrypt */ + + DB_IO_PARTIAL_FAILED, /*!< Partial IO request failed */ + + DB_FORCED_ABORT, /*!< Transaction was forced to rollback + by a higher priority transaction */ + + DB_TABLE_CORRUPT, /*!< Table/clustered index is + corrupted */ + + DB_WRONG_FILE_NAME, /*!< Invalid Filename */ + + DB_COMPUTE_VALUE_FAILED, /*!< Compute generated value failed */ + + DB_NO_FK_ON_S_BASE_COL, /*!< Cannot add foreign constrain + placed on the base column of + stored column */ + /* The following are partial failure codes */ DB_FAIL = 1000, DB_OVERFLOW, @@ -146,22 +188,18 @@ enum dberr_t { DB_ZIP_OVERFLOW, DB_RECORD_NOT_FOUND = 1500, DB_END_OF_INDEX, - DB_DICT_CHANGED, /*!< Some part of table dictionary has - changed. Such as index dropped or - foreign key dropped */ + DB_NOT_FOUND, /*!< Generic error code for "Not found" + type of errors */ - - /* The following are API only error codes. */ + /* The following are API only error codes. 
*/ DB_DATA_MISMATCH = 2000, /*!< Column update or read failed because the types mismatch */ - DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the + DB_SCHEMA_NOT_LOCKED /*!< If an API function expects the schema to be locked in exclusive mode and if it's not then that API function will return this error code */ - DB_NOT_FOUND /*!< Generic error code for "Not found" - type of errors */ }; #endif diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 477e1150f43..5884ba4bcc2 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -39,41 +39,42 @@ typedef byte dict_hdr_t; /**********************************************************************//** Gets a pointer to the dictionary header and x-latches its page. -@return pointer to the dictionary header, page x-latched */ -UNIV_INTERN +@return pointer to the dictionary header, page x-latched */ dict_hdr_t* dict_hdr_get( /*=========*/ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** Returns a new table, index, or space id. */ -UNIV_INTERN void dict_hdr_get_new_id( /*================*/ - table_id_t* table_id, /*!< out: table id - (not assigned if NULL) */ - index_id_t* index_id, /*!< out: index id - (not assigned if NULL) */ - ulint* space_id); /*!< out: space id - (not assigned if NULL) */ + table_id_t* table_id, /*!< out: table id + (not assigned if NULL) */ + index_id_t* index_id, /*!< out: index id + (not assigned if NULL) */ + ulint* space_id, /*!< out: space id + (not assigned if NULL) */ + const dict_table_t* table, /*!< in: table */ + bool disable_redo); /*!< in: if true and table + object is NULL + then disable-redo */ /**********************************************************************//** Writes the current value of the row id counter to the dictionary header file page. 
*/ -UNIV_INTERN void dict_hdr_flush_row_id(void); /*=======================*/ /**********************************************************************//** Returns a new row id. -@return the new id */ +@return the new id */ UNIV_INLINE row_id_t dict_sys_get_new_row_id(void); /*=========================*/ /**********************************************************************//** Reads a row id from a record or other 6-byte stored form. -@return row id */ +@return row id */ UNIV_INLINE row_id_t dict_sys_read_row_id( @@ -91,7 +92,6 @@ dict_sys_write_row_id( Initializes the data dictionary memory structures when the database is started. This function is also called when the data dictionary is created. @return DB_SUCCESS or error code. */ -UNIV_INTERN dberr_t dict_boot(void) /*===========*/ @@ -100,7 +100,6 @@ dict_boot(void) /*****************************************************************//** Creates and initializes the data dictionary at the server bootstrap. @return DB_SUCCESS or error code. 
*/ -UNIV_INTERN dberr_t dict_create(void) /*=============*/ @@ -221,7 +220,8 @@ enum dict_col_sys_indexes_enum { DICT_COL__SYS_INDEXES__TYPE = 4, DICT_COL__SYS_INDEXES__SPACE = 5, DICT_COL__SYS_INDEXES__PAGE_NO = 6, - DICT_NUM_COLS__SYS_INDEXES = 7 + DICT_COL__SYS_INDEXES__MERGE_THRESHOLD = 7, + DICT_NUM_COLS__SYS_INDEXES = 8 }; /* The field numbers in the SYS_INDEXES clustered index */ enum dict_fld_sys_indexes_enum { @@ -234,7 +234,8 @@ enum dict_fld_sys_indexes_enum { DICT_FLD__SYS_INDEXES__TYPE = 6, DICT_FLD__SYS_INDEXES__SPACE = 7, DICT_FLD__SYS_INDEXES__PAGE_NO = 8, - DICT_NUM_FIELDS__SYS_INDEXES = 9 + DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD = 9, + DICT_NUM_FIELDS__SYS_INDEXES = 10 }; /* The columns in SYS_FIELDS */ enum dict_col_sys_fields_enum { @@ -325,6 +326,23 @@ enum dict_fld_sys_datafiles_enum { DICT_NUM_FIELDS__SYS_DATAFILES = 4 }; +/* The columns in SYS_VIRTUAL */ +enum dict_col_sys_virtual_enum { + DICT_COL__SYS_VIRTUAL__TABLE_ID = 0, + DICT_COL__SYS_VIRTUAL__POS = 1, + DICT_COL__SYS_VIRTUAL__BASE_POS = 2, + DICT_NUM_COLS__SYS_VIRTUAL = 3 +}; +/* The field numbers in the SYS_VIRTUAL clustered index */ +enum dict_fld_sys_virtual_enum { + DICT_FLD__SYS_VIRTUAL__TABLE_ID = 0, + DICT_FLD__SYS_VIRTUAL__POS = 1, + DICT_FLD__SYS_VIRTUAL__BASE_POS = 2, + DICT_FLD__SYS_VIRTUAL__DB_TRX_ID = 3, + DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR = 4, + DICT_NUM_FIELDS__SYS_VIRTUAL = 5 +}; + /* A number of the columns above occur in multiple tables. These are the length of thos fields. */ #define DICT_FLD_LEN_SPACE 4 diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic index 2b156a4f672..e40c3f844e3 100644 --- a/storage/innobase/include/dict0boot.ic +++ b/storage/innobase/include/dict0boot.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,7 +25,7 @@ Created 4/18/1996 Heikki Tuuri /**********************************************************************//** Returns a new row id. -@return the new id */ +@return the new id */ UNIV_INLINE row_id_t dict_sys_get_new_row_id(void) @@ -33,7 +33,7 @@ dict_sys_get_new_row_id(void) { row_id_t id; - mutex_enter(&(dict_sys->mutex)); + mutex_enter(&dict_sys->mutex); id = dict_sys->row_id; @@ -44,14 +44,14 @@ dict_sys_get_new_row_id(void) dict_sys->row_id++; - mutex_exit(&(dict_sys->mutex)); + mutex_exit(&dict_sys->mutex); return(id); } /**********************************************************************//** Reads a row id from a record or other 6-byte stored form. -@return row id */ +@return row id */ UNIV_INLINE row_id_t dict_sys_read_row_id( diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h index 150c76b2e65..f9ef39fa8c6 100644 --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -33,97 +33,155 @@ Created 1/8/1996 Heikki Tuuri #include "row0types.h" #include "mtr0mtr.h" #include "fil0crypt.h" +#include "fsp0space.h" /*********************************************************************//** Creates a table create graph. -@return own: table create node */ -UNIV_INTERN +@return own: table create node */ tab_node_t* tab_create_graph_create( /*====================*/ - dict_table_t* table, /*!< in: table to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit, /*!< in: true if the commit node should be - added to the query graph */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id);/*!< in: encryption key_id */ -/*********************************************************************//** -Creates an index create graph. 
-@return own: index create node */ -UNIV_INTERN + dict_table_t* table, /*!< in: table to create, built as + a memory data structure */ + mem_heap_t* heap, /*!< in: heap where created */ + fil_encryption_t mode, /*!< in: encryption mode */ + ulint key_id); /*!< in: encryption key_id */ + +/** Creates an index create graph. +@param[in] index index to create, built as a memory data structure +@param[in,out] heap heap where created +@param[in] add_v new virtual columns added in the same clause with + add index +@return own: index create node */ ind_node_t* ind_create_graph_create( -/*====================*/ - dict_index_t* index, /*!< in: index to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit);/*!< in: true if the commit node should be - added to the query graph */ + dict_index_t* index, + mem_heap_t* heap, + const dict_add_v_col_t* add_v); + /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* dict_create_table_step( /*===================*/ - que_thr_t* thr); /*!< in: query thread */ + que_thr_t* thr); /*!< in: query thread */ + +/** Builds a tablespace to store various objects. +@param[in,out] tablespace Tablespace object describing what to build. +@return DB_SUCCESS or error code. */ +dberr_t +dict_build_tablespace( + Tablespace* tablespace); + +/** Builds a tablespace to contain a table, using file-per-table=1. +@param[in,out] table Table to build in its own tablespace. +@param[in] node Table create node +@return DB_SUCCESS or error code */ +dberr_t +dict_build_tablespace_for_table( + dict_table_t* table, + tab_node_t* node); + +/** Assign a new table ID and put it into the table cache and the transaction. 
+@param[in,out] table Table that needs an ID +@param[in,out] trx Transaction */ +void +dict_table_assign_new_id( + dict_table_t* table, + trx_t* trx); + /***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* dict_create_index_step( /*===================*/ - que_thr_t* thr); /*!< in: query thread */ -/*******************************************************************//** -Truncates the index tree associated with a row in SYS_INDEXES table. -@return new root page number, or FIL_NULL on failure */ -UNIV_INTERN -ulint -dict_truncate_index_tree( -/*=====================*/ - dict_table_t* table, /*!< in: the table the index belongs to */ - ulint space, /*!< in: 0=truncate, - nonzero=create the index tree in the - given tablespace */ - btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to - record in the clustered index of - SYS_INDEXES table. The cursor may be - repositioned in this call. */ - mtr_t* mtr); /*!< in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ -/*******************************************************************//** -Drops the index tree associated with a row in SYS_INDEXES table. */ -UNIV_INTERN + que_thr_t* thr); /*!< in: query thread */ + +/***************************************************************//** +Builds an index definition but doesn't update sys_table. 
+@return DB_SUCCESS or error code */ void -dict_drop_index_tree( +dict_build_index_def( /*=================*/ - rec_t* rec, /*!< in/out: record in the clustered index - of SYS_INDEXES table */ - mtr_t* mtr); /*!< in: mtr having the latch on the record page */ + const dict_table_t* table, /*!< in: table */ + dict_index_t* index, /*!< in/out: index */ + trx_t* trx); /*!< in/out: InnoDB transaction + handle */ +/***************************************************************//** +Creates an index tree for the index if it is not a member of a cluster. +Don't update SYSTEM TABLES. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +dberr_t +dict_create_index_tree( +/*===================*/ + dict_index_t* index, /*!< in/out: index */ + const trx_t* trx); /*!< in: InnoDB transaction handle */ + +/*******************************************************************//** +Recreate the index tree associated with a row in SYS_INDEXES table. +@return new root page number, or FIL_NULL on failure */ +ulint +dict_recreate_index_tree( +/*======================*/ + const dict_table_t* table, /*!< in: the table the index + belongs to */ + btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing + to record in the clustered index of + SYS_INDEXES table. The cursor may be + repositioned in this call. */ + mtr_t* mtr); /*!< in: mtr having the latch + on the record page. The mtr may be + committed and restarted in this call. */ + +/** Drop the index tree associated with a row in SYS_INDEXES table. +@param[in,out] rec SYS_INDEXES record +@param[in,out] pcur persistent cursor on rec +@param[in,out] mtr mini-transaction +@return whether freeing the B-tree was attempted */ +bool +dict_drop_index_tree( + rec_t* rec, + btr_pcur_t* pcur, + mtr_t* mtr); + +/***************************************************************//** +Creates an index tree for the index if it is not a member of a cluster. +Don't update SYSTEM TABLES. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +dberr_t +dict_create_index_tree_in_mem( +/*==========================*/ + dict_index_t* index, /*!< in/out: index */ + const trx_t* trx); /*!< in: InnoDB transaction handle */ + +/*******************************************************************//** +Truncates the index tree but don't update SYSTEM TABLES. +@return DB_SUCCESS or error */ +dberr_t +dict_truncate_index_tree_in_mem( +/*============================*/ + dict_index_t* index); /*!< in/out: index */ + +/*******************************************************************//** +Drops the index tree but don't update SYS_INDEXES table. */ +void +dict_drop_index_tree_in_mem( +/*========================*/ + const dict_index_t* index, /*!< in: index */ + ulint page_no);/*!< in: index page-no */ + /****************************************************************//** Creates the foreign key constraints system tables inside InnoDB at server bootstrap or server start if they are not found or are not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_create_or_check_foreign_constraint_tables(void); /*================================================*/ -/********************************************************************//** -Construct foreign key constraint defintion from data dictionary information. -*/ -UNIV_INTERN -char* -dict_foreign_def_get( -/*=================*/ - dict_foreign_t* foreign,/*!< in: foreign */ - trx_t* trx); /*!< in: trx */ - /********************************************************************//** Generate a foreign key constraint name when it was not named by the user. 
A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER, @@ -133,11 +191,10 @@ UNIV_INLINE dberr_t dict_create_add_foreign_id( /*=======================*/ - ulint* id_nr, /*!< in/out: number to use in id generation; - incremented if used */ - const char* name, /*!< in: table name */ - dict_foreign_t* foreign)/*!< in/out: foreign key */ - MY_ATTRIBUTE((nonnull)); + ulint* id_nr, /*!< in/out: number to use in id + generation; incremented if used */ + const char* name, /*!< in: table name */ + dict_foreign_t* foreign); /*!< in/out: foreign key */ /** Adds the given set of foreign key objects to the dictionary tables in the database. This function does not modify the dictionary cache. The @@ -149,7 +206,6 @@ the dictionary tables local_fk_set belong to @param[in,out] trx transaction @return error code or DB_SUCCESS */ -UNIV_INTERN dberr_t dict_create_add_foreigns_to_dictionary( /*===================================*/ @@ -157,34 +213,66 @@ dict_create_add_foreigns_to_dictionary( const dict_table_t* table, trx_t* trx) MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Check if a foreign constraint is on columns server as base columns +of any stored column. This is to prevent creating SET NULL or CASCADE +constraint on such columns +@param[in] local_fk_set set of foreign key objects, to be added to +the dictionary tables +@param[in] table table to which the foreign key objects in +local_fk_set belong to +@return true if yes, otherwise, false */ +bool +dict_foreigns_has_s_base_col( + const dict_foreign_set& local_fk_set, + const dict_table_t* table); + /****************************************************************//** Creates the tablespaces and datafiles system tables inside InnoDB at server bootstrap or server start if they are not found or are not of the right form. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_create_or_check_sys_tablespace(void); /*=====================================*/ -/********************************************************************//** -Add a single tablespace definition to the data dictionary tables in the -database. -@return error code or DB_SUCCESS */ -UNIV_INTERN +/** Creates the virtual column system tables inside InnoDB +at server bootstrap or server start if they are not found or are +not of the right form. +@return DB_SUCCESS or error code */ dberr_t -dict_create_add_tablespace_to_dictionary( -/*=====================================*/ - ulint space, /*!< in: tablespace id */ - const char* name, /*!< in: tablespace name */ - ulint flags, /*!< in: tablespace flags */ - const char* path, /*!< in: tablespace path */ - trx_t* trx, /*!< in: transaction */ - bool commit); /*!< in: if true then commit the - transaction */ +dict_create_or_check_sys_virtual(); + +/** Put a tablespace definition into the data dictionary, +replacing what was there previously. +@param[in] space Tablespace id +@param[in] name Tablespace name +@param[in] flags Tablespace flags +@param[in] path Tablespace path +@param[in] trx Transaction +@param[in] commit If true, commit the transaction +@return error code or DB_SUCCESS */ +dberr_t +dict_replace_tablespace_in_dictionary( + ulint space_id, + const char* name, + ulint flags, + const char* path, + trx_t* trx, + bool commit); + +/** Delete records from SYS_TABLESPACES and SYS_DATAFILES associated +with a particular tablespace ID. +@param[in] space Tablespace ID +@param[in,out] trx Current transaction +@return DB_SUCCESS if OK, dberr_t if the operation failed */ +dberr_t +dict_delete_tablespace_and_datafiles( + ulint space, + trx_t* trx); + /********************************************************************//** Add a foreign key definition to the data dictionary tables. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN +@return error code or DB_SUCCESS */ dberr_t dict_create_add_foreign_to_dictionary( /*==================================*/ @@ -206,65 +294,102 @@ dict_foreign_def_get( /* Table create node structure */ struct tab_node_t{ - que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */ - dict_table_t* table; /*!< table to create, built as a memory data - structure with dict_mem_... functions */ - ins_node_t* tab_def; /* child node which does the insert of - the table definition; the row to be inserted - is built by the parent node */ - ins_node_t* col_def; /* child node which does the inserts of - the column definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful table creation */ + que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */ + dict_table_t* table; /*!< table to create, built as a + memory data structure with + dict_mem_... 
functions */ + ins_node_t* tab_def; /*!< child node which does the insert of + the table definition; the row to be + inserted is built by the parent node */ + ins_node_t* col_def; /*!< child node which does the inserts + of the column definitions; the row to + be inserted is built by the parent + node */ + ins_node_t* v_col_def; /*!< child node which does the inserts + of the sys_virtual row definitions; + the row to be inserted is built by + the parent node */ /*----------------------*/ /* Local storage for this graph node */ - ulint state; /*!< node execution state */ - ulint col_no; /*!< next column definition to insert */ + ulint state; /*!< node execution state */ + ulint col_no; /*!< next column definition to insert */ ulint key_id; /*!< encryption key_id */ fil_encryption_t mode; /*!< encryption mode */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ + ulint base_col_no; /*!< next base column to insert */ + mem_heap_t* heap; /*!< memory heap used as auxiliary + storage */ }; /* Table create node states */ #define TABLE_BUILD_TABLE_DEF 1 #define TABLE_BUILD_COL_DEF 2 -#define TABLE_COMMIT_WORK 3 +#define TABLE_BUILD_V_COL_DEF 3 #define TABLE_ADD_TO_CACHE 4 #define TABLE_COMPLETED 5 /* Index create node struct */ struct ind_node_t{ - que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */ - dict_index_t* index; /*!< index to create, built as a memory data - structure with dict_mem_... 
functions */ - ins_node_t* ind_def; /* child node which does the insert of - the index definition; the row to be inserted - is built by the parent node */ - ins_node_t* field_def; /* child node which does the inserts of - the field definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful index creation */ + que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */ + dict_index_t* index; /*!< index to create, built as a + memory data structure with + dict_mem_... functions */ + ins_node_t* ind_def; /*!< child node which does the insert of + the index definition; the row to be + inserted is built by the parent node */ + ins_node_t* field_def; /*!< child node which does the inserts + of the field definitions; the row to + be inserted is built by the parent + node */ /*----------------------*/ /* Local storage for this graph node */ - ulint state; /*!< node execution state */ - ulint page_no;/* root page number of the index */ - dict_table_t* table; /*!< table which owns the index */ - dtuple_t* ind_row;/* index definition row built */ - ulint field_no;/* next field definition to insert */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ + ulint state; /*!< node execution state */ + ulint page_no; /* root page number of the index */ + dict_table_t* table; /*!< table which owns the index */ + dtuple_t* ind_row; /* index definition row built */ + ulint field_no; /* next field definition to insert */ + mem_heap_t* heap; /*!< memory heap used as auxiliary + storage */ + const dict_add_v_col_t* + add_v; /*!< new virtual columns that being + added along with an add index call */ }; +/** Compose a column number for a virtual column, stored in the "POS" field +of Sys_columns. 
The column number includes both its virtual column sequence +(the "nth" virtual column) and its actual column position in original table +@param[in] v_pos virtual column sequence +@param[in] col_pos column position in original table definition +@return composed column position number */ +UNIV_INLINE +ulint +dict_create_v_col_pos( + ulint v_pos, + ulint col_pos); + +/** Get the column number for a virtual column (the column position in +original table), stored in the "POS" field of Sys_columns +@param[in] pos virtual column position +@return column position in original table */ +UNIV_INLINE +ulint +dict_get_v_col_mysql_pos( + ulint pos); + +/** Get a virtual column sequence (the "nth" virtual column) for a +virtual column, stord in the "POS" field of Sys_columns +@param[in] pos virtual column position +@return virtual column sequence */ +UNIV_INLINE +ulint +dict_get_v_col_pos( + ulint pos); + /* Index create node states */ #define INDEX_BUILD_INDEX_DEF 1 #define INDEX_BUILD_FIELD_DEF 2 #define INDEX_CREATE_INDEX_TREE 3 -#define INDEX_COMMIT_WORK 4 -#define INDEX_ADD_TO_CACHE 5 +#define INDEX_ADD_TO_CACHE 4 #ifndef UNIV_NONINL #include "dict0crea.ic" diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic index 1cbaa47032b..565e4ed1a8c 100644 --- a/storage/innobase/include/dict0crea.ic +++ b/storage/innobase/include/dict0crea.ic @@ -23,13 +23,14 @@ Database object creation Created 1/8/1996 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" + #include "mem0mem.h" /*********************************************************************//** Checks if a table name contains the string "/#sql" which denotes temporary tables in MySQL. 
@return true if temporary table */ -UNIV_INTERN bool row_is_mysql_tmp_table_name( /*========================*/ @@ -52,6 +53,8 @@ dict_create_add_foreign_id( const char* name, /*!< in: table name */ dict_foreign_t* foreign)/*!< in/out: foreign key */ { + DBUG_ENTER("dict_create_add_foreign_id"); + if (foreign->id == NULL) { /* Generate a new constraint id */ ulint namelen = strlen(name); @@ -87,12 +90,57 @@ dict_create_add_foreign_id( if (innobase_check_identifier_length( strchr(id,'/') + 1)) { - return(DB_IDENTIFIER_TOO_LONG); + DBUG_RETURN(DB_IDENTIFIER_TOO_LONG); } } foreign->id = id; + + DBUG_PRINT("dict_create_add_foreign_id", + ("generated foreign id: %s", id)); } - return(DB_SUCCESS); + + DBUG_RETURN(DB_SUCCESS); } +/** Compose a column number for a virtual column, stored in the "POS" field +of Sys_columns. The column number includes both its virtual column sequence +(the "nth" virtual column) and its actual column position in original table +@param[in] v_pos virtual column sequence +@param[in] col_pos column position in original table definition +@return composed column position number */ +UNIV_INLINE +ulint +dict_create_v_col_pos( + ulint v_pos, + ulint col_pos) +{ + ut_ad(v_pos <= REC_MAX_N_FIELDS); + ut_ad(col_pos <= REC_MAX_N_FIELDS); + + return(((v_pos + 1) << 16) + col_pos); +} + +/** Get the column number for a virtual column (the column position in +original table), stored in the "POS" field of Sys_columns +@param[in] pos virtual column position +@return column position in original table */ +UNIV_INLINE +ulint +dict_get_v_col_mysql_pos( + ulint pos) +{ + return(pos & 0xFFFF); +} + +/** Get a virtual column sequence (the "nth" virtual column) for a +virtual column, stord in the "POS" field of Sys_columns +@param[in] pos virtual column position +@return virtual column sequence */ +UNIV_INLINE +ulint +dict_get_v_col_pos( + ulint pos) +{ + return((pos >> 16) - 1); +} diff --git a/storage/innobase/include/dict0defrag_bg.h 
b/storage/innobase/include/dict0defrag_bg.h new file mode 100644 index 00000000000..eb2a6e6824f --- /dev/null +++ b/storage/innobase/include/dict0defrag_bg.h @@ -0,0 +1,93 @@ +/***************************************************************************** + +Copyright (c) 2016, MariaDB Corporation. All rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/dict0defrag_bg.h +Code used for background table and index +defragmentation + +Created 25/08/2016 Jan Lindström +*******************************************************/ + +#ifndef dict0defrag_bg_h +#define dict0defrag_bg_h + +#include "univ.i" + +#include "dict0types.h" +#include "os0event.h" +#include "os0thread.h" + +/*****************************************************************//** +Initialize the defrag pool, called once during thread initialization. */ +void +dict_defrag_pool_init(void); +/*========================*/ + +/*****************************************************************//** +Free the resources occupied by the defrag pool, called once during +thread de-initialization. 
*/ +void +dict_defrag_pool_deinit(void); +/*==========================*/ + +/*****************************************************************//** +Add an index in a table to the defrag pool, which is processed by the +background stats gathering thread. Only the table id and index id are +added to the list, so the table can be closed after being enqueued and +it will be opened when needed. If the table or index does not exist later +(has been DROPped), then it will be removed from the pool and skipped. */ +void +dict_stats_defrag_pool_add( +/*=======================*/ + const dict_index_t* index); /*!< in: table to add */ + +/*****************************************************************//** +Delete a given index from the auto defrag pool. */ +void +dict_stats_defrag_pool_del( +/*=======================*/ + const dict_table_t* table, /*! #include "fsp0fsp.h" #include "dict0pagecompress.h" @@ -50,20 +52,10 @@ extern bool innodb_table_stats_not_found; extern bool innodb_index_stats_not_found; #ifndef UNIV_HOTBACKUP -# include "sync0sync.h" # include "sync0rw.h" -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ -UNIV_INTERN -void -dict_casedn_str( -/*============*/ - char* a) /*!< in/out: string to put in lower case */ - MY_ATTRIBUTE((nonnull)); /********************************************************************//** Get the database name length in a table name. -@return database name length */ -UNIV_INTERN +@return database name length */ ulint dict_get_db_name_len( /*=================*/ @@ -75,7 +67,6 @@ Open a table from its database and table name, this is currently used by foreign constraint parser to get the referenced table. 
@return complete table name with database and table name, allocated from heap memory passed in */ -UNIV_INTERN char* dict_get_referenced_table( /*======================*/ @@ -88,7 +79,6 @@ dict_get_referenced_table( mem_heap_t* heap); /*!< in: heap memory */ /*********************************************************************//** Frees a foreign key struct. */ - void dict_foreign_free( /*==============*/ @@ -98,7 +88,6 @@ Finds the highest [number] for foreign key constraints of the table. Looks only at the >= 4.0.18-format id's, which are of the form databasename/tablename_ibfk_[number]. @return highest number, 0 if table has no new format foreign key constraints */ -UNIV_INTERN ulint dict_table_get_highest_foreign_id( /*==============================*/ @@ -106,8 +95,7 @@ dict_table_get_highest_foreign_id( memory cache */ /********************************************************************//** Return the end of table name where we have removed dbname and '/'. -@return table name */ -UNIV_INTERN +@return table name */ const char* dict_remove_db_name( /*================*/ @@ -130,15 +118,14 @@ enum dict_table_op_t { /**********************************************************************//** Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INTERN +@return table, NULL if does not exist */ dict_table_t* dict_table_open_on_id( /*==================*/ table_id_t table_id, /*!< in: table id */ ibool dict_locked, /*!< in: TRUE=data dictionary locked */ dict_table_op_t table_op) /*!< in: operation to perform */ - __attribute__((warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /**********************************************************************//** Returns a table object based on table id. @@ -152,7 +139,6 @@ dict_table_open_on_index_id( __attribute__((warn_unused_result)); /********************************************************************//** Decrements the count of open handles to a table. 
*/ -UNIV_INTERN void dict_table_close( /*=============*/ @@ -162,22 +148,22 @@ dict_table_close( indexes after an aborted online index creation */ MY_ATTRIBUTE((nonnull)); +/*********************************************************************//** +Closes the only open handle to a table and drops a table while assuring +that dict_sys->mutex is held the whole time. This assures that the table +is not evicted after the close when the count of open handles goes to zero. +Because dict_sys->mutex is held, we do not need to call +dict_table_prevent_eviction(). */ +void +dict_table_close_and_drop( +/*======================*/ + trx_t* trx, /*!< in: data dictionary transaction */ + dict_table_t* table); /*!< in/out: table */ /**********************************************************************//** Inits the data dictionary module. */ -UNIV_INTERN void dict_init(void); -/*===========*/ -/********************************************************************//** -Gets the space id of every table of the data dictionary and makes a linear -list and a hash table of them to the data dictionary cache. This function -can be called at database startup if we did not need to do a crash recovery. -In crash recovery we must scan the space id's from the .ibd files in MySQL -database directories. */ -UNIV_INTERN -void -dict_load_space_id_list(void); -/*=========================*/ + /*********************************************************************//** Gets the minimum number of bytes per character. @return minimum multi-byte char size, in bytes */ @@ -215,8 +201,8 @@ void dict_col_copy_type( /*===============*/ const dict_col_t* col, /*!< in: column */ - dtype_t* type) /*!< out: data type */ - MY_ATTRIBUTE((nonnull)); + dtype_t* type); /*!< out: data type */ + /**********************************************************************//** Determine bytes of column prefix to be stored in the undo log. 
Please note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix @@ -230,11 +216,23 @@ dict_max_field_len_store_undo( const dict_col_t* col) /*!< in: column which index prefix is based on */ MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Determine maximum bytes of a virtual column need to be stored +in the undo log. +@param[in] table dict_table_t for the table +@param[in] col_no virtual column number +@return maximum bytes of virtual column to be stored in the undo log */ +UNIV_INLINE +ulint +dict_max_v_field_len_store_undo( + dict_table_t* table, + ulint col_no); + #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG /*********************************************************************//** Assert that a column and a data type match. -@return TRUE */ +@return TRUE */ UNIV_INLINE ibool dict_col_type_assert_equal( @@ -246,7 +244,7 @@ dict_col_type_assert_equal( #ifndef UNIV_HOTBACKUP /***********************************************************************//** Returns the minimum size of the column. -@return minimum size */ +@return minimum size */ UNIV_INLINE ulint dict_col_get_min_size( @@ -255,7 +253,7 @@ dict_col_get_min_size( MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************************//** Returns the maximum size of the column. -@return maximum size */ +@return maximum size */ UNIV_INLINE ulint dict_col_get_max_size( @@ -264,7 +262,7 @@ dict_col_get_max_size( MY_ATTRIBUTE((nonnull, warn_unused_result)); /***********************************************************************//** Returns the size of a fixed size column, 0 if not a fixed size column. -@return fixed size, or 0 */ +@return fixed size, or 0 */ UNIV_INLINE ulint dict_col_get_fixed_size( @@ -275,7 +273,7 @@ dict_col_get_fixed_size( /***********************************************************************//** Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. 
For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ +@return SQL null storage size in ROW_FORMAT=REDUNDANT */ UNIV_INLINE ulint dict_col_get_sql_null_size( @@ -285,7 +283,7 @@ dict_col_get_sql_null_size( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Gets the column number. -@return col->ind, table column position (starting from 0) */ +@return col->ind, table column position (starting from 0) */ UNIV_INLINE ulint dict_col_get_no( @@ -301,11 +299,22 @@ dict_col_get_clust_pos( const dict_col_t* col, /*!< in: table column */ const dict_index_t* clust_index) /*!< in: clustered index */ MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Gets the column position in the given index. +@param[in] col table column +@param[in] index index to be searched for column +@return position of column in the given index. */ +UNIV_INLINE +ulint +dict_col_get_index_pos( + const dict_col_t* col, + const dict_index_t* index) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /****************************************************************//** If the given column name is reserved for InnoDB system columns, return TRUE. -@return TRUE if name is reserved */ -UNIV_INTERN +@return TRUE if name is reserved */ ibool dict_col_name_is_reserved( /*======================*/ @@ -313,7 +322,6 @@ dict_col_name_is_reserved( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Acquire the autoinc lock. */ -UNIV_INTERN void dict_table_autoinc_lock( /*====================*/ @@ -321,7 +329,6 @@ dict_table_autoinc_lock( MY_ATTRIBUTE((nonnull)); /********************************************************************//** Unconditionally set the autoinc counter. 
*/ -UNIV_INTERN void dict_table_autoinc_initialize( /*==========================*/ @@ -331,14 +338,12 @@ dict_table_autoinc_initialize( /** Store autoinc value when the table is evicted. @param[in] table table evicted */ -UNIV_INTERN void dict_table_autoinc_store( const dict_table_t* table); /** Restore autoinc value when the table is loaded. @param[in] table table loaded */ -UNIV_INTERN void dict_table_autoinc_restore( dict_table_t* table); @@ -346,8 +351,7 @@ dict_table_autoinc_restore( /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. -@return value for a new row, or 0 */ -UNIV_INTERN +@return value for a new row, or 0 */ ib_uint64_t dict_table_autoinc_read( /*====================*/ @@ -356,7 +360,6 @@ dict_table_autoinc_read( /********************************************************************//** Updates the autoinc counter if the value supplied is greater than the current value. */ -UNIV_INTERN void dict_table_autoinc_update_if_greater( /*=================================*/ @@ -366,7 +369,6 @@ dict_table_autoinc_update_if_greater( MY_ATTRIBUTE((nonnull)); /********************************************************************//** Release the autoinc lock. */ -UNIV_INTERN void dict_table_autoinc_unlock( /*======================*/ @@ -375,7 +377,6 @@ dict_table_autoinc_unlock( #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Adds system columns to a table object. */ -UNIV_INTERN void dict_table_add_system_columns( /*==========================*/ @@ -383,9 +384,14 @@ dict_table_add_system_columns( mem_heap_t* heap) /*!< in: temporary heap */ MY_ATTRIBUTE((nonnull)); #ifndef UNIV_HOTBACKUP +/** Mark if table has big rows. 
+@param[in,out] table table handler */ +void +dict_table_set_big_rows( + dict_table_t* table) + MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Adds a table object to the dictionary cache. */ -UNIV_INTERN void dict_table_add_to_cache( /*====================*/ @@ -395,7 +401,6 @@ dict_table_add_to_cache( MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Removes a table object from the dictionary cache. */ -UNIV_INTERN void dict_table_remove_from_cache( /*=========================*/ @@ -403,7 +408,6 @@ dict_table_remove_from_cache( MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Removes a table object from the dictionary cache. */ -UNIV_INTERN void dict_table_remove_from_cache_low( /*=============================*/ @@ -412,8 +416,7 @@ dict_table_remove_from_cache_low( to make room in the table LRU list */ /**********************************************************************//** Renames a table object. -@return TRUE if success */ -UNIV_INTERN +@return TRUE if success */ dberr_t dict_table_rename_in_cache( /*=======================*/ @@ -424,19 +427,19 @@ dict_table_rename_in_cache( to preserve the original table name in constraints which reference it */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Removes an index from the dictionary cache. */ -UNIV_INTERN + +/** Removes an index from the dictionary cache. 
+@param[in,out] table table whose index to remove +@param[in,out] index index to remove, this object is destroyed and must not +be accessed by the caller afterwards */ void dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); + dict_table_t* table, + dict_index_t* index); + /**********************************************************************//** Change the id of a table object in the dictionary cache. This is used in DISCARD TABLESPACE. */ -UNIV_INTERN void dict_table_change_id_in_cache( /*==========================*/ @@ -445,7 +448,6 @@ dict_table_change_id_in_cache( MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Removes a foreign constraint struct from the dictionary cache. */ -UNIV_INTERN void dict_foreign_remove_from_cache( /*===========================*/ @@ -456,8 +458,7 @@ Adds a foreign key constraint object to the dictionary cache. May free the object if there already is an object with the same identifier in. At least one of foreign table or referenced table must already be in the dictionary cache! -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t dict_foreign_add_to_cache( /*======================*/ @@ -474,8 +475,7 @@ dict_foreign_add_to_cache( MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /*********************************************************************//** Checks if a table is referenced by foreign keys. -@return TRUE if table is referenced by a foreign key */ -UNIV_INTERN +@return TRUE if table is referenced by a foreign key */ ibool dict_table_is_referenced_by_foreign_key( /*====================================*/ @@ -485,7 +485,6 @@ dict_table_is_referenced_by_foreign_key( Replace the index passed in with another equivalent index in the foreign key lists of the table. 
@return whether all replacements were found */ -UNIV_INTERN bool dict_foreign_replace_index( /*=======================*/ @@ -498,7 +497,6 @@ dict_foreign_replace_index( /**********************************************************************//** Determines whether a string starts with the specified keyword. @return TRUE if str starts with keyword */ -UNIV_INTERN ibool dict_str_starts_with_keyword( /*=========================*/ @@ -506,40 +504,38 @@ dict_str_starts_with_keyword( const char* str, /*!< in: string to scan for keyword */ const char* keyword) /*!< in: keyword to look for */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary +/** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. Each foreign key constraint must be accompanied with indexes in bot participating tables. The indexes are allowed to contain more fields than mentioned in the constraint. -@return error code or DB_SUCCESS */ -UNIV_INTERN + +@param[in] trx transaction +@param[in] sql_string table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database id the database of parameter name +@param[in] sql_length length of sql_string +@param[in] name table full name in normalized form +@param[in] reject_fks if TRUE, fail with error code + DB_CANNOT_ADD_CONSTRAINT if any + foreign keys are found. 
+@return error code or DB_SUCCESS */ dberr_t dict_create_foreign_constraints( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + trx_t* trx, + const char* sql_string, + size_t sql_length, + const char* name, + ibool reject_fks) + MY_ATTRIBUTE((warn_unused_result)); /**********************************************************************//** Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. @return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the constraint id does not match */ -UNIV_INTERN dberr_t dict_foreign_parse_drop_constraints( /*================================*/ @@ -557,27 +553,25 @@ Returns a table object and increments its open handle count. NOTE! This is a high-level function to be used mainly from outside the 'dict' directory. Inside this directory dict_table_get_low is usually the appropriate function. 
-@return table, NULL if does not exist */ -UNIV_INTERN +@param[in] table_name Table name +@param[in] dict_locked TRUE=data dictionary locked +@param[in] try_drop TRUE=try to drop any orphan indexes after + an aborted online index creation +@param[in] ignore_err error to be ignored when loading the table +@return table, NULL if does not exist */ dict_table_t* dict_table_open_on_name( -/*====================*/ - const char* table_name, /*!< in: table name */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - ibool try_drop, /*!< in: TRUE=try to drop any orphan - indexes after an aborted online - index creation */ - dict_err_ignore_t - ignore_err) /*!< in: error to be ignored when - loading the table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + const char* table_name, + ibool dict_locked, + ibool try_drop, + dict_err_ignore_t ignore_err) + MY_ATTRIBUTE((warn_unused_result)); /*********************************************************************//** Tries to find an index whose first fields are the columns in the array, in the same order and is not marked for deletion and is not the same as types_idx. -@return matching index, NULL if not found */ -UNIV_INTERN +@return matching index, NULL if not found */ dict_index_t* dict_foreign_find_index( /*====================*/ @@ -604,7 +598,7 @@ dict_foreign_find_index( /*!< out: column number where error happened */ dict_index_t** err_index) - /*!< out: index where error + /*!< out: index where error happened */ MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); @@ -612,13 +606,13 @@ dict_foreign_find_index( Returns a column's name. @return column name. NOTE: not guaranteed to stay valid if table is modified in any way (columns added, etc.). 
*/ -UNIV_INTERN const char* dict_table_get_col_name( /*====================*/ const dict_table_t* table, /*!< in: table */ ulint col_nr) /*!< in: column number */ MY_ATTRIBUTE((nonnull, warn_unused_result)); + /**********************************************************************//** Returns a column's name. @return column name. NOTE: not guaranteed to stay valid if table is @@ -630,17 +624,30 @@ dict_table_get_col_name_for_mysql( const dict_table_t* table, /*!< in: table */ const char* col_name)/*!< in: MySQL table column name */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Prints a table data. */ -UNIV_INTERN -void -dict_table_print( -/*=============*/ - dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); + +/** Returns a virtual column's name. +@param[in] table table object +@param[in] col_nr virtual column number(nth virtual column) +@return column name. */ +const char* +dict_table_get_v_col_name( + const dict_table_t* table, + ulint col_nr); + +/** Check if the table has a given column. +@param[in] table table object +@param[in] col_name column name +@param[in] col_nr column number guessed, 0 as default +@return column number if the table has the specified column, +otherwise table->n_def */ +ulint +dict_table_has_column( + const dict_table_t* table, + const char* col_name, + ulint col_nr = 0); + /**********************************************************************//** Outputs info on foreign keys of a table. */ -UNIV_INTERN std::string dict_print_info_on_foreign_keys( /*============================*/ @@ -650,32 +657,22 @@ dict_print_info_on_foreign_keys( of SHOW TABLE STATUS */ trx_t* trx, /*!< in: transaction */ dict_table_t* table); /*!< in: table */ + /**********************************************************************//** Outputs info on a foreign key of a table in a format suitable for CREATE TABLE. 
*/ -UNIV_INTERN std::string dict_print_info_on_foreign_key_in_create_format( /*============================================*/ trx_t* trx, /*!< in: transaction */ dict_foreign_t* foreign, /*!< in: foreign key constraint */ ibool add_newline); /*!< in: whether to add a newline */ -/********************************************************************//** -Displays the names of the index and the table. */ -UNIV_INTERN -void -dict_index_name_print( -/*==================*/ - FILE* file, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: index to print */ - MY_ATTRIBUTE((nonnull(1,3))); + /*********************************************************************//** Tries to find an index whose first fields are the columns in the array, in the same order and is not marked for deletion and is not the same as types_idx. -@return matching index, NULL if not found */ -UNIV_INTERN +@return matching index, NULL if not found */ bool dict_foreign_qualify_index( /*====================*/ @@ -709,7 +706,7 @@ dict_foreign_qualify_index( #ifdef UNIV_DEBUG /********************************************************************//** Gets the first index on the table (the clustered index). -@return index, NULL if none exists */ +@return index, NULL if none exists */ UNIV_INLINE dict_index_t* dict_table_get_first_index( @@ -718,7 +715,7 @@ dict_table_get_first_index( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Gets the last index on the table. -@return index, NULL if none exists */ +@return index, NULL if none exists */ UNIV_INLINE dict_index_t* dict_table_get_last_index( @@ -727,7 +724,7 @@ dict_table_get_last_index( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Gets the next index on the table. 
-@return index, NULL if none left */ +@return index, NULL if none left */ UNIV_INLINE dict_index_t* dict_table_get_next_index( @@ -756,90 +753,142 @@ do { \ /********************************************************************//** Check whether the index is the clustered index. -@return nonzero for clustered index, zero for other indexes */ +@return nonzero for clustered index, zero for other indexes */ UNIV_INLINE ulint dict_index_is_clust( /*================*/ const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + +/** Check if index is auto-generated clustered index. +@param[in] index index + +@return true if index is auto-generated clustered index. */ +UNIV_INLINE +bool +dict_index_is_auto_gen_clust( + const dict_index_t* index); + /********************************************************************//** Check whether the index is unique. -@return nonzero for unique index, zero for other indexes */ +@return nonzero for unique index, zero for other indexes */ UNIV_INLINE ulint dict_index_is_unique( /*=================*/ const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); +/********************************************************************//** +Check whether the index is a Spatial Index. +@return nonzero for Spatial Index, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_is_spatial( +/*==================*/ + const dict_index_t* index) /*!< in: index */ + MY_ATTRIBUTE((warn_unused_result)); +/** Check whether the index contains a virtual column. +@param[in] index index +@return nonzero for index on virtual column, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_has_virtual( + const dict_index_t* index); /********************************************************************//** Check whether the index is the insert buffer tree. 
-@return nonzero for insert buffer, zero for other indexes */ +@return nonzero for insert buffer, zero for other indexes */ UNIV_INLINE ulint dict_index_is_ibuf( /*===============*/ const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Check whether the index is a secondary index or the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ +@return nonzero for insert buffer, zero for other indexes */ UNIV_INLINE ulint dict_index_is_sec_or_ibuf( /*======================*/ const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); -/************************************************************************ -Gets the all the FTS indexes for the table. NOTE: must not be called for -tables which do not have an FTS-index. */ -UNIV_INTERN +/** Get all the FTS indexes on a table. +@param[in] table table +@param[out] indexes all FTS indexes on this table +@return number of FTS indexes */ ulint dict_table_get_all_fts_indexes( -/*===========================*/ - /* out: number of indexes collected */ - dict_table_t* table, /* in: table */ - ib_vector_t* indexes)/* out: vector for collecting FTS indexes */ - MY_ATTRIBUTE((nonnull)); + const dict_table_t* table, + ib_vector_t* indexes); + /********************************************************************//** -Gets the number of user-defined columns in a table in the dictionary -cache. -@return number of user-defined (e.g., not ROW_ID) columns of a table */ +Gets the number of user-defined non-virtual columns in a table in the +dictionary cache. 
+@return number of user-defined (e.g., not ROW_ID) non-virtual +columns of a table */ UNIV_INLINE ulint dict_table_get_n_user_cols( /*=======================*/ const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); +/** Gets the number of user-defined virtual and non-virtual columns in a table +in the dictionary cache. +@param[in] table table +@return number of user-defined (e.g., not ROW_ID) columns of a table */ +UNIV_INLINE +ulint +dict_table_get_n_tot_u_cols( + const dict_table_t* table); /********************************************************************//** -Gets the number of system columns in a table in the dictionary cache. -@return number of system (e.g., ROW_ID) columns of a table */ +Gets the number of system columns in a table. +For intrinsic table on ROW_ID column is added for all other +tables TRX_ID and ROLL_PTR are all also appeneded. +@return number of system (e.g., ROW_ID) columns of a table */ UNIV_INLINE ulint dict_table_get_n_sys_cols( /*======================*/ const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** -Gets the number of all columns (also system) in a table in the dictionary -cache. -@return number of columns of a table */ +Gets the number of all non-virtual columns (also system) in a table +in the dictionary cache. +@return number of columns of a table */ UNIV_INLINE ulint dict_table_get_n_cols( /*==================*/ const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + +/** Gets the number of virtual columns in a table in the dictionary cache. 
+@param[in] table the table to check +@return number of virtual columns of a table */ +UNIV_INLINE +ulint +dict_table_get_n_v_cols( + const dict_table_t* table); + +/** Check if a table has indexed virtual columns +@param[in] table the table to check +@return true is the table has indexed virtual columns */ +UNIV_INLINE +bool +dict_table_has_indexed_v_cols( + const dict_table_t* table); + /********************************************************************//** Gets the approximately estimated number of rows in the table. -@return estimated number of rows */ +@return estimated number of rows */ UNIV_INLINE ib_uint64_t dict_table_get_n_rows( /*==================*/ const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Increment the number of rows in the table by one. Notice that this operation is not protected by any latch, the number is @@ -860,10 +909,21 @@ dict_table_n_rows_dec( /*==================*/ dict_table_t* table) /*!< in/out: table */ MY_ATTRIBUTE((nonnull)); + + +/** Get nth virtual column +@param[in] table target table +@param[in] col_nr column number in MySQL Table definition +@return dict_v_col_t ptr */ +dict_v_col_t* +dict_table_get_nth_v_col_mysql( + const dict_table_t* table, + ulint col_nr); + #ifdef UNIV_DEBUG /********************************************************************//** Gets the nth column of a table. -@return pointer to column object */ +@return pointer to column object */ UNIV_INLINE dict_col_t* dict_table_get_nth_col( @@ -871,9 +931,18 @@ dict_table_get_nth_col( const dict_table_t* table, /*!< in: table */ ulint pos) /*!< in: position of column */ MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Gets the nth virtual column of a table. 
+@param[in] table table +@param[in] pos position of virtual column +@return pointer to virtual column object */ +UNIV_INLINE +dict_v_col_t* +dict_table_get_nth_v_col( + const dict_table_t* table, + ulint pos); /********************************************************************//** Gets the given system column of a table. -@return pointer to column object */ +@return pointer to column object */ UNIV_INLINE dict_col_t* dict_table_get_sys_col( @@ -882,14 +951,17 @@ dict_table_get_sys_col( ulint sys) /*!< in: DATA_ROW_ID, ... */ MY_ATTRIBUTE((nonnull, warn_unused_result)); #else /* UNIV_DEBUG */ -#define dict_table_get_nth_col(table, pos) \ +#define dict_table_get_nth_col(table, pos) \ ((table)->cols + (pos)) -#define dict_table_get_sys_col(table, sys) \ -((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS) +#define dict_table_get_sys_col(table, sys) \ +((table)->cols + (table)->n_cols + (sys) \ + - (dict_table_get_n_sys_cols(table))) +/* Get nth virtual columns */ +#define dict_table_get_nth_v_col(table, pos) ((table)->v_cols + (pos)) #endif /* UNIV_DEBUG */ /********************************************************************//** Gets the given system column number of a table. -@return column number */ +@return column number */ UNIV_INLINE ulint dict_table_get_sys_col_no( @@ -900,7 +972,7 @@ dict_table_get_sys_col_no( #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. -@return minimum data size in bytes */ +@return minimum data size in bytes */ UNIV_INLINE ulint dict_index_get_min_size( @@ -910,16 +982,17 @@ dict_index_get_min_size( #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** Check whether the table uses the compact page format. 
-@return TRUE if table uses the compact page format */ +@return TRUE if table uses the compact page format */ UNIV_INLINE ibool dict_table_is_comp( /*===============*/ const dict_table_t* table) /*!< in: table */ MY_ATTRIBUTE((nonnull, warn_unused_result)); + /********************************************************************//** Determine the file format of a table. -@return file format version */ +@return file format version */ UNIV_INLINE ulint dict_table_get_format( @@ -928,67 +1001,97 @@ dict_table_get_format( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Determine the file format from a dict_table_t::flags. -@return file format version */ +@return file format version */ UNIV_INLINE ulint dict_tf_get_format( /*===============*/ ulint flags) /*!< in: dict_table_t::flags */ MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Set the various values in a dict_table_t::flags pointer. */ + +/** Set the various values in a dict_table_t::flags pointer. 
+@param[in,out] flags, Pointer to a 4 byte Table Flags +@param[in] format, File Format +@param[in] zip_ssize Zip Shift Size +@param[in] use_data_dir Table uses DATA DIRECTORY +@param[in] shared_space Table uses a General Shared Tablespace */ UNIV_INLINE void dict_tf_set( -/*========*/ - ulint* flags, /*!< in/out: table */ - rec_format_t format, /*!< in: file format */ - ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path, /*!< in: table uses DATA DIRECTORY - */ - bool page_compressed,/*!< in: table uses page compressed - pages */ - ulint page_compression_level, /*!< in: table page compression - level */ - ulint atomic_writes) /*!< in: table atomic - writes option value*/ - __attribute__((nonnull)); -/********************************************************************//** -Convert a 32 bit integer table flags to the 32 bit integer that is -written into the tablespace header at the offset FSP_SPACE_FLAGS and is -also stored in the fil_space_t::flags field. The following chart shows -the translation of the low order bit. Other bits are the same. + ulint* flags, + rec_format_t format, + ulint zip_ssize, + bool use_data_dir, + bool shared_space, + bool page_compressed, + ulint page_compression_level, + ulint atomic_writes); + +/** Initialize a dict_table_t::flags pointer. +@param[in] compact, Table uses Compact or greater +@param[in] zip_ssize Zip Shift Size (log 2 minus 9) +@param[in] atomic_blobs Table uses Compressed or Dynamic +@param[in] data_dir Table uses DATA DIRECTORY +@param[in] shared_space Table uses a General Shared Tablespace */ +UNIV_INLINE +ulint +dict_tf_init( + bool compact, + ulint zip_ssize, + bool atomic_blobs, + bool data_dir, + bool shared_space, + bool page_compressed, + ulint page_compression_level, + ulint atomic_writes); + +/** Convert a 32 bit integer table flags to the 32 bit FSP Flags. +Fsp Flags are written into the tablespace header at the offset +FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field. 
+The following chart shows the translation of the low order bit. +Other bits are the same. ========================= Low order bit ========================== | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC dict_table_t::flags | 0 | 1 | 1 | 1 fil_space_t::flags | 0 | 0 | 1 | 1 ================================================================== -@return tablespace flags (fil_space_t::flags) */ -UNIV_INLINE +@param[in] table_flags dict_table_t::flags +@param[in] is_temp whether the tablespace is temporary +@param[in] is_encrypted whether the tablespace is encrypted +@return tablespace flags (fil_space_t::flags) */ ulint dict_tf_to_fsp_flags( -/*=================*/ - ulint flags) /*!< in: dict_table_t::flags */ + ulint table_flags, + bool is_temp, + bool is_encrypted = false) MY_ATTRIBUTE((const)); -/********************************************************************//** -Extract the compressed page size from table flags. -@return compressed page size, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_tf_get_zip_size( -/*=================*/ - ulint flags) /*!< in: flags */ - __attribute__((const)); -/********************************************************************//** -Check whether the table uses the compressed compact page format. -@return compressed page size, or 0 if not compressed */ +/** Extract the page size from table flags. +@param[in] flags flags +@return compressed page size, or 0 if not compressed */ UNIV_INLINE +const page_size_t +dict_tf_get_page_size( + ulint flags) +MY_ATTRIBUTE((const)); + +/** Determine the extent size (in pages) for the given table +@param[in] table the table whose extent size is being + calculated. +@return extent size in pages (256, 128 or 64) */ ulint -dict_table_zip_size( -/*================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); +dict_table_extent_size( + const dict_table_t* table); + +/** Get the table page size. 
+@param[in] table table +@return compressed page size, or 0 if not compressed */ +UNIV_INLINE +const page_size_t +dict_table_page_size( + const dict_table_t* table) + MY_ATTRIBUTE((warn_unused_result)); + #ifndef UNIV_HOTBACKUP /*********************************************************************//** Obtain exclusive locks on all index trees of the table. This is to prevent @@ -1011,8 +1114,7 @@ dict_table_x_unlock_indexes( /********************************************************************//** Checks if a column is in the ordering columns of the clustered index of a table. Column prefixes are treated like whole columns. -@return TRUE if the column, or its prefix, is in the clustered key */ -UNIV_INTERN +@return TRUE if the column, or its prefix, is in the clustered key */ ibool dict_table_col_in_clustered_key( /*============================*/ @@ -1028,11 +1130,21 @@ dict_table_has_fts_index( /*=====================*/ dict_table_t* table) /*!< in: table */ MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Copies types of virtual columns contained in table to tuple and sets all +fields of the tuple to the SQL NULL value. This function should +be called right after dtuple_create(). +@param[in,out] tuple data tuple +@param[in] table table +*/ +void +dict_table_copy_v_types( + dtuple_t* tuple, + const dict_table_t* table); + /*******************************************************************//** Copies types of columns contained in table to tuple and sets all fields of the tuple to the SQL NULL value. This function should be called right after dtuple_create(). */ -UNIV_INTERN void dict_table_copy_types( /*==================*/ @@ -1043,7 +1155,6 @@ dict_table_copy_types( Wait until all the background threads of the given table have exited, i.e., bg_threads == 0. Note: bg_threads_mutex must be reserved when calling this. 
*/ -UNIV_INTERN void dict_table_wait_for_bg_threads_to_exit( /*===================================*/ @@ -1055,8 +1166,7 @@ dict_table_wait_for_bg_threads_to_exit( Looks for an index with the given id. NOTE that we do not reserve the dictionary mutex: this function is for emergency purposes like printing info of a corrupt database page! -@return index or NULL if not found from cache */ -UNIV_INTERN +@return index or NULL if not found from cache */ dict_index_t* dict_index_find_on_id_low( /*======================*/ @@ -1067,41 +1177,56 @@ Make room in the table cache by evicting an unused table. The unused table should not be part of FK relationship and currently not used in any user transaction. There is no guarantee that it will remove a table. @return number of tables evicted. */ -UNIV_INTERN ulint dict_make_room_in_cache( /*====================*/ ulint max_tables, /*!< in: max tables allowed in cache */ ulint pct_check); /*!< in: max percent to check */ -/**********************************************************************//** -Adds an index to the dictionary cache. -@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ -UNIV_INTERN + +#define BIG_ROW_SIZE 1024 + +/** Adds an index to the dictionary cache. +@param[in] table table on which the index is +@param[in] index index; NOTE! The index memory + object is freed in this function! +@param[in] page_no root page number of the index +@param[in] strict TRUE=refuse to create the index + if records could be too big to fit in + an B-tree page +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ dberr_t dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table on which the index is */ - dict_index_t* index, /*!< in, own: index; NOTE! The index memory - object is freed in this function! 
*/ - ulint page_no,/*!< in: root page number of the index */ - ibool strict) /*!< in: TRUE=refuse to create the index - if records could be too big to fit in - an B-tree page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Removes an index from the dictionary cache. */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); + dict_table_t* table, + dict_index_t* index, + ulint page_no, + ibool strict) + MY_ATTRIBUTE((warn_unused_result)); + +/** Adds an index to the dictionary cache, with possible indexing newly +added column. +@param[in] table table on which the index is +@param[in] index index; NOTE! The index memory + object is freed in this function! +@param[in] add_v new virtual column that being added along with + an add index call +@param[in] page_no root page number of the index +@param[in] strict TRUE=refuse to create the index + if records could be too big to fit in + an B-tree page +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ +dberr_t +dict_index_add_to_cache_w_vcol( + dict_table_t* table, + dict_index_t* index, + const dict_add_v_col_t* add_v, + ulint page_no, + ibool strict) + MY_ATTRIBUTE((warn_unused_result)); #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** Gets the number of fields in the internal representation of an index, including fields added by the dictionary system. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_fields( @@ -1115,7 +1240,7 @@ Gets the number of fields in the internal representation of an index that uniquely determine the position of an index entry in the index, if we do not take multiversioning into account: in the B-tree use the value returned by dict_index_get_n_unique_in_tree. 
-@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_unique( @@ -1127,7 +1252,7 @@ dict_index_get_n_unique( Gets the number of fields in the internal representation of an index which uniquely determine the position of an index entry in the index, if we also take multiversioning into account. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_unique_in_tree( @@ -1135,12 +1260,28 @@ dict_index_get_n_unique_in_tree( const dict_index_t* index) /*!< in: an internal representation of index (in the dictionary cache) */ MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** The number of fields in the nonleaf page of spatial index, except +the page no field. */ +#define DICT_INDEX_SPATIAL_NODEPTR_SIZE 1 +/** +Gets the number of fields on nonleaf page level in the internal representation +of an index which uniquely determine the position of an index entry in the +index, if we also take multiversioning into account. Note, it doesn't +include page no field. +@param[in] index index +@return number of fields */ +UNIV_INLINE +ulint +dict_index_get_n_unique_in_tree_nonleaf( + const dict_index_t* index) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Gets the number of user-defined ordering fields in the index. In the internal representation we add the row id to the ordering fields to make all indexes unique, but this function returns the number of fields the user defined in the index as ordering fields. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_ordering_defined_by_user( @@ -1151,7 +1292,7 @@ dict_index_get_n_ordering_defined_by_user( #ifdef UNIV_DEBUG /********************************************************************//** Gets the nth field of an index. 
-@return pointer to field object */ +@return pointer to field object */ UNIV_INLINE dict_field_t* dict_index_get_nth_field( @@ -1164,7 +1305,7 @@ dict_index_get_nth_field( #endif /* UNIV_DEBUG */ /********************************************************************//** Gets pointer to the nth column in an index. -@return column */ +@return column */ UNIV_INLINE const dict_col_t* dict_index_get_nth_col( @@ -1174,7 +1315,7 @@ dict_index_get_nth_col( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Gets the column number of the nth field in an index. -@return column number */ +@return column number */ UNIV_INLINE ulint dict_index_get_nth_col_no( @@ -1193,32 +1334,41 @@ dict_index_get_nth_col_pos( const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ ulint* prefix_col_pos) /*!< out: col num if prefix */ - __attribute__((nonnull(1), warn_unused_result)); -/********************************************************************//** -Looks for column n in an index. + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Looks for column n in an index. 
+@param[in] index index +@param[in] n column number +@param[in] inc_prefix true=consider column prefixes too +@param[in] is_virtual true==virtual column @return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN ulint dict_index_get_nth_col_or_prefix_pos( -/*=================================*/ const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ - ibool inc_prefix, /*!< in: TRUE=consider + bool inc_prefix, /*!< in: TRUE=consider column prefixes too */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ + bool is_virtual, /*!< in: is a virtual column + */ + ulint* prefix_col_pos) /*!< out: col num if prefix + */ + __attribute__((warn_unused_result)); - __attribute__((nonnull(1), warn_unused_result)); /********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. -@return TRUE if contains the column or its prefix */ -UNIV_INTERN +@param[in] index index +@param[in] n column number +@param[in] is_virtual whether it is a virtual col +@return TRUE if contains the column or its prefix */ ibool dict_index_contains_col_or_prefix( /*==============================*/ const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + ulint n, /*!< in: column number */ + bool is_virtual) + /*!< in: whether it is a virtual col */ + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Looks for a matching field in an index. The column has to be the same. The column in index must be complete, or must contain a prefix longer than the @@ -1226,7 +1376,6 @@ column in index2. That is, we must be able to construct the prefix in index2 from the prefix in index. 
@return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN ulint dict_index_get_nth_field_pos( /*=========================*/ @@ -1236,17 +1385,17 @@ dict_index_get_nth_field_pos( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Looks for column n position in the clustered index. -@return position in internal representation of the clustered index */ -UNIV_INTERN +@return position in internal representation of the clustered index */ ulint dict_table_get_nth_col_pos( /*=======================*/ const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + ulint n, /*!< in: column number */ + ulint* prefix_col_pos) /*!< out: col num if prefix */ + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Returns the position of a system column in an index. -@return position, ULINT_UNDEFINED if not contained */ +@return position, ULINT_UNDEFINED if not contained */ UNIV_INLINE ulint dict_index_get_sys_col_pos( @@ -1256,7 +1405,6 @@ dict_index_get_sys_col_pos( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*******************************************************************//** Adds a column to index. */ -UNIV_INTERN void dict_index_add_col( /*===============*/ @@ -1268,7 +1416,6 @@ dict_index_add_col( #ifndef UNIV_HOTBACKUP /*******************************************************************//** Copies types of fields contained in index to tuple. */ -UNIV_INTERN void dict_index_copy_types( /*==================*/ @@ -1280,7 +1427,7 @@ dict_index_copy_types( #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Gets the field column. 
-@return field->col, pointer to the table column */ +@return field->col, pointer to the table column */ UNIV_INLINE const dict_col_t* dict_field_get_col( @@ -1291,8 +1438,7 @@ dict_field_get_col( /**********************************************************************//** Returns an index object if it is found in the dictionary cache. Assumes that dict_sys->mutex is already being held. -@return index, NULL if not found */ -UNIV_INTERN +@return index, NULL if not found */ dict_index_t* dict_index_get_if_in_cache_low( /*===========================*/ @@ -1301,8 +1447,7 @@ dict_index_get_if_in_cache_low( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Returns an index object if it is found in the dictionary cache. -@return index, NULL if not found */ -UNIV_INTERN +@return index, NULL if not found */ dict_index_t* dict_index_get_if_in_cache( /*=======================*/ @@ -1313,8 +1458,7 @@ dict_index_get_if_in_cache( /**********************************************************************//** Checks that a tuple has n_fields_cmp value in a sensible range, so that no comparison can occur with the page number field in a node pointer. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool dict_index_check_search_tuple( /*==========================*/ @@ -1332,7 +1476,6 @@ enum check_name { }; /**********************************************************************//** Check for duplicate index entries in a table [using the index name] */ -UNIV_INTERN void dict_table_check_for_dup_indexes( /*=============================*/ @@ -1344,8 +1487,7 @@ dict_table_check_for_dup_indexes( #endif /* UNIV_DEBUG */ /**********************************************************************//** Builds a node pointer out of a physical record and a page number. 
-@return own: node pointer */ -UNIV_INTERN +@return own: node pointer */ dtuple_t* dict_index_build_node_ptr( /*======================*/ @@ -1362,8 +1504,7 @@ dict_index_build_node_ptr( /**********************************************************************//** Copies an initial segment of a physical record, long enough to specify an index entry uniquely. -@return pointer to the prefix record */ -UNIV_INTERN +@return pointer to the prefix record */ rec_t* dict_index_copy_rec_order_prefix( /*=============================*/ @@ -1377,8 +1518,7 @@ dict_index_copy_rec_order_prefix( MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************************//** Builds a typed data tuple out of a physical record. -@return own: data tuple */ -UNIV_INTERN +@return own: data tuple */ dtuple_t* dict_index_build_data_tuple( /*========================*/ @@ -1389,7 +1529,7 @@ dict_index_build_data_tuple( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Gets the space id of the root of the index tree. -@return space id */ +@return space id */ UNIV_INLINE ulint dict_index_get_space( @@ -1407,7 +1547,7 @@ dict_index_set_space( MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Gets the page number of the root of the index tree. -@return page number */ +@return page number */ UNIV_INLINE ulint dict_index_get_page( @@ -1416,7 +1556,7 @@ dict_index_get_page( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Gets the read-write lock of the index tree. -@return read-write lock */ +@return read-write lock */ UNIV_INLINE rw_lock_t* dict_index_get_lock( @@ -1427,7 +1567,7 @@ dict_index_get_lock( Returns free space reserved for future updates of records. 
This is relevant only in the case of many consecutive inserts, as updates which make the records bigger might fragment the index. -@return number of free bytes on page, reserved for updates */ +@return number of free bytes on page, reserved for updates */ UNIV_INLINE ulint dict_index_get_space_reserve(void); @@ -1468,7 +1608,6 @@ dict_index_is_online_ddl( MY_ATTRIBUTE((nonnull, warn_unused_result)); /*********************************************************************//** Calculates the minimum record length in an index. */ -UNIV_INTERN ulint dict_index_calc_min_rec_len( /*========================*/ @@ -1476,7 +1615,6 @@ dict_index_calc_min_rec_len( MY_ATTRIBUTE((nonnull, warn_unused_result)); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ -UNIV_INTERN void dict_mutex_enter_for_mysql_func(const char * file, ulint line); /*============================*/ @@ -1486,7 +1624,6 @@ dict_mutex_enter_for_mysql_func(const char * file, ulint line); /********************************************************************//** Releases the dictionary system mutex for MySQL. */ -UNIV_INTERN void dict_mutex_exit_for_mysql(void); /*===========================*/ @@ -1497,7 +1634,6 @@ or from a thread that has not shared the table object with other threads. @param[in,out] table table whose stats latch to create @param[in] enabled if false then the latch is disabled and dict_table_stats_lock()/unlock() become noop on this table. */ - void dict_table_stats_latch_create( dict_table_t* table, @@ -1507,33 +1643,29 @@ dict_table_stats_latch_create( This function is only called from either single threaded environment or from a thread that has not shared the table object with other threads. 
@param[in,out] table table whose stats latch to destroy */ - void dict_table_stats_latch_destroy( dict_table_t* table); -/**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. -table->id is used to pick the corresponding latch from a global array of -latches. */ -UNIV_INTERN +/** Lock the appropriate latch to protect a given table's statistics. +@param[in] table table whose stats to lock +@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */ void dict_table_stats_lock( -/*==================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ -/**********************************************************************//** -Unlock the latch that has been locked by dict_table_stats_lock() */ -UNIV_INTERN + dict_table_t* table, + ulint latch_mode); + +/** Unlock the latch that has been locked by dict_table_stats_lock(). +@param[in] table table whose stats to unlock +@param[in] latch_mode RW_S_LATCH or RW_X_LATCH */ void dict_table_stats_unlock( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ + dict_table_t* table, + ulint latch_mode); + /********************************************************************//** Checks if the database name in two table names is the same. 
-@return TRUE if same db name */ -UNIV_INTERN +@return TRUE if same db name */ ibool dict_tables_have_same_db( /*=====================*/ @@ -1542,46 +1674,37 @@ dict_tables_have_same_db( const char* name2) /*!< in: table name in the form dbname '/' tablename */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Removes an index from the cache */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Get index by name -@return index, NULL if does not exist */ -UNIV_INTERN + +/** Get an index by name. +@param[in] table the table where to look for the index +@param[in] name the index name to look for +@param[in] committed true=search for committed, +false=search for uncommitted +@return index, NULL if does not exist */ dict_index_t* dict_table_get_index_on_name( -/*=========================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Looks for an index with the given id given a table instance. -@return index or NULL */ -UNIV_INTERN -dict_index_t* -dict_table_find_index_on_id( -/*========================*/ - const dict_table_t* table, /*!< in: table instance */ - index_id_t id) /*!< in: index id */ - __attribute__((nonnull, warn_unused_result)); -/**********************************************************************//** -In case there is more than one index with the same name return the index -with the min(id). 
-@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name_and_min_id( -/*====================================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + dict_table_t* table, + const char* name, + bool committed=true) + MY_ATTRIBUTE((warn_unused_result)); + +/** Get an index by name. +@param[in] table the table where to look for the index +@param[in] name the index name to look for +@param[in] committed true=search for committed, +false=search for uncommitted +@return index, NULL if does not exist */ +inline +const dict_index_t* +dict_table_get_index_on_name( + const dict_table_t* table, + const char* name, + bool committed=true) +{ + return(dict_table_get_index_on_name( + const_cast(table), name, committed)); +} + /*************************************************************** Check whether a column exists in an FTS index. */ UNIV_INLINE @@ -1591,27 +1714,39 @@ dict_table_is_fts_column( /* out: ULINT_UNDEFINED if no match else the offset within the vector */ ib_vector_t* indexes,/* in: vector containing only FTS indexes */ - ulint col_no) /* in: col number to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + ulint col_no, /* in: col number to search for */ + bool is_virtual)/*!< in: whether it is a virtual column */ + MY_ATTRIBUTE((warn_unused_result)); +/**********************************************************************//** +Prevent table eviction by moving a table to the non-LRU list from the +LRU list if it is not already there. */ +UNIV_INLINE +void +dict_table_prevent_eviction( +/*========================*/ + dict_table_t* table) /*!< in: table to prevent eviction */ + MY_ATTRIBUTE((nonnull)); + /**********************************************************************//** Move a table to the non LRU end of the LRU list. 
*/ -UNIV_INTERN void dict_table_move_from_lru_to_non_lru( /*================================*/ dict_table_t* table) /*!< in: table to move from LRU to non-LRU */ MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Move a table to the LRU list from the non-LRU list. */ -UNIV_INTERN -void -dict_table_move_from_non_lru_to_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from non-LRU to LRU */ - MY_ATTRIBUTE((nonnull)); + +/** Looks for an index with the given id given a table instance. +@param[in] table table instance +@param[in] id index id +@return index or NULL */ +dict_index_t* +dict_table_find_index_on_id( + const dict_table_t* table, + index_id_t id) + MY_ATTRIBUTE((nonnull(1))); + /**********************************************************************//** Move to the most recently used segment of the LRU list. */ -UNIV_INTERN void dict_move_to_mru( /*=============*/ @@ -1625,19 +1760,20 @@ constraint */ /* Buffers for storing detailed information about the latest foreign key and unique key errors */ -extern FILE* dict_foreign_err_file; -extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */ +extern FILE* dict_foreign_err_file; +extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the + foreign key error messages */ /** the dictionary system */ extern dict_sys_t* dict_sys; /** the data dictionary rw-latch protecting dict_sys */ -extern rw_lock_t dict_operation_lock; +extern rw_lock_t* dict_operation_lock; typedef std::map autoinc_map_t; /* Dictionary system struct */ struct dict_sys_t{ - ib_mutex_t mutex; /*!< mutex protecting the data + DictSysMutex mutex; /*!< mutex protecting the data dictionary; protects also the disk-based dictionary system tables; this mutex serializes CREATE TABLE @@ -1654,13 +1790,14 @@ struct dict_sys_t{ on name */ hash_table_t* table_id_hash; /*!< hash table of the tables, based on id */ - ulint size; /*!< varying 
space in bytes occupied + lint size; /*!< varying space in bytes occupied by the data dictionary table and index objects */ dict_table_t* sys_tables; /*!< SYS_TABLES table */ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_virtual; /*!< SYS_VIRTUAL table */ /*=============================*/ UT_LIST_BASE_NODE_T(dict_table_t) @@ -1676,12 +1813,9 @@ struct dict_sys_t{ /** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ extern dict_index_t* dict_ind_redundant; -/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ -extern dict_index_t* dict_ind_compact; /**********************************************************************//** -Inits dict_ind_redundant and dict_ind_compact. */ -UNIV_INTERN +Inits dict_ind_redundant. */ void dict_ind_init(void); /*===============*/ @@ -1701,7 +1835,7 @@ struct dict_col_meta_t { }; /* This struct is used for checking whether a given table exists and -whether it has a predefined schema (number of columns and columns names +whether it has a predefined schema (number of columns and column names and types) */ struct dict_table_schema_t { const char* table_name; /* the name of the table whose @@ -1729,7 +1863,6 @@ types. The order of the columns does not matter. The caller must own the dictionary mutex. dict_table_schema_check() @{ @return DB_SUCCESS if the table exists and contains the necessary columns */ -UNIV_INTERN dberr_t dict_table_schema_check( /*====================*/ @@ -1748,7 +1881,6 @@ Converts a database and table name from filesystem encoding (e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. 
*/ -UNIV_INTERN void dict_fs2utf8( /*=========*/ @@ -1760,16 +1892,19 @@ dict_fs2utf8( size_t table_utf8_size)/*!< in: table_utf8 size */ MY_ATTRIBUTE((nonnull)); +/** Resize the hash tables besed on the current buffer pool size. */ +void +dict_resize(); + /**********************************************************************//** Closes the data dictionary module. */ -UNIV_INTERN void dict_close(void); /*============*/ #ifndef UNIV_HOTBACKUP /**********************************************************************//** Check whether the table is corrupted. -@return nonzero for corrupted table, zero for valid tables */ +@return nonzero for corrupted table, zero for valid tables */ UNIV_INLINE ulint dict_table_is_corrupted( @@ -1779,7 +1914,7 @@ dict_table_is_corrupted( /**********************************************************************//** Check whether the index is corrupted. -@return nonzero for corrupted index, zero for valid indexes */ +@return nonzero for corrupted index, zero for valid indexes */ UNIV_INLINE ulint dict_index_is_corrupted( @@ -1791,7 +1926,6 @@ dict_index_is_corrupted( /**********************************************************************//** Flags an index and table corrupted both in the data dictionary cache and in the system table SYS_INDEXES. */ -UNIV_INTERN void dict_set_corrupted( /*===============*/ @@ -1800,63 +1934,164 @@ dict_set_corrupted( const char* ctx) /*!< in: context */ UNIV_COLD MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Flags an index corrupted in the data dictionary cache only. This +/** Flags an index corrupted in the data dictionary cache only. 
This is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose */ -UNIV_INTERN +is corrupted, and we force to load such index for repair purpose +@param[in,out] index index that is corrupted */ void dict_set_corrupted_index_cache_only( -/*================================*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); + dict_index_t* index); /**********************************************************************//** Flags a table with specified space_id corrupted in the table dictionary cache. @return TRUE if successful */ -UNIV_INTERN ibool dict_set_corrupted_by_space( /*========================*/ ulint space_id); /*!< in: space ID */ -/********************************************************************//** -Validate the table flags. -@return true if valid. */ +/** Sets merge_threshold in the SYS_INDEXES +@param[in,out] index index +@param[in] merge_threshold value to set */ +void +dict_index_set_merge_threshold( + dict_index_t* index, + ulint merge_threshold); + +#ifdef UNIV_DEBUG +/** Sets merge_threshold for all indexes in dictionary cache for debug. +@param[in] merge_threshold_all value to set for all indexes */ +void +dict_set_merge_threshold_all_debug( + uint merge_threshold_all); +#endif /* UNIV_DEBUG */ + +/** Validate the table flags. +@param[in] flags Table flags +@return true if valid. */ UNIV_INLINE bool dict_tf_is_valid( -/*=============*/ - ulint flags) /*!< in: table flags */ - MY_ATTRIBUTE((warn_unused_result)); + ulint flags); + +/** Validate both table flags and table flags2 and make sure they +are compatible. +@param[in] flags Table flags +@param[in] flags2 Table flags2 +@return true if valid. */ +UNIV_INLINE +bool +dict_tf2_is_valid( + ulint flags, + ulint flags2); /********************************************************************//** Check if the tablespace for the table has been discarded. 
-@return true if the tablespace has been discarded. */ +@return true if the tablespace has been discarded. */ UNIV_INLINE bool dict_table_is_discarded( /*====================*/ const dict_table_t* table) /*!< in: table to check */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************//** Check if it is a temporary table. -@return true if temporary table flag is set. */ +@return true if temporary table flag is set. */ UNIV_INLINE bool dict_table_is_temporary( /*====================*/ const dict_table_t* table) /*!< in: table to check */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + +/********************************************************************//** +Check if it is a encrypted table. +@return true if table encryption flag is set. */ +UNIV_INLINE +bool +dict_table_is_encrypted( +/*====================*/ + const dict_table_t* table) /*!< in: table to check */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Check whether the table is intrinsic. +An intrinsic table is a special kind of temporary table that +is invisible to the end user. It is created internally by the MySQL server +layer or other module connected to InnoDB in order to gather and use data +as part of a larger task. Since access to it must be as fast as possible, +it does not need UNDO semantics, system fields DB_TRX_ID & DB_ROLL_PTR, +doublewrite, checksum, insert buffer, use of the shared data dictionary, +locking, or even a transaction. In short, these are not ACID tables at all, +just temporary + +@param[in] table table to check +@return true if intrinsic table flag is set. */ +UNIV_INLINE +bool +dict_table_is_intrinsic( + const dict_table_t* table) + MY_ATTRIBUTE((warn_unused_result)); + +/** Check if the table is in a shared tablespace (System or General). +@param[in] id Space ID to check +@return true if id is a shared tablespace, false if not. 
*/ +UNIV_INLINE +bool +dict_table_in_shared_tablespace( + const dict_table_t* table) + MY_ATTRIBUTE((warn_unused_result)); + +/** Check whether locking is disabled for this table. +Currently this is done for intrinsic table as their visibility is limited +to the connection only. + +@param[in] table table to check +@return true if locking is disabled. */ +UNIV_INLINE +bool +dict_table_is_locking_disabled( + const dict_table_t* table) + MY_ATTRIBUTE((warn_unused_result)); + +/********************************************************************//** +Turn-off redo-logging if temporary table. */ +UNIV_INLINE +void +dict_disable_redo_if_temporary( +/*===========================*/ + const dict_table_t* table, /*!< in: table to check */ + mtr_t* mtr); /*!< out: mini-transaction */ + +/** Get table session row-id and increment the row-id counter for next use. +@param[in,out] table table handler +@return next table local row-id. */ +UNIV_INLINE +row_id_t +dict_table_get_next_table_sess_row_id( + dict_table_t* table); + +/** Get table session trx-id and increment the trx-id counter for next use. +@param[in,out] table table handler +@return next table local trx-id. */ +UNIV_INLINE +trx_id_t +dict_table_get_next_table_sess_trx_id( + dict_table_t* table); + +/** Get current session trx-id. +@param[in] table table handler +@return table local trx-id. */ +UNIV_INLINE +trx_id_t +dict_table_get_curr_table_sess_trx_id( + const dict_table_t* table); #ifndef UNIV_HOTBACKUP /*********************************************************************//** This function should be called whenever a page is successfully compressed. Updates the compression padding information. */ -UNIV_INTERN void dict_index_zip_success( /*===================*/ @@ -1865,7 +2100,6 @@ dict_index_zip_success( /*********************************************************************//** This function should be called whenever a page compression attempt fails. Updates the compression padding information. 
*/ -UNIV_INTERN void dict_index_zip_failure( /*===================*/ @@ -1874,7 +2108,6 @@ dict_index_zip_failure( /*********************************************************************//** Return the optimal page size, for which page will likely compress. @return page size beyond which page may not compress*/ -UNIV_INTERN ulint dict_index_zip_pad_optimal_page_size( /*=================================*/ @@ -1884,11 +2117,18 @@ dict_index_zip_pad_optimal_page_size( /*************************************************************//** Convert table flag to row format string. @return row format name */ -UNIV_INTERN const char* dict_tf_to_row_format_string( /*=========================*/ ulint table_flag); /*!< in: row format setting */ +/****************************************************************//** +Return maximum size of the node pointer record. +@return maximum size of the record in bytes */ +ulint +dict_index_node_ptr_max_size( +/*=========================*/ + const dict_index_t* index) /*!< in: index */ + MY_ATTRIBUTE((warn_unused_result)); /*****************************************************************//** Get index by first field of the index @return index which is having first field matches @@ -1898,8 +2138,68 @@ dict_index_t* dict_table_get_index_on_first_col( /*==============================*/ const dict_table_t* table, /*!< in: table */ - ulint col_index); /*!< in: position of column + ulint col_index, /*!< in: position of column in table */ + const char* field_name); /*!< in: field name */ +/** Check if a column is a virtual column +@param[in] col column +@return true if it is a virtual column, false otherwise */ +UNIV_INLINE +bool +dict_col_is_virtual( + const dict_col_t* col); + +/** encode number of columns and number of virtual columns in one +4 bytes value. 
We could do this because the number of columns in +InnoDB is limited to 1017 +@param[in] n_col number of non-virtual column +@param[in] n_v_col number of virtual column +@return encoded value */ +UNIV_INLINE +ulint +dict_table_encode_n_col( + ulint n_col, + ulint n_v_col); + +/** Decode number of virtual and non-virtual columns in one 4 bytes value. +@param[in] encoded encoded value +@param[in,out] n_col number of non-virtual column +@param[in,out] n_v_col number of virtual column */ +UNIV_INLINE +void +dict_table_decode_n_col( + ulint encoded, + ulint* n_col, + ulint* n_v_col); + +/** Look for any dictionary objects that are found in the given tablespace. +@param[in] space_id Tablespace ID to search for. +@return true if tablespace is empty. */ +bool +dict_space_is_empty( + ulint space_id); + +/** Find the space_id for the given name in sys_tablespaces. +@param[in] name Tablespace name to search for. +@return the tablespace ID. */ +ulint +dict_space_get_id( + const char* name); + +/** Free the virtual column template +@param[in,out] vc_templ virtual column template */ +UNIV_INLINE +void +dict_free_vc_templ( + dict_vcol_templ_t* vc_templ); + +/** Check whether the table have virtual index. +@param[in] table InnoDB table +@return true if the table have virtual index, false otherwise. */ +UNIV_INLINE +bool +dict_table_have_virtual_index( + dict_table_t* table); #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 3d2f0dff0da..8165263c95c 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates +Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
Copyright (c) 2013, 2016, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under @@ -30,7 +30,8 @@ Created 1/8/1996 Heikki Tuuri #include "rem0types.h" #include "fsp0fsp.h" #include "srv0srv.h" -#include "sync0rw.h" /* RW_S_LATCH */ +#include "sync0rw.h" +#include "fsp0sysspace.h" /*********************************************************************//** Gets the minimum number of bytes per character. @@ -89,12 +90,23 @@ dict_col_copy_type( type->len = col->len; type->mbminmaxlen = col->mbminmaxlen; } +/** Check if a column is a virtual column +@param[in] col column +@return true if it is a virtual column, false otherwise */ +UNIV_INLINE +bool +dict_col_is_virtual( + const dict_col_t* col) +{ + return(col->prtype & DATA_VIRTUAL); +} + #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG /*********************************************************************//** Assert that a column and a data type match. -@return TRUE */ +@return TRUE */ UNIV_INLINE ibool dict_col_type_assert_equal( @@ -119,7 +131,7 @@ dict_col_type_assert_equal( #ifndef UNIV_HOTBACKUP /***********************************************************************//** Returns the minimum size of the column. -@return minimum size */ +@return minimum size */ UNIV_INLINE ulint dict_col_get_min_size( @@ -131,7 +143,7 @@ dict_col_get_min_size( } /***********************************************************************//** Returns the maximum size of the column. -@return maximum size */ +@return maximum size */ UNIV_INLINE ulint dict_col_get_max_size( @@ -143,7 +155,7 @@ dict_col_get_max_size( #endif /* !UNIV_HOTBACKUP */ /***********************************************************************//** Returns the size of a fixed size column, 0 if not a fixed size column. 
-@return fixed size, or 0 */ +@return fixed size, or 0 */ UNIV_INLINE ulint dict_col_get_fixed_size( @@ -157,7 +169,7 @@ dict_col_get_fixed_size( /***********************************************************************//** Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ +@return SQL null storage size in ROW_FORMAT=REDUNDANT */ UNIV_INLINE ulint dict_col_get_sql_null_size( @@ -170,7 +182,7 @@ dict_col_get_sql_null_size( /*********************************************************************//** Gets the column number. -@return col->ind, table column position (starting from 0) */ +@return col->ind, table column position (starting from 0) */ UNIV_INLINE ulint dict_col_get_no( @@ -208,11 +220,36 @@ dict_col_get_clust_pos( return(ULINT_UNDEFINED); } +/** Gets the column position in the given index. +@param[in] col table column +@param[in] index index to be searched for column +@return position of column in the given index. */ +UNIV_INLINE +ulint +dict_col_get_index_pos( + const dict_col_t* col, + const dict_index_t* index) +{ + ulint i; + + ut_ad(col); + + for (i = 0; i < index->n_def; i++) { + const dict_field_t* field = &index->fields[i]; + + if (!field->prefix_len && field->col == col) { + return(i); + } + } + + return(ULINT_UNDEFINED); +} + #ifndef UNIV_HOTBACKUP #ifdef UNIV_DEBUG /********************************************************************//** Gets the first index on the table (the clustered index). -@return index, NULL if none exists */ +@return index, NULL if none exists */ UNIV_INLINE dict_index_t* dict_table_get_first_index( @@ -227,7 +264,7 @@ dict_table_get_first_index( /********************************************************************//** Gets the last index on the table. 
-@return index, NULL if none exists */ +@return index, NULL if none exists */ UNIV_INLINE dict_index_t* dict_table_get_last_index( @@ -243,7 +280,7 @@ dict_table_get_last_index( /********************************************************************//** Gets the next index on the table. -@return index, NULL if none left */ +@return index, NULL if none left */ UNIV_INLINE dict_index_t* dict_table_get_next_index( @@ -260,7 +297,7 @@ dict_table_get_next_index( /********************************************************************//** Check whether the index is the clustered index. -@return nonzero for clustered index, zero for other indexes */ +@return nonzero for clustered index, zero for other indexes */ UNIV_INLINE ulint dict_index_is_clust( @@ -272,9 +309,22 @@ dict_index_is_clust( return(index->type & DICT_CLUSTERED); } + +/** Check if index is auto-generated clustered index. +@param[in] index index + +@return true if index is auto-generated clustered index. */ +UNIV_INLINE +bool +dict_index_is_auto_gen_clust( + const dict_index_t* index) +{ + return(index->type == DICT_CLUSTERED); +} + /********************************************************************//** Check whether the index is unique. -@return nonzero for unique index, zero for other indexes */ +@return nonzero for unique index, zero for other indexes */ UNIV_INLINE ulint dict_index_is_unique( @@ -287,21 +337,6 @@ dict_index_is_unique( return(index->type & DICT_UNIQUE); } -/********************************************************************//** -Check whether the index is the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_ibuf( -/*===============*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_IBUF); -} - /********************************************************************//** Check whether the index is an universal index tree. 
@return nonzero for universal tree, zero for other indexes */ @@ -317,9 +352,53 @@ dict_index_is_univ( return(index->type & DICT_UNIVERSAL); } +/********************************************************************//** +Check whether the index is a Spatial Index. +@return nonzero for Spatial Index, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_is_spatial( +/*==================*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->type & DICT_SPATIAL); +} + +/** Check whether the index contains a virtual column +@param[in] index index +@return nonzero for the index has virtual column, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_has_virtual( + const dict_index_t* index) +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->type & DICT_VIRTUAL); +} + +/********************************************************************//** +Check whether the index is the insert buffer tree. +@return nonzero for insert buffer, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_is_ibuf( +/*===============*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->type & DICT_IBUF); +} + /********************************************************************//** Check whether the index is a secondary index or the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ +@return nonzero for insert buffer, zero for other indexes */ UNIV_INLINE ulint dict_index_is_sec_or_ibuf( @@ -337,9 +416,10 @@ dict_index_is_sec_or_ibuf( } /********************************************************************//** -Gets the number of user-defined columns in a table in the dictionary -cache. -@return number of user-defined (e.g., not ROW_ID) columns of a table */ +Gets the number of user-defined non-virtual columns in a table in the +dictionary cache. 
+@return number of user-defined (e.g., not ROW_ID) non-virtual +columns of a table */ UNIV_INLINE ulint dict_table_get_n_user_cols( @@ -349,12 +429,29 @@ dict_table_get_n_user_cols( ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - return(table->n_cols - DATA_N_SYS_COLS); + return(table->n_cols - dict_table_get_n_sys_cols(table)); } +/** Gets the number of user-defined virtual and non-virtual columns in a table +in the dictionary cache. +@param[in] table table +@return number of user-defined (e.g., not ROW_ID) columns of a table */ +UNIV_INLINE +ulint +dict_table_get_n_tot_u_cols( + const dict_table_t* table) +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(dict_table_get_n_user_cols(table) + + dict_table_get_n_v_cols(table)); +} /********************************************************************//** -Gets the number of system columns in a table in the dictionary cache. -@return number of system (e.g., ROW_ID) columns of a table */ +Gets the number of system columns in a table. +For intrinsic table on ROW_ID column is added for all other +tables TRX_ID and ROLL_PTR are all also appeneded. +@return number of system (e.g., ROW_ID) columns of a table */ UNIV_INLINE ulint dict_table_get_n_sys_cols( @@ -363,15 +460,15 @@ dict_table_get_n_sys_cols( { ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(table->cached); - return(DATA_N_SYS_COLS); + return(dict_table_is_intrinsic(table) + ? DATA_ITT_N_SYS_COLS : DATA_N_SYS_COLS); } /********************************************************************//** -Gets the number of all columns (also system) in a table in the dictionary -cache. -@return number of columns of a table */ +Gets the number of all non-virtual columns (also system) in a table +in the dictionary cache. 
+@return number of non-virtual columns of a table */ UNIV_INLINE ulint dict_table_get_n_cols( @@ -384,9 +481,42 @@ dict_table_get_n_cols( return(table->n_cols); } +/** Gets the number of virtual columns in a table in the dictionary cache. +@param[in] table the table to check +@return number of virtual columns of a table */ +UNIV_INLINE +ulint +dict_table_get_n_v_cols( + const dict_table_t* table) +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(table->n_v_cols); +} + +/** Check if a table has indexed virtual columns +@param[in] table the table to check +@return true is the table has indexed virtual columns */ +UNIV_INLINE +bool +dict_table_has_indexed_v_cols( + const dict_table_t* table) +{ + + for (ulint i = 0; i < table->n_v_cols; i++) { + const dict_v_col_t* col = dict_table_get_nth_v_col(table, i); + if (col->m_col.ord_part) { + return(true); + } + } + + return(false); +} + /********************************************************************//** Gets the approximately estimated number of rows in the table. -@return estimated number of rows */ +@return estimated number of rows */ UNIV_INLINE ib_uint64_t dict_table_get_n_rows( @@ -437,7 +567,7 @@ dict_table_n_rows_dec( #ifdef UNIV_DEBUG /********************************************************************//** Gets the nth column of a table. -@return pointer to column object */ +@return pointer to column object */ UNIV_INLINE dict_col_t* dict_table_get_nth_col( @@ -452,9 +582,26 @@ dict_table_get_nth_col( return((dict_col_t*) (table->cols) + pos); } +/** Gets the nth virtual column of a table. 
+@param[in] table table +@param[in] pos position of virtual column +@return pointer to virtual column object */ +UNIV_INLINE +dict_v_col_t* +dict_table_get_nth_v_col( + const dict_table_t* table, + ulint pos) +{ + ut_ad(table); + ut_ad(pos < table->n_v_def); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(static_cast(table->v_cols) + pos); +} + /********************************************************************//** Gets the given system column of a table. -@return pointer to column object */ +@return pointer to column object */ UNIV_INLINE dict_col_t* dict_table_get_sys_col( @@ -465,11 +612,12 @@ dict_table_get_sys_col( dict_col_t* col; ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); + ut_ad(sys < dict_table_get_n_sys_cols(table)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); col = dict_table_get_nth_col(table, table->n_cols - - DATA_N_SYS_COLS + sys); + - dict_table_get_n_sys_cols(table) + + sys); ut_ad(col->mtype == DATA_SYS); ut_ad(col->prtype == (sys | DATA_NOT_NULL)); @@ -479,7 +627,7 @@ dict_table_get_sys_col( /********************************************************************//** Gets the given system column number of a table. -@return column number */ +@return column number */ UNIV_INLINE ulint dict_table_get_sys_col_no( @@ -488,15 +636,15 @@ dict_table_get_sys_col_no( ulint sys) /*!< in: DATA_ROW_ID, ... */ { ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); + ut_ad(sys < dict_table_get_n_sys_cols(table)); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - return(table->n_cols - DATA_N_SYS_COLS + sys); + return(table->n_cols - dict_table_get_n_sys_cols(table) + sys); } /********************************************************************//** Check whether the table uses the compact page format. 
-@return TRUE if table uses the compact page format */ +@return TRUE if table uses the compact page format */ UNIV_INLINE ibool dict_table_is_comp( @@ -526,77 +674,44 @@ dict_table_has_fts_index( return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)); } -/********************************************************************//** -Validate the table flags. -@return true if valid. */ +/** Validate the table flags. +@param[in] flags Table flags +@return true if valid. */ UNIV_INLINE bool dict_tf_is_valid( -/*=============*/ - ulint flags) /*!< in: table flags */ + ulint flags) { - ulint compact = DICT_TF_GET_COMPACT(flags); + bool compact = DICT_TF_GET_COMPACT(flags); ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); - ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); + bool atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); + bool data_dir = DICT_TF_HAS_DATA_DIR(flags); + bool shared_space = DICT_TF_HAS_SHARED_SPACE(flags); ulint unused = DICT_TF_GET_UNUSED(flags); - ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); + bool page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); - ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); + bool flags_corrupt = false; /* Make sure there are no bits that we do not know about. 
*/ if (unused != 0) { - fprintf(stderr, - "InnoDB: Error: table unused flags are %ld" - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - unused, - compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); + flags_corrupt = true; + } - return(false); - - } else if (atomic_blobs) { - /* Barracuda row formats COMPRESSED and DYNAMIC build on - the page structure introduced for the COMPACT row format - by allowing keys in secondary indexes to be made from - data stored off-page in the clustered index. */ + if (atomic_blobs) { + /* Barracuda row formats COMPRESSED and DYNAMIC both use + atomic_blobs, which build on the page structure introduced + for the COMPACT row format by allowing keys in secondary + indexes to be made from data stored off-page in the + clustered index. */ if (!compact) { - fprintf(stderr, - "InnoDB: Error: table compact flags are %ld" - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - compact, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); + flags_corrupt = true; } } else if (zip_ssize) { - /* Antelope does not support COMPRESSED row format. 
*/ - fprintf(stderr, - "InnoDB: Error: table flags are %ld" - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); + flags_corrupt = true; } if (zip_ssize) { @@ -607,72 +722,84 @@ dict_tf_is_valid( if (!compact || !atomic_blobs || zip_ssize > PAGE_ZIP_SSIZE_MAX) { - - fprintf(stderr, - "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - flags, - compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - - ); - return(false); + flags_corrupt = true; } } - if (page_compression || page_compression_level) { + if (page_compression || page_compression_level) { /* Page compression format must have compact and atomic_blobs and page_compression_level requires page_compression */ if (!compact || !page_compression || !atomic_blobs) { - - fprintf(stderr, - "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); + flags_corrupt = true; } } if (atomic_writes) { if(atomic_writes > ATOMIC_WRITES_OFF) 
{ - - fprintf(stderr, - "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact %ld atomic_blobs %ld\n" - "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" - "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); + flags_corrupt = true; } } - /* CREATE TABLE ... DATA DIRECTORY is supported for any row format, - so the DATA_DIR flag is compatible with all other table flags. */ + /* HAS_DATA_DIR and SHARED_SPACE are mutually exclusive. */ + if (data_dir && shared_space) { + flags_corrupt = true; + } + + if (flags_corrupt) { + ib::error() + << "InnoDB: Error: table unused flags are:" << flags + << " in the data dictionary and are corrupted:" + << " compact:" << compact + << " atomic_blobs:" << atomic_blobs + << " unused:" << unused + << " data_dir:" << data_dir + << " zip_ssize:" << zip_ssize + << " page_compression:" << page_compression + << " page_compression_level:" << page_compression_level + << " atomic_writes:" << atomic_writes + << " shared_space:" << shared_space; + return (false); + } else { + return(true); + } +} + +/** Validate both table flags and table flags2 and make sure they +are compatible. +@param[in] flags Table flags +@param[in] flags2 Table flags2 +@return true if valid. 
*/ +UNIV_INLINE +bool +dict_tf2_is_valid( + ulint flags, + ulint flags2) +{ + if (!dict_tf_is_valid(flags)) { + return(false); + } + + if ((flags2 & DICT_TF2_UNUSED_BIT_MASK) != 0) { + return(false); + } + + bool file_per_table = ((flags2 & DICT_TF2_USE_FILE_PER_TABLE) != 0); + bool shared_space = DICT_TF_HAS_SHARED_SPACE(flags); + + if (file_per_table && shared_space) { + return(false); + } return(true); } /********************************************************************//** Validate a SYS_TABLES TYPE field and return it. -@return Same as input after validating it as a SYS_TABLES TYPE field. +@return Same as input after validating it as a SYS_TABLES TYPE field. If there is an error, return ULINT_UNDEFINED. */ UNIV_INLINE ulint @@ -686,7 +813,7 @@ dict_sys_tables_type_validate( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); ulint unused = DICT_TF_GET_UNUSED(type); - ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); + bool page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); @@ -701,16 +828,17 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", - zip_ssize, atomic_blobs); + ib::error() + << "SYS_TABLES::TYPE=Redundant, zip_ssize:" << zip_ssize + << " atomic_blobs:" << atomic_blobs; return(ULINT_UNDEFINED); } } /* Make sure there are no bits that we do not know about. */ if (unused) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n", - type, unused); + ib::error() + << "SYS_TABLES::TYPE=" << type << " unused:" << unused; return(ULINT_UNDEFINED); } @@ -725,8 +853,9 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. 
*/ - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", - type, zip_ssize); + ib::error() + << "SYS_TABLES::TYPE=" << type << "zip_ssize:" << zip_ssize; + return(ULINT_UNDEFINED); } @@ -736,15 +865,17 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", - type, zip_ssize, atomic_blobs); + ib::error() << "SYS_TABLES::TYPE=" << type + << " zip_ssize:" << zip_ssize + << " atomic_blobs:" << atomic_blobs; return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. */ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", - type, zip_ssize, PAGE_ZIP_SSIZE_MAX); + ib::error() << "SYS_TABLES::TYPE=" << type + << " zip_ssize:" << zip_ssize + << " max:" << PAGE_ZIP_SSIZE_MAX; return(ULINT_UNDEFINED); } } @@ -752,26 +883,28 @@ dict_sys_tables_type_validate( /* There is nothing to validate for the data_dir field. CREATE TABLE ... DATA DIRECTORY is supported for any row format, so the DATA_DIR flag is compatible with any other - table flags. However, it is not used with TEMPORARY tables.*/ + table flags. However, it is not used with TEMPORARY tables. */ - if (page_compression || page_compression_level) { + if (page_compression || page_compression_level) { /* page compressed row format must have low_order_bit and atomic_blobs bits set and the DICT_N_COLS_COMPACT flag should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. 
*/ - if (!atomic_blobs || !page_compression) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" - "InnoDB: Error: atomic_blobs %lu\n", - type, page_compression, page_compression_level, atomic_blobs); + if (!atomic_blobs || !page_compression) { + ib::error() << "SYS_TABLES::TYPE=" << type + << " page_compression:" << page_compression + << " page_compression_level:" << page_compression_level + << " atomic_blobs:" << atomic_blobs; + return(ULINT_UNDEFINED); } } /* Validate that the atomic writes number is within allowed range. */ if (atomic_writes > ATOMIC_WRITES_OFF) { - fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n", - type, atomic_writes); + ib::error() << "SYS_TABLES::TYPE=" << type + << " atomic_writes:" << atomic_writes; return(ULINT_UNDEFINED); } @@ -783,7 +916,7 @@ dict_sys_tables_type_validate( Determine the file format from dict_table_t::flags The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set. -@return file format version */ +@return file format version */ UNIV_INLINE rec_format_t dict_tf_get_rec_format( @@ -809,7 +942,7 @@ dict_tf_get_rec_format( /********************************************************************//** Determine the file format from a dict_table_t::flags. -@return file format version */ +@return file format version */ UNIV_INLINE ulint dict_tf_get_format( @@ -825,7 +958,7 @@ dict_tf_get_format( /********************************************************************//** Determine the file format of a table. -@return file format version */ +@return file format version */ UNIV_INLINE ulint dict_table_get_format( @@ -837,26 +970,29 @@ dict_table_get_format( return(dict_tf_get_format(table->flags)); } -/********************************************************************//** -Set the file format and zip size in a dict_table_t::flags. 
If zip size -is not needed, it should be 0. */ +/** Set the various values in a dict_table_t::flags pointer. +@param[in,out] flags, Pointer to a 4 byte Table Flags +@param[in] format File Format +@param[in] zip_ssize Zip Shift Size +@param[in] use_data_dir Table uses DATA DIRECTORY +@param[in] atomic_writes Does table use atomic writes +@param[in] shared_space Table uses a General Shared Tablespace +@param[in] page_compressed Table uses page compression +@param[in] page_compression_level Page compression level +@param[in] atomic_writes Table uses atomic writes */ UNIV_INLINE void dict_tf_set( /*========*/ - ulint* flags, /*!< in/out: table flags */ - rec_format_t format, /*!< in: file format */ - ulint zip_ssize, /*!< in: zip shift size */ - bool use_data_dir, /*!< in: table uses DATA DIRECTORY - */ - bool page_compressed,/*!< in: table uses page compressed - pages */ - ulint page_compression_level, /*!< in: table page compression - level */ - ulint atomic_writes) /*!< in: table atomic writes setup */ + ulint* flags, + rec_format_t format, + ulint zip_ssize, + bool use_data_dir, + bool shared_space, + bool page_compressed, + ulint page_compression_level, + ulint atomic_writes) { - atomic_writes_t awrites = (atomic_writes_t)atomic_writes; - switch (format) { case REC_FORMAT_REDUNDANT: *flags = 0; @@ -878,9 +1014,17 @@ dict_tf_set( break; } + if (use_data_dir) { + *flags |= (1 << DICT_TF_POS_DATA_DIR); + } + + if (shared_space) { + *flags |= (1 << DICT_TF_POS_SHARED_SPACE); + } + if (page_compressed) { *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) - | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); ut_ad(zip_ssize == 0); @@ -888,69 +1032,71 @@ dict_tf_set( ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); } - *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); - ut_a(dict_tf_get_atomic_writes(*flags) == awrites); - - if (use_data_dir) { - *flags |= 
(1 << DICT_TF_POS_DATA_DIR); + if (atomic_writes) { + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_a(dict_tf_get_atomic_writes(*flags) == atomic_writes); } } -/********************************************************************//** -Convert a 32 bit integer table flags to the 32 bit integer that is -written into the tablespace header at the offset FSP_SPACE_FLAGS and is -also stored in the fil_space_t::flags field. The following chart shows -the translation of the low order bit. Other bits are the same. -========================= Low order bit ========================== - | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC -dict_table_t::flags | 0 | 1 | 1 | 1 -fil_space_t::flags | 0 | 0 | 1 | 1 -================================================================== -@return tablespace flags (fil_space_t::flags) */ +/** Initialize a dict_table_t::flags pointer. +@param[in] compact, Table uses Compact or greater +@param[in] zip_ssize Zip Shift Size (log 2 minus 9) +@param[in] atomic_blobs Table uses Compressed or Dynamic +@param[in] data_dir Table uses DATA DIRECTORY +@param[in] shared_space Table uses a General Shared Tablespace +@param[in] page_compression Table uses page compression +@param[in] page_compression_level used compression level +@param[in] atomic_writes Table atomic writes option */ UNIV_INLINE ulint -dict_tf_to_fsp_flags( -/*=================*/ - ulint table_flags) /*!< in: dict_table_t::flags */ +dict_tf_init( + bool compact, + ulint zip_ssize, + bool atomic_blobs, + bool data_dir, + bool shared_space, + bool page_compressed, + ulint page_compression_level, + ulint atomic_writes) { - ulint fsp_flags; - ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); - ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); - ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint flags = 0; - DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", - return(ULINT_UNDEFINED);); + if (compact) { + flags |= 
DICT_TF_COMPACT; + } - /* Adjust bit zero. */ - fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0; + if (zip_ssize) { + flags |= (zip_ssize << DICT_TF_POS_ZIP_SSIZE); + } - /* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */ - fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE; - fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS; + if (atomic_blobs) { + flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); + } - /* In addition, tablespace flags also contain the page size. */ - fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE); + if (data_dir) { + flags |= (1 << DICT_TF_POS_DATA_DIR); + } - /* The DATA_DIR flag is in a different position in fsp_flag */ - fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags) - ? FSP_FLAGS_MASK_DATA_DIR : 0; + if (shared_space) { + flags |= (1 << DICT_TF_POS_SHARED_SPACE); + } - /* In addition, tablespace flags also contain if the page - compression is used for this table. */ - fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + if (page_compressed) { + flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); - /* In addition, tablespace flags also contain page compression level - if page compression is used for this table. 
*/ - fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + ut_ad(zip_ssize == 0); + ut_ad(dict_tf_get_page_compression(flags) == TRUE); + ut_ad(dict_tf_get_page_compression_level(flags) == page_compression_level); + } - /* In addition, tablespace flags also contain flag if atomic writes - is used for this table */ - fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + if (atomic_writes) { + flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_a(dict_tf_get_atomic_writes(flags) == atomic_writes); + } - ut_a(fsp_flags_is_valid(fsp_flags)); - ut_a(dict_tf_verify_flags(table_flags, fsp_flags)); - - return(fsp_flags); + return(flags); } /********************************************************************//** @@ -962,7 +1108,7 @@ Other bits are the same. SYS_TABLES.TYPE | 1 | 1 | 1 dict_table_t::flags | 0 | 1 | 1 ================================================================== -@return ulint containing SYS_TABLES.TYPE */ +@return ulint containing SYS_TABLES.TYPE */ UNIV_INLINE ulint dict_sys_tables_type_to_tf( @@ -984,9 +1130,9 @@ dict_sys_tables_type_to_tf( | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_SHARED_SPACE); - ); - + ut_ad(!DICT_TF_GET_ZIP_SSIZE(flags) || DICT_TF_HAS_ATOMIC_BLOBS(flags)); return(flags); } @@ -999,7 +1145,7 @@ the low order bit. Other bits are the same. 
dict_table_t::flags | 0 | 1 | 1 SYS_TABLES.TYPE | 1 | 1 | 1 ================================================================== -@return ulint containing SYS_TABLES.TYPE */ +@return ulint containing SYS_TABLES.TYPE */ UNIV_INLINE ulint dict_tf_to_sys_tables_type( @@ -1020,43 +1166,46 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_DATA_DIR | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES); + | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_SHARED_SPACE); return(type); } -/********************************************************************//** -Extract the compressed page size from dict_table_t::flags. -These flags are in memory, so assert that they are valid. -@return compressed page size, or 0 if not compressed */ +/** Extract the page size info from table flags. +@param[in] flags flags +@return a structure containing the compressed and uncompressed +page sizes and a boolean indicating if the page is compressed. */ UNIV_INLINE -ulint -dict_tf_get_zip_size( -/*=================*/ - ulint flags) /*!< in: flags */ +const page_size_t +dict_tf_get_page_size( + ulint flags) { - ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); - ulint zip_size = (zip_ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize - : 0); + const ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); + + if (zip_ssize == 0) { + return(univ_page_size); + } + + const ulint zip_size = (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize; ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - return(zip_size); + return(page_size_t(zip_size, univ_page_size.logical(), true)); } -/********************************************************************//** -Check whether the table uses the compressed compact page format. -@return compressed page size, or 0 if not compressed */ +/** Get the table page size. 
+@param[in] table table +@return a structure containing the compressed and uncompressed +page sizes and a boolean indicating if the page is compressed */ UNIV_INLINE -ulint -dict_table_zip_size( -/*================*/ - const dict_table_t* table) /*!< in: table */ +const page_size_t +dict_table_page_size( + const dict_table_t* table) { - ut_ad(table); + ut_ad(table != NULL); - return(dict_tf_get_zip_size(table->flags)); + return(dict_tf_get_page_size(table->flags)); } #ifndef UNIV_HOTBACKUP @@ -1073,7 +1222,7 @@ dict_table_x_lock_indexes( dict_index_t* index; ut_a(table); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); /* Loop through each index of the table and lock them */ for (index = dict_table_get_first_index(table); @@ -1094,7 +1243,7 @@ dict_table_x_unlock_indexes( dict_index_t* index; ut_a(table); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); for (index = dict_table_get_first_index(table); index != NULL; @@ -1107,7 +1256,7 @@ dict_table_x_unlock_indexes( /********************************************************************//** Gets the number of fields in the internal representation of an index, including fields added by the dictionary system. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_fields( @@ -1127,7 +1276,7 @@ Gets the number of fields in the internal representation of an index that uniquely determine the position of an index entry in the index, if we do not take multiversioning into account: in the B-tree use the value returned by dict_index_get_n_unique_in_tree. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_unique( @@ -1146,7 +1295,7 @@ dict_index_get_n_unique( Gets the number of fields in the internal representation of an index which uniquely determine the position of an index entry in the index, if we also take multiversioning into account. 
-@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_unique_in_tree( @@ -1166,12 +1315,38 @@ dict_index_get_n_unique_in_tree( return(dict_index_get_n_fields(index)); } +/** +Gets the number of fields on nonleaf page level in the internal representation +of an index which uniquely determine the position of an index entry in the +index, if we also take multiversioning into account. Note, it doesn't +include page no field. +@param[in] index index +@return number of fields */ +UNIV_INLINE +ulint +dict_index_get_n_unique_in_tree_nonleaf( + const dict_index_t* index) +{ + ut_ad(index != NULL); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(index->cached); + + if (dict_index_is_spatial(index)) { + /* For spatial index, on non-leaf page, we have only + 2 fields(mbr+page_no). So, except page no field, + there's one field there. */ + return(DICT_INDEX_SPATIAL_NODEPTR_SIZE); + } else { + return(dict_index_get_n_unique_in_tree(index)); + } +} + /********************************************************************//** Gets the number of user-defined ordering fields in the index. In the internal representation of clustered indexes we add the row id to the ordering fields to make a clustered index unique, but this function returns the number of fields the user defined in the index as ordering fields. -@return number of fields */ +@return number of fields */ UNIV_INLINE ulint dict_index_get_n_ordering_defined_by_user( @@ -1185,7 +1360,7 @@ dict_index_get_n_ordering_defined_by_user( #ifdef UNIV_DEBUG /********************************************************************//** Gets the nth field of an index. -@return pointer to field object */ +@return pointer to field object */ UNIV_INLINE dict_field_t* dict_index_get_nth_field( @@ -1203,7 +1378,7 @@ dict_index_get_nth_field( /********************************************************************//** Returns the position of a system column in an index. 
-@return position, ULINT_UNDEFINED if not contained */ +@return position, ULINT_UNDEFINED if not contained */ UNIV_INLINE ulint dict_index_get_sys_col_pos( @@ -1213,7 +1388,7 @@ dict_index_get_sys_col_pos( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!dict_index_is_univ(index)); + ut_ad(!dict_index_is_ibuf(index)); if (dict_index_is_clust(index)) { @@ -1223,13 +1398,12 @@ dict_index_get_sys_col_pos( } return(dict_index_get_nth_col_pos( - index, dict_table_get_sys_col_no(index->table, type), - NULL)); + index, dict_table_get_sys_col_no(index->table, type), NULL)); } /*********************************************************************//** Gets the field column. -@return field->col, pointer to the table column */ +@return field->col, pointer to the table column */ UNIV_INLINE const dict_col_t* dict_field_get_col( @@ -1243,7 +1417,7 @@ dict_field_get_col( /********************************************************************//** Gets pointer to the nth column in an index. -@return column */ +@return column */ UNIV_INLINE const dict_col_t* dict_index_get_nth_col( @@ -1256,7 +1430,7 @@ dict_index_get_nth_col( /********************************************************************//** Gets the column number the nth field in an index. -@return column number */ +@return column number */ UNIV_INLINE ulint dict_index_get_nth_col_no( @@ -1279,14 +1453,14 @@ dict_index_get_nth_col_pos( ulint n, /*!< in: column number */ ulint* prefix_col_pos) /*!< out: col num if prefix */ { - return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE, + return(dict_index_get_nth_col_or_prefix_pos(index, n, false, false, prefix_col_pos)); } #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. 
-@return minimum data size in bytes */ +@return minimum data size in bytes */ UNIV_INLINE ulint dict_index_get_min_size( @@ -1306,7 +1480,7 @@ dict_index_get_min_size( /*********************************************************************//** Gets the space id of the root of the index tree. -@return space id */ +@return space id */ UNIV_INLINE ulint dict_index_get_space( @@ -1336,7 +1510,7 @@ dict_index_set_space( /*********************************************************************//** Gets the page number of the root of the index tree. -@return page number */ +@return page number */ UNIV_INLINE ulint dict_index_get_page( @@ -1351,7 +1525,7 @@ dict_index_get_page( /*********************************************************************//** Gets the read-write lock of the index tree. -@return read-write lock */ +@return read-write lock */ UNIV_INLINE rw_lock_t* dict_index_get_lock( @@ -1368,7 +1542,7 @@ dict_index_get_lock( Returns free space reserved for future updates of records. This is relevant only in the case of many consecutive inserts, as updates which make the records bigger might fragment the index. 
-@return number of free bytes on page, reserved for updates */ +@return number of free bytes on page, reserved for updates */ UNIV_INLINE ulint dict_index_get_space_reserve(void) @@ -1420,9 +1594,8 @@ dict_index_set_online_status( enum online_index_status status) /*!< in: status */ { ut_ad(!(index->type & DICT_FTS)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X)); + #ifdef UNIV_DEBUG switch (dict_index_get_online_status(index)) { case ONLINE_INDEX_COMPLETE: @@ -1482,7 +1655,8 @@ ulint dict_table_is_fts_column( /*=====================*/ ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */ - ulint col_no) /*!< in: col number to search for */ + ulint col_no, /*!< in: col number to search for */ + bool is_virtual) /*!< in: whether it is a virtual column */ { ulint i; @@ -1492,7 +1666,8 @@ dict_table_is_fts_column( index = (dict_index_t*) ib_vector_getp(indexes, i); - if (dict_index_contains_col_or_prefix(index, col_no)) { + if (dict_index_contains_col_or_prefix( + index, col_no, is_virtual)) { return(i); } @@ -1526,9 +1701,57 @@ dict_max_field_len_store_undo( return(prefix_len); } +/** Determine maximum bytes of a virtual column need to be stored +in the undo log. 
+@param[in] table dict_table_t for the table +@param[in] col_no virtual column number +@return maximum bytes of virtual column to be stored in the undo log */ +UNIV_INLINE +ulint +dict_max_v_field_len_store_undo( + dict_table_t* table, + ulint col_no) +{ + const dict_col_t* col + = &dict_table_get_nth_v_col(table, col_no)->m_col; + ulint max_log_len; + + /* This calculation conforms to the non-virtual column + maximum log length calculation: + 1) for UNIV_FORMAT_A, upto REC_ANTELOPE_MAX_INDEX_COL_LEN + for UNIV_FORMAT_B, upto col->max_prefix or + 2) REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */ + if (dict_table_get_format(table) >= UNIV_FORMAT_B) { + if (DATA_BIG_COL(col) && col->max_prefix > 0) { + max_log_len = col->max_prefix; + } else { + max_log_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); + } + } else { + max_log_len = REC_ANTELOPE_MAX_INDEX_COL_LEN; + } + + return(max_log_len); +} + +/**********************************************************************//** +Prevent table eviction by moving a table to the non-LRU list from the +LRU list if it is not already there. */ +UNIV_INLINE +void +dict_table_prevent_eviction( +/*========================*/ + dict_table_t* table) /*!< in: table to prevent eviction */ +{ + ut_ad(mutex_own(&dict_sys->mutex)); + if (table->can_be_evicted) { + dict_table_move_from_lru_to_non_lru(table); + } +} + /********************************************************************//** Check whether the table is corrupted. -@return nonzero for corrupted table, zero for valid tables */ +@return nonzero for corrupted table, zero for valid tables */ UNIV_INLINE ulint dict_table_is_corrupted( @@ -1543,7 +1766,7 @@ dict_table_is_corrupted( /********************************************************************//** Check whether the index is corrupted. 
-@return nonzero for corrupted index, zero for valid indexes */ +@return nonzero for corrupted index, zero for valid indexes */ UNIV_INLINE ulint dict_index_is_corrupted( @@ -1559,7 +1782,7 @@ dict_index_is_corrupted( /********************************************************************//** Check if the tablespace for the table has been discarded. -@return true if the tablespace has been discarded. */ +@return true if the tablespace has been discarded. */ UNIV_INLINE bool dict_table_is_discarded( @@ -1571,7 +1794,7 @@ dict_table_is_discarded( /********************************************************************//** Check if it is a temporary table. -@return true if temporary table flag is set. */ +@return true if temporary table flag is set. */ UNIV_INLINE bool dict_table_is_temporary( @@ -1581,6 +1804,112 @@ dict_table_is_temporary( return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)); } +/********************************************************************//** +Check if it is a encrypted table. +@return true if table encrypted flag is set. */ +UNIV_INLINE +bool +dict_table_is_encrypted( +/*====================*/ + const dict_table_t* table) /*!< in: table to check */ +{ + return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_ENCRYPTION)); +} + +/** Check whether the table is intrinsic. +An intrinsic table is a special kind of temporary table that +is invisible to the end user. It can be created internally by InnoDB, the MySQL +server layer or other modules connected to InnoDB in order to gather and use +data as part of a larger task. Since access to it must be as fast as possible, +it does not need UNDO semantics, system fields DB_TRX_ID & DB_ROLL_PTR, +doublewrite, checksum, insert buffer, use of the shared data dictionary, +locking, or even a transaction. In short, these are not ACID tables at all, +just temporary data stored and manipulated during a larger process. + +@param[in] table table to check +@return true if intrinsic table flag is set. 
*/ +UNIV_INLINE +bool +dict_table_is_intrinsic( + const dict_table_t* table) +{ + return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_INTRINSIC)); +} + +/** Check if the table is in a shared tablespace (System or General). +@param[in] id Space ID to check +@return true if id is a shared tablespace, false if not. */ +UNIV_INLINE +bool +dict_table_in_shared_tablespace( + const dict_table_t* table) +{ + return(is_system_tablespace(table->space) + || DICT_TF_HAS_SHARED_SPACE(table->flags)); +} + +/** Check whether locking is disabled for this table. +Currently this is done for intrinsic table as their visibility is limited +to the connection only. + +@param[in] table table to check +@return true if locking is disabled. */ +UNIV_INLINE +bool +dict_table_is_locking_disabled( + const dict_table_t* table) +{ + return(dict_table_is_intrinsic(table)); +} + +/********************************************************************//** +Turn-off redo-logging if temporary table. */ +UNIV_INLINE +void +dict_disable_redo_if_temporary( +/*===========================*/ + const dict_table_t* table, /*!< in: table to check */ + mtr_t* mtr) /*!< out: mini-transaction */ +{ + if (dict_table_is_temporary(table)) { + mtr_set_log_mode(mtr, MTR_LOG_NO_REDO); + } +} + +/** Check if the table is found is a file_per_table tablespace. +This test does not use table flags2 since some REDUNDANT tables in the +system tablespace may have garbage in the MIX_LEN field where flags2 is +stored. These garbage MIX_LEN fields were written before v3.23.52. +A patch was added to v3.23.52 which initializes the MIX_LEN field to 0. +Since file-per-table tablespaces were added in 4.1, any SYS_TABLES +record with a non-zero space ID will have a reliable MIX_LEN field. +However, this test does not use flags2 from SYS_TABLES.MIX_LEN. Instead, +assume that if the tablespace is not a predefined system tablespace and it +is not a general shared tablespace, then it must be file-per-table. 
+Also, during ALTER TABLE, the DICT_TF2_USE_FILE_PER_TABLE flag may not be +set on one of the file-per-table tablespaces. +This test cannot be done on a table in the process of being created +because the space_id will be zero until the tablespace is created. +@param[in] table An existing open table to check +@return true if this table was created as a file-per-table tablespace. */ +UNIV_INLINE +bool +dict_table_is_file_per_table( + const dict_table_t* table) /*!< in: table to check */ +{ + bool is_file_per_table = + !is_system_tablespace(table->space) + && !DICT_TF_HAS_SHARED_SPACE(table->flags); + + /* If the table is file-per-table and it is not redundant, then + it should have the flags2 bit for DICT_TF2_USE_FILE_PER_TABLE. */ + ut_ad(!is_file_per_table + || !DICT_TF_GET_COMPACT(table->flags) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)); + + return(is_file_per_table ); +} + /**********************************************************************//** Get index by first field of the index @return index which is having first field matches @@ -1589,9 +1918,10 @@ UNIV_INLINE dict_index_t* dict_table_get_index_on_first_col( /*==============================*/ - const dict_table_t* table, /*!< in: table */ - ulint col_index) /*!< in: position of column + const dict_table_t* table, /*!< in: table */ + ulint col_index, /*!< in: position of column in table */ + const char* field_name) /*!< in: field name */ { ut_ad(col_index < table->n_cols); @@ -1604,8 +1934,165 @@ dict_table_get_index_on_first_col( return(index); } } + + /* If not yet found use field_name */ + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; index = dict_table_get_next_index(index)) { + if (!strcmp(index->fields[0].name, field_name)) { + return (index); + } + } ut_error; return(0); } +/** Get table session row-id and increment the row-id counter for next use. +@param[in,out] table table handler +@return next table session row-id. 
*/ +UNIV_INLINE +row_id_t +dict_table_get_next_table_sess_row_id( + dict_table_t* table) +{ + return(++table->sess_row_id); +} + +/** Get table session trx-id and increment the trx-id counter for next use. +@param[in,out] table table handler +@return next table session trx-id. */ +UNIV_INLINE +trx_id_t +dict_table_get_next_table_sess_trx_id( + dict_table_t* table) +{ + return(++table->sess_trx_id); +} + +/** Get current session trx-id. +@param[in] table table handler +@return table session trx-id. */ +UNIV_INLINE +trx_id_t +dict_table_get_curr_table_sess_trx_id( + const dict_table_t* table) +{ + return(table->sess_trx_id); +} + +/** Get reference count. +@return current value of n_ref_count */ +inline +ulint +dict_table_t::get_ref_count() const +{ + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(this)); + return(n_ref_count); +} + +/** Acquire the table handle. */ +inline +void +dict_table_t::acquire() +{ + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(this)); + ++n_ref_count; +} + +/** Release the table handle. */ +inline +void +dict_table_t::release() +{ + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(this)); + ut_ad(n_ref_count > 0); + --n_ref_count; +} + +/** Check if tablespace name is "innodb_general". +@param[in] tablespace_name tablespace name +@retval true if name is "innodb_general" +@retval false if name is not "innodb_general" */ +inline +bool +dict_table_has_temp_general_tablespace_name( + const char* tablespace_name) { + + return(tablespace_name != NULL + && strncmp(tablespace_name, general_space_name, + strlen(general_space_name)) == 0); +} + +/** Encode the number of columns and number of virtual columns in a +4 bytes value. 
We could do this because the number of columns in +InnoDB is limited to 1017 +@param[in] n_col number of non-virtual column +@param[in] n_v_col number of virtual column +@return encoded value */ +UNIV_INLINE +ulint +dict_table_encode_n_col( + ulint n_col, + ulint n_v_col) +{ + return(n_col + (n_v_col<<16)); +} + +/** decode number of virtual and non-virtual columns in one 4 bytes value. +@param[in] encoded encoded value +@param[in,out] n_col number of non-virtual column +@param[in,out] n_v_col number of virtual column */ +UNIV_INLINE +void +dict_table_decode_n_col( + ulint encoded, + ulint* n_col, + ulint* n_v_col) +{ + + ulint num = encoded & ~DICT_N_COLS_COMPACT; + *n_v_col = num >> 16; + *n_col = num & 0xFFFF; +} + +/** Free the virtual column template +@param[in,out] vc_templ virtual column template */ +void +dict_free_vc_templ( + dict_vcol_templ_t* vc_templ) +{ + if (vc_templ->vtempl != NULL) { + ut_ad(vc_templ->n_v_col > 0); + for (ulint i = 0; i < vc_templ->n_col + + vc_templ->n_v_col; i++) { + if (vc_templ->vtempl[i] != NULL) { + ut_free(vc_templ->vtempl[i]); + } + } + ut_free(vc_templ->default_rec); + ut_free(vc_templ->vtempl); + vc_templ->vtempl = NULL; + } +} + +/** Check whether the table have virtual index. +@param[in] table InnoDB table +@return true if the table have virtual index, false otherwise. 
*/ +UNIV_INLINE +bool +dict_table_have_virtual_index( + dict_table_t* table) +{ + for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table); + col_no++) { + const dict_v_col_t* col + = dict_table_get_nth_v_col(table, col_no); + + if (col->m_col.ord_part) { + return(true); + } + } + + return(false); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index dcbc3de8e94..6d01c38c432 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -33,6 +33,12 @@ Created 4/24/1996 Heikki Tuuri #include "ut0byte.h" #include "mem0mem.h" #include "btr0types.h" +#include "ut0new.h" + +#include <deque> + +/** A stack of table names related through foreign key constraints */ +typedef std::deque<const char*, ut_allocator<const char*> > dict_names_t; /** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */ enum dict_system_id_t { @@ -44,6 +50,7 @@ enum dict_system_id_t { SYS_FOREIGN_COLS, SYS_TABLESPACES, SYS_DATAFILES, + SYS_VIRTUAL, /* This must be last item. Defines the number of system tables. */ SYS_NUM_SYSTEM_TABLES @@ -58,57 +65,37 @@ enum dict_table_info_t { is in the cache, if so, return it */ }; -/** Check type for dict_check_tablespaces_and_store_max_id() */ -enum dict_check_t { - /** No user tablespaces have been opened - (no crash recovery, no transactions recovered). */ - DICT_CHECK_NONE_LOADED = 0, - /** Some user tablespaces may have been opened - (no crash recovery; recovered table locks for transactions). */ - DICT_CHECK_SOME_LOADED, - /** All user tablespaces have been opened (crash recovery). */ - DICT_CHECK_ALL_LOADED -}; +/** Check each tablespace found in the data dictionary. +Look at each table defined in SYS_TABLES that has a space_id > 0. +If the tablespace is not yet in the fil_system cache, look up the +tablespace in SYS_DATAFILES to ensure the correct path. 
-/********************************************************************//** -In a crash recovery we already have all the tablespace objects created. -This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). +In a crash recovery we already have some tablespace objects created from +processing the REDO log. Any other tablespace in SYS_TABLESPACES not +previously used in recovery will be opened here. We will compare the +space_id information in the data dictionary to what we find in the +tablespace file. In addition, more validation will be done if recovery +was needed and force_recovery is not set. -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ -UNIV_INTERN +We also scan the biggest space id, and store it to fil_system. +@param[in] validate true if recovery was needed */ void dict_check_tablespaces_and_store_max_id( -/*====================================*/ - dict_check_t dict_check); /*!< in: how to check */ + bool validate); + /********************************************************************//** Finds the first table name in the given database. @return own: table name, NULL if does not exist; the caller must free the memory in the string! */ -UNIV_INTERN char* dict_get_first_table_name_in_db( /*============================*/ const char* name); /*!< in: database name which ends to '/' */ /********************************************************************//** -Loads a table definition from a SYS_TABLES record to dict_table_t. -Does not load any columns or indexes. 
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_table_low( -/*================*/ - const char* name, /*!< in: table name */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table); /*!< out,own: table, or NULL */ -/********************************************************************//** Loads a table column definition from a SYS_COLUMNS record to dict_table_t. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_column_low( /*=================*/ @@ -122,14 +109,36 @@ dict_load_column_low( or NULL if table != NULL */ table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ - const rec_t* rec); /*!< in: SYS_COLUMNS record */ + const rec_t* rec, /*!< in: SYS_COLUMNS record */ + ulint* nth_v_col); /*!< out: if not NULL, this + records the "n" of "nth" virtual + column */ + +/** Loads a virtual column "mapping" (to base columns) information +from a SYS_VIRTUAL record +@param[in,out] table table +@param[in,out] heap memory heap +@param[in,out] column mapped base column's dict_column_t +@param[in,out] table_id table id +@param[in,out] pos virtual column position +@param[in,out] base_pos base column position +@param[in] rec SYS_VIRTUAL record +@return error message, or NULL on success */ +const char* +dict_load_virtual_low( + dict_table_t* table, + mem_heap_t* heap, + dict_col_t** column, + table_id_t* table_id, + ulint* pos, + ulint* base_pos, + const rec_t* rec); /********************************************************************//** Loads an index definition from a SYS_INDEXES record to dict_index_t. If allocate=TRUE, we will create a dict_index_t structure and fill it accordingly. If allocated=FALSE, the dict_index_t will be supplied by the caller and filled with information read from the record. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_index_low( /*================*/ @@ -147,7 +156,6 @@ dict_load_index_low( Loads an index field definition from a SYS_FIELDS record to dict_index_t. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_load_field_low( /*================*/ @@ -170,44 +178,58 @@ Using the table->heap, copy the null-terminated filepath into table->data_dir_path and put a null byte before the extension. This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path. Make this data directory path only if it has not yet been saved. */ -UNIV_INTERN void dict_save_data_dir_path( /*====================*/ dict_table_t* table, /*!< in/out: table */ char* filepath); /*!< in: filepath of tablespace */ -/*****************************************************************//** -Make sure the data_file_name is saved in dict_table_t if needed. Try to -read it from the file dictionary first, then from SYS_DATAFILES. */ -UNIV_INTERN + +/** Get the first filepath from SYS_DATAFILES for a given space_id. +@param[in] space_id Tablespace ID +@return First filepath (caller must invoke ut_free() on it) +@retval NULL if no SYS_DATAFILES entry was found. */ +char* +dict_get_first_path( + ulint space_id); + +/** Make sure the data_file_name is saved in dict_table_t if needed. +Try to read it from the fil_system first, then from SYS_DATAFILES. +@param[in] table Table object +@param[in] dict_mutex_own true if dict_sys->mutex is owned already */ void dict_get_and_save_data_dir_path( -/*============================*/ - dict_table_t* table, /*!< in/out: table */ - bool dict_mutex_own); /*!< in: true if dict_sys->mutex - is owned already */ -/********************************************************************//** -Loads a table definition and also all its index definitions, and also + dict_table_t* table, + bool dict_mutex_own); + +/** Make sure the tablespace name is saved in dict_table_t if needed. 
+Try to read it from the file dictionary first, then from SYS_TABLESPACES. +@param[in] table Table object +@param[in] dict_mutex_own true if dict_sys->mutex is owned already */ +void +dict_get_and_save_space_name( + dict_table_t* table, + bool dict_mutex_own); + +/** Loads a table definition and also all its index definitions, and also the cluster definition if the table is a member in a cluster. Also loads all foreign key constraints where the foreign key is in the table or where a foreign key references columns in this table. +@param[in] name Table name in the dbname/tablename format +@param[in] cached true=add to cache, false=do not +@param[in] ignore_err Error to be ignored when loading + table and its index definition @return table, NULL if does not exist; if the table is stored in an -.ibd file, but the file does not exist, then we set the -ibd_file_missing flag TRUE in the table object we return */ -UNIV_INTERN +.ibd file, but the file does not exist, then we set the ibd_file_missing +flag in the table object we return. */ dict_table_t* dict_load_table( -/*============*/ - const char* name, /*!< in: table name in the - databasename/tablename format */ - ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */ + const char* name, + bool cached, dict_err_ignore_t ignore_err); - /*!< in: error to be ignored when loading - table and its indexes' definition */ + /***********************************************************************//** Loads a table object based on the table id. -@return table; NULL if table does not exist */ -UNIV_INTERN +@return table; NULL if table does not exist */ dict_table_t* dict_load_table_on_id( /*==================*/ @@ -218,7 +240,6 @@ dict_load_table_on_id( This function is called when the database is booted. Loads system table index definitions except for the clustered index which is added to the dictionary cache at booting before calling this function. 
*/ -UNIV_INTERN void dict_load_sys_table( /*================*/ @@ -226,11 +247,13 @@ dict_load_sys_table( /***********************************************************************//** Loads foreign key constraints where the table is either the foreign key holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache. -@return DB_SUCCESS or error code */ -UNIV_INTERN +constraints to the data dictionary. + +The foreign key constraint is loaded only if the referenced table is also +in the dictionary cache. If the referenced table is not in dictionary +cache, then it is added to the output parameter (fk_tables). + +@return DB_SUCCESS or error code */ dberr_t dict_load_foreigns( /*===============*/ @@ -242,20 +265,16 @@ dict_load_foreigns( chained by FK */ bool check_charsets, /*!< in: whether to check charset compatibility */ - dict_err_ignore_t ignore_err) /*!< in: error to be ignored */ + dict_err_ignore_t ignore_err, /*!< in: error to be ignored */ + dict_names_t& fk_tables) /*!< out: stack of table names + which must be loaded + subsequently to load all the + foreign key constraints. */ MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Prints to the standard output information on all tables found in the data -dictionary system table. */ -UNIV_INTERN -void -dict_print(void); -/*============*/ /********************************************************************//** This function opens a system table, and return the first record. 
-@return first record of the system table */ -UNIV_INTERN +@return first record of the system table */ const rec_t* dict_startscan_system( /*==================*/ @@ -265,8 +284,7 @@ dict_startscan_system( dict_system_id_t system_id); /*!< in: which system table to open */ /********************************************************************//** This function get the next system table record as we scan the table. -@return the record if found, NULL if end of scan. */ -UNIV_INTERN +@return the record if found, NULL if end of scan. */ const rec_t* dict_getnext_system( /*================*/ @@ -278,7 +296,6 @@ This function processes one SYS_TABLES record and populate the dict_table_t struct for the table. Extracted out of dict_print() to be used by both monitor table output and information schema innodb_sys_tables output. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_tables_rec_and_mtr_commit( /*=======================================*/ @@ -296,7 +313,6 @@ This function parses a SYS_INDEXES record and populate a dict_index_t structure with the information from the record. For detail information about SYS_INDEXES fields, please refer to dict_boot() function. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_indexes_rec( /*=========================*/ @@ -309,7 +325,6 @@ dict_process_sys_indexes_rec( This function parses a SYS_COLUMNS record and populate a dict_column_t structure with the information from the record. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_columns_rec( /*=========================*/ @@ -317,12 +332,29 @@ dict_process_sys_columns_rec( const rec_t* rec, /*!< in: current SYS_COLUMNS rec */ dict_col_t* column, /*!< out: dict_col_t to be filled */ table_id_t* table_id, /*!< out: table id */ - const char** col_name); /*!< out: column name */ + const char** col_name, /*!< out: column name */ + ulint* nth_v_col); /*!< out: if virtual col, this is + records its sequence number */ + +/** This function parses a SYS_VIRTUAL record and extract virtual column +information +@param[in,out] heap heap memory +@param[in] rec current SYS_COLUMNS rec +@param[in,out] table_id table id +@param[in,out] pos virtual column position +@param[in,out] base_pos base column position +@return error message, or NULL on success */ +const char* +dict_process_sys_virtual_rec( + mem_heap_t* heap, + const rec_t* rec, + table_id_t* table_id, + ulint* pos, + ulint* base_pos); /********************************************************************//** This function parses a SYS_FIELDS record and populate a dict_field_t structure with the information from the record. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_fields_rec( /*========================*/ @@ -338,7 +370,6 @@ This function parses a SYS_FOREIGN record and populate a dict_foreign_t structure with the information from the record. For detail information about SYS_FOREIGN fields, please refer to dict_load_foreign() function @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_foreign_rec( /*=========================*/ @@ -350,7 +381,6 @@ dict_process_sys_foreign_rec( This function parses a SYS_FOREIGN_COLS record and extract necessary information from the record and return to caller. 
@return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_foreign_col_rec( /*=============================*/ @@ -365,7 +395,6 @@ dict_process_sys_foreign_col_rec( This function parses a SYS_TABLESPACES record, extracts necessary information from the record and returns to caller. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_tablespaces( /*=========================*/ @@ -378,7 +407,6 @@ dict_process_sys_tablespaces( This function parses a SYS_DATAFILES record, extracts necessary information from the record and returns to caller. @return error message, or NULL on success */ -UNIV_INTERN const char* dict_process_sys_datafiles( /*=======================*/ @@ -386,40 +414,29 @@ dict_process_sys_datafiles( const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ ulint* space, /*!< out: pace id */ const char** path); /*!< out: datafile path */ -/********************************************************************//** -Get the filepath for a spaceid from SYS_DATAFILES. This function provides -a temporary heap which is used for the table lookup, but not for the path. -The caller must free the memory for the path returned. This function can -return NULL if the space ID is not found in SYS_DATAFILES, then the caller -will assume that the ibd file is in the normal datadir. -@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for -the given space ID. NULL if space ID is zero or not found. */ -UNIV_INTERN -char* -dict_get_first_path( -/*================*/ - ulint space, /*!< in: space id */ - const char* name); /*!< in: tablespace name */ -/********************************************************************//** -Update the record for space_id in SYS_TABLESPACES to this filepath. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN + +/** Update the record for space_id in SYS_TABLESPACES to this filepath. 
+@param[in] space_id Tablespace ID +@param[in] filepath Tablespace filepath +@return DB_SUCCESS if OK, dberr_t if the insert failed */ dberr_t dict_update_filepath( -/*=================*/ - ulint space_id, /*!< in: space id */ - const char* filepath); /*!< in: filepath */ -/********************************************************************//** -Insert records into SYS_TABLESPACES and SYS_DATAFILES. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN + ulint space_id, + const char* filepath); + +/** Replace records in SYS_TABLESPACES and SYS_DATAFILES associated with +the given space_id using an independent transaction. +@param[in] space_id Tablespace ID +@param[in] name Tablespace name +@param[in] filepath First filepath +@param[in] fsp_flags Tablespace flags +@return DB_SUCCESS if OK, dberr_t if the insert failed */ dberr_t -dict_insert_tablespace_and_filepath( -/*================================*/ - ulint space, /*!< in: space id */ - const char* name, /*!< in: talespace name */ - const char* filepath, /*!< in: filepath */ - ulint fsp_flags); /*!< in: tablespace flags */ +dict_replace_tablespace_and_filepath( + ulint space_id, + const char* name, + const char* filepath, + ulint fsp_flags); #ifndef UNIV_NONINL #include "dict0load.ic" diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index f964447fb8f..4fac0648bcb 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -41,13 +41,16 @@ Created 1/8/1996 Heikki Tuuri # include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ #include "ut0mem.h" -#include "ut0lst.h" #include "ut0rnd.h" #include "ut0byte.h" #include "hash0hash.h" #include "trx0types.h" #include "fts0fts.h" +#include "buf0buf.h" +#include "gis0type.h" #include "os0once.h" +#include "ut0new.h" + #include "fil0fil.h" #include #include "fil0crypt.h" @@ -62,7 +65,9 @@ struct ib_rbt_t; /** Type flags of an index: OR'ing of the flags is allowed to define a combination of 
types */ /* @{ */ -#define DICT_CLUSTERED 1 /*!< clustered index */ +#define DICT_CLUSTERED 1 /*!< clustered index; for other than + auto-generated clustered indexes, + also DICT_UNIQUE will be set */ #define DICT_UNIQUE 2 /*!< unique index */ #define DICT_UNIVERSAL 4 /*!< index which can contain records from any other index */ @@ -71,8 +76,11 @@ combination of types */ in SYS_INDEXES.TYPE */ #define DICT_FTS 32 /* FTS index; can't be combined with the other flags */ +#define DICT_SPATIAL 64 /* SPATIAL index; can't be combined with the + other flags */ +#define DICT_VIRTUAL 128 /* Index on Virtual column */ -#define DICT_IT_BITS 6 /*!< number of bits used for +#define DICT_IT_BITS 8 /*!< number of bits used for SYS_INDEXES.TYPE */ /* @} */ @@ -115,20 +123,31 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */ /** Width of the COMPACT flag */ #define DICT_TF_WIDTH_COMPACT 1 + /** Width of the ZIP_SSIZE flag */ #define DICT_TF_WIDTH_ZIP_SSIZE 4 + /** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up BLOB and TEXT fields, storing the first 768 bytes in the clustered index. -Brracuda row formats store the whole blob or text field off-page atomically. +Barracuda row formats store the whole blob or text field off-page atomically. Secondary indexes are created from this external data using row_ext_t to cache the BLOB prefixes. */ #define DICT_TF_WIDTH_ATOMIC_BLOBS 1 + /** If a table is created with the MYSQL option DATA DIRECTORY and innodb-file-per-table, an older engine will not be able to find that table. This flag prevents older engines from attempting to open the table and allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_WIDTH_DATA_DIR 1 +/** Width of the SHARED tablespace flag. +It is used to identify tables that exist inside a shared general tablespace. +If a table is created with the TABLESPACE=tsname option, an older engine will +not be able to find that table. 
This flag prevents older engines from attempting +to open the table and allows InnoDB to quickly find the tablespace. */ + +#define DICT_TF_WIDTH_SHARED_SPACE 1 + /** Width of the page compression flag */ @@ -148,15 +167,16 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_WIDTH_ATOMIC_WRITES 2 /** Width of all the currently known table flags */ -#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ - + DICT_TF_WIDTH_ZIP_SSIZE \ - + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR \ - + DICT_TF_WIDTH_PAGE_COMPRESSION \ - + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_ATOMIC_WRITES \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) +#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + + DICT_TF_WIDTH_ZIP_SSIZE \ + + DICT_TF_WIDTH_ATOMIC_BLOBS \ + + DICT_TF_WIDTH_DATA_DIR \ + + DICT_TF_WIDTH_SHARED_SPACE \ + + DICT_TF_WIDTH_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_ATOMIC_WRITES \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -172,9 +192,12 @@ DEFAULT=0, ON = 1, OFF = 2 /** Zero relative shift position of the DATA_DIR field */ #define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ + DICT_TF_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the SHARED TABLESPACE field */ +#define DICT_TF_POS_SHARED_SPACE (DICT_TF_POS_DATA_DIR \ + + DICT_TF_WIDTH_DATA_DIR) /** Zero relative shift position of the PAGE_COMPRESSION field */ -#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) +#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_SHARED_SPACE \ + + DICT_TF_WIDTH_SHARED_SPACE) /** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ #define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + DICT_TF_WIDTH_PAGE_COMPRESSION) @@ -183,12 +206,12 @@ DEFAULT=0, ON = 1, OFF = 2 + 
DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) /** Zero relative shift position of the PAGE_ENCRYPTION field */ #define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \ - + DICT_TF_WIDTH_ATOMIC_WRITES) + + DICT_TF_WIDTH_ATOMIC_WRITES) /** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ #define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION) + + DICT_TF_WIDTH_PAGE_ENCRYPTION) #define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -206,6 +229,10 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_MASK_DATA_DIR \ ((~(~0U << DICT_TF_WIDTH_DATA_DIR)) \ << DICT_TF_POS_DATA_DIR) +/** Bit mask of the SHARED_SPACE field */ +#define DICT_TF_MASK_SHARED_SPACE \ + ((~(~0U << DICT_TF_WIDTH_SHARED_SPACE)) \ + << DICT_TF_POS_SHARED_SPACE) /** Bit mask of the PAGE_COMPRESSION field */ #define DICT_TF_MASK_PAGE_COMPRESSION \ ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ @@ -239,10 +266,14 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_HAS_ATOMIC_BLOBS(flags) \ ((flags & DICT_TF_MASK_ATOMIC_BLOBS) \ >> DICT_TF_POS_ATOMIC_BLOBS) -/** Return the value of the ATOMIC_BLOBS field */ +/** Return the value of the DATA_DIR field */ #define DICT_TF_HAS_DATA_DIR(flags) \ ((flags & DICT_TF_MASK_DATA_DIR) \ >> DICT_TF_POS_DATA_DIR) +/** Return the value of the SHARED_SPACE field */ +#define DICT_TF_HAS_SHARED_SPACE(flags) \ + ((flags & DICT_TF_MASK_SHARED_SPACE) \ + >> DICT_TF_POS_SHARED_SPACE) /** Return the value of the PAGE_COMPRESSION field */ #define DICT_TF_GET_PAGE_COMPRESSION(flags) \ ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \ @@ -278,21 +309,26 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags for unknown bits in order to protect backward incompatibility. */ /* @{ */ /** Total number of bits in table->flags2. 
*/ -#define DICT_TF2_BITS 7 -#define DICT_TF2_BIT_MASK ~(~0U << DICT_TF2_BITS) +#define DICT_TF2_BITS 9 +#define DICT_TF2_UNUSED_BIT_MASK (~0U << DICT_TF2_BITS) +#define DICT_TF2_BIT_MASK ~DICT_TF2_UNUSED_BIT_MASK /** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */ #define DICT_TF2_TEMPORARY 1 + /** The table has an internal defined DOC ID column */ #define DICT_TF2_FTS_HAS_DOC_ID 2 + /** The table has an FTS index */ #define DICT_TF2_FTS 4 + /** Need to add Doc ID column for FTS index build. This is a transient bit for index build */ #define DICT_TF2_FTS_ADD_DOC_ID 8 + /** This bit is used during table creation to indicate that it will use its own tablespace instead of the system tablespace. */ -#define DICT_TF2_USE_TABLESPACE 16 +#define DICT_TF2_USE_FILE_PER_TABLE 16 /** Set when we discard/detach the tablespace */ #define DICT_TF2_DISCARDED 32 @@ -300,15 +336,25 @@ use its own tablespace instead of the system tablespace. */ /** This bit is set if all aux table names (both common tables and index tables) of a FTS table are in HEX format. */ #define DICT_TF2_FTS_AUX_HEX_NAME 64 + +/** Intrinsic table bit +Intrinsic table is table created internally by MySQL modules viz. Optimizer, +FTS, etc.... Intrinsic table has all the properties of the normal table except +it is not created by user and so not visible to end-user. */ +#define DICT_TF2_INTRINSIC 128 + +/** Encryption table bit. */ +#define DICT_TF2_ENCRYPTION 256 + /* @} */ -#define DICT_TF2_FLAG_SET(table, flag) \ +#define DICT_TF2_FLAG_SET(table, flag) \ (table->flags2 |= (flag)) -#define DICT_TF2_FLAG_IS_SET(table, flag) \ +#define DICT_TF2_FLAG_IS_SET(table, flag) \ (table->flags2 & (flag)) -#define DICT_TF2_FLAG_UNSET(table, flag) \ +#define DICT_TF2_FLAG_UNSET(table, flag) \ (table->flags2 &= ~(flag)) /** Tables could be chained together with Foreign key constraint. When @@ -329,15 +375,17 @@ before proceeds. 
*/ /**********************************************************************//** Creates a table memory object. -@return own: table object */ -UNIV_INTERN +@return own: table object */ dict_table_t* dict_mem_table_create( /*==================*/ const char* name, /*!< in: table name */ ulint space, /*!< in: space where the clustered index of the table is placed */ - ulint n_cols, /*!< in: number of columns */ + ulint n_cols, /*!< in: total number of columns + including virtual and non-virtual + columns */ + ulint n_v_cols, /*!< in: number of virtual columns */ ulint flags, /*!< in: table flags */ ulint flags2); /*!< in: table flags2 */ /**********************************************************************//** @@ -350,14 +398,12 @@ dict_mem_table_is_system( char *name); /*!< in: table name */ /****************************************************************//** Free a table memory object. */ -UNIV_INTERN void dict_mem_table_free( /*================*/ dict_table_t* table); /*!< in: table */ /**********************************************************************//** Adds a column definition to a table. */ -UNIV_INTERN void dict_mem_table_add_col( /*===================*/ @@ -368,21 +414,53 @@ dict_mem_table_add_col( ulint prtype, /*!< in: precise type */ ulint len) /*!< in: precision */ MY_ATTRIBUTE((nonnull(1))); +/** Adds a virtual column definition to a table. +@param[in,out] table table +@param[in] heap temporary memory heap, or NULL. It is + used to store name when we have not finished + adding all columns. 
When all columns are + added, the whole name will copy to memory from + table->heap +@param[in] name column name +@param[in] mtype main datatype +@param[in] prtype precise type +@param[in] len length +@param[in] pos position in a table +@param[in] num_base number of base columns +@return the virtual column definition */ +dict_v_col_t* +dict_mem_table_add_v_col( + dict_table_t* table, + mem_heap_t* heap, + const char* name, + ulint mtype, + ulint prtype, + ulint len, + ulint pos, + ulint num_base); + +/** Adds a stored column definition to a table. +@param[in] table table +@param[in] num_base number of base columns. */ +void +dict_mem_table_add_s_col( + dict_table_t* table, + ulint num_base); + /**********************************************************************//** Renames a column of a table in the data dictionary cache. */ -UNIV_INTERN void dict_mem_table_col_rename( /*======================*/ dict_table_t* table, /*!< in/out: table */ - unsigned nth_col,/*!< in: column index */ + ulint nth_col,/*!< in: column index */ const char* from, /*!< in: old column name */ - const char* to) /*!< in: new column name */ - MY_ATTRIBUTE((nonnull)); + const char* to, /*!< in: new column name */ + bool is_virtual); + /*!< in: if this is a virtual column */ /**********************************************************************//** This function populates a dict_col_t memory structure with supplied information. */ -UNIV_INTERN void dict_mem_fill_column_struct( /*========================*/ @@ -411,8 +489,7 @@ dict_mem_fill_index_struct( ulint n_fields); /*!< in: number of fields */ /**********************************************************************//** Creates an index memory object. -@return own: index object */ -UNIV_INTERN +@return own: index object */ dict_index_t* dict_mem_index_create( /*==================*/ @@ -428,7 +505,6 @@ dict_mem_index_create( Adds a field definition to an index. NOTE: does not take a copy of the column name if the field is a column. 
The memory occupied by the column name may be released only after publishing the index. */ -UNIV_INTERN void dict_mem_index_add_field( /*=====================*/ @@ -439,15 +515,13 @@ dict_mem_index_add_field( INDEX (textcol(25)) */ /**********************************************************************//** Frees an index memory object. */ -UNIV_INTERN void dict_mem_index_free( /*================*/ dict_index_t* index); /*!< in: index */ /**********************************************************************//** Creates and initializes a foreign constraint memory object. -@return own: foreign constraint struct */ -UNIV_INTERN +@return own: foreign constraint struct */ dict_foreign_t* dict_mem_foreign_create(void); /*=========================*/ @@ -457,7 +531,6 @@ Sets the foreign_table_name_lookup pointer based on the value of lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup will point to foreign_table_name. If 2, then another string is allocated from the heap and set to lower case. */ -UNIV_INTERN void dict_mem_foreign_table_name_lookup_set( /*===================================*/ @@ -469,13 +542,33 @@ Sets the referenced_table_name_lookup pointer based on the value of lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup will point to referenced_table_name. If 2, then another string is allocated from the heap and set to lower case. */ -UNIV_INTERN void dict_mem_referenced_table_name_lookup_set( /*======================================*/ dict_foreign_t* foreign, /*!< in/out: foreign struct */ ibool do_alloc); /*!< in: is an alloc needed */ +/** Fills the dependent virtual columns in a set. +Reason for being dependent are +1) FK can be present on base column of virtual columns +2) FK can be present on column which is a part of virtual index +@param[in,out] foreign foreign key information. 
*/ +void +dict_mem_foreign_fill_vcol_set( + dict_foreign_t* foreign); + +/** Fill virtual columns set in each fk constraint present in the table. +@param[in,out] table innodb table object. */ +void +dict_mem_table_fill_foreign_vcol_set( + dict_table_t* table); + +/** Free the vcol_set from all foreign key constraint on the table. +@param[in,out] table innodb table object. */ +void +dict_mem_table_free_foreign_vcol_set( + dict_table_t* table); + /** Create a temporary tablename like "#sql-ibtid-inc where tid = the Table ID inc = a randomly initialized number that is incremented for each file @@ -488,7 +581,6 @@ reasonably unique temporary file name. @param[in] dbtab Table name in the form database/table name @param[in] id Table id @return A unique temporary tablename suitable for InnoDB use */ -UNIV_INTERN char* dict_mem_create_temporary_tablename( mem_heap_t* heap, @@ -496,10 +588,59 @@ dict_mem_create_temporary_tablename( table_id_t id); /** Initialize dict memory variables */ - void dict_mem_init(void); +/** SQL identifier name wrapper for pretty-printing */ +class id_name_t +{ +public: + /** Default constructor */ + id_name_t() + : m_name() + {} + /** Constructor + @param[in] name identifier to assign */ + explicit id_name_t( + const char* name) + : m_name(name) + {} + + /** Assignment operator + @param[in] name identifier to assign */ + id_name_t& operator=( + const char* name) + { + m_name = name; + return(*this); + } + + /** Implicit type conversion + @return the name */ + operator const char*() const + { + return(m_name); + } + + /** Explicit type conversion + @return the name */ + const char* operator()() const + { + return(m_name); + } + +private: + /** The name in internal representation */ + const char* m_name; +}; + +/** Table name wrapper for pretty-printing */ +struct table_name_t +{ + /** The name in internal representation */ + char* m_name; +}; + /** Data structure for a column in a table */ struct dict_col_t{ /*----------------------*/ @@ -545,6 
+686,68 @@ struct dict_col_t{ 3072 for Barracuda table */ }; +/** Index information put in a list of virtual column structure. Index +id and virtual column position in the index will be logged. +There can be multiple entries for a given index, with a different position. */ +struct dict_v_idx_t { + /** active index on the column */ + dict_index_t* index; + + /** position in this index */ + ulint nth_field; +}; + +/** Index list to put in dict_v_col_t */ +typedef std::list > dict_v_idx_list; + +/** Data structure for a virtual column in a table */ +struct dict_v_col_t{ + /** column structure */ + dict_col_t m_col; + + /** array of base column ptr */ + dict_col_t** base_col; + + /** number of base column */ + ulint num_base; + + /** column pos in table */ + ulint v_pos; + + /** Virtual index list, and column position in the index, + the allocated memory is not from table->heap, nor it is + tracked by dict_sys->size */ + dict_v_idx_list* v_indexes; + +}; + +/** Data structure for newly added virtual column in a table */ +struct dict_add_v_col_t{ + /** number of new virtual column */ + ulint n_v_col; + + /** column structures */ + const dict_v_col_t* v_col; + + /** new col names */ + const char** v_col_name; +}; + +/** Data structure for a stored column in a table. */ +struct dict_s_col_t { + /** Stored column ptr */ + dict_col_t* m_col; + /** array of base col ptr */ + dict_col_t** base_col; + /** number of base columns */ + ulint num_base; + /** column pos in table */ + ulint s_pos; +}; + +/** list to put stored column for create_table_info_t */ +typedef std::list > dict_s_col_list; + /** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum indexed column length (or indexed prefix length) in ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. 
Also, in any format, @@ -575,6 +778,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */ /** Defines the maximum fixed length column size */ #define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN + #ifdef WITH_WSREP #define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500 #endif /* WITH_WSREP */ @@ -582,7 +786,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */ /** Data structure for a field in an index */ struct dict_field_t{ dict_col_t* col; /*!< pointer to the table column */ - const char* name; /*!< name of the column */ + id_name_t name; /*!< name of the column */ unsigned prefix_len:12; /*!< 0 or the length of the column prefix in bytes in a MySQL index of type, e.g., INDEX (textcol(25)); @@ -634,12 +838,11 @@ extern ulong zip_failure_threshold_pct; compression failures */ extern ulong zip_pad_max; -/** Data structure to hold information about how much space in +/** Data structure to hold information about about how much space in an uncompressed page should be left as padding to avoid compression failures. This estimate is based on a self-adapting heuristic. */ struct zip_pad_info_t { - os_fast_mutex_t* - mutex; /*!< mutex protecting the info */ + SysMutex* mutex; /*!< mutex protecting the info */ ulint pad; /*!< number of bytes used as pad */ ulint success;/*!< successful compression ops during current round */ @@ -656,22 +859,124 @@ struct zip_pad_info_t { a certain index.*/ #define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10 +/** If key is fixed length key then cache the record offsets on first +computation. This will help save computation cycle that generate same +redundant data. */ +class rec_cache_t +{ +public: + /** Constructor */ + rec_cache_t() + : + rec_size(), + offsets(), + sz_of_offsets(), + fixed_len_key(), + offsets_cached(), + key_has_null_cols() + { + /* Do Nothing. */ + } + +public: + /** Record size. (for fixed length key record size is constant) */ + ulint rec_size; + + /** Holds reference to cached offsets for record. 
*/ + ulint* offsets; + + /** Size of offset array */ + uint32_t sz_of_offsets; + + /** If true, then key is fixed length key. */ + bool fixed_len_key; + + /** If true, then offset has been cached for re-use. */ + bool offsets_cached; + + /** If true, then key part can have columns that can take + NULL values. */ + bool key_has_null_cols; +}; + +/** Cache position of last inserted or selected record by caching record +and holding reference to the block where record resides. +Note: We don't commit mtr and hold it beyond a transaction lifetime as this is +a special case (intrinsic table) that are not shared accross connection. */ +class last_ops_cur_t +{ +public: + /** Constructor */ + last_ops_cur_t() + : + rec(), + block(), + mtr(), + disable_caching(), + invalid() + { + /* Do Nothing. */ + } + + /* Commit mtr and re-initialize cache record and block to NULL. */ + void release() + { + if (mtr.is_active()) { + mtr_commit(&mtr); + } + rec = NULL; + block = NULL; + invalid = false; + } + +public: + /** last inserted/selected record. */ + rec_t* rec; + + /** block where record reside. */ + buf_block_t* block; + + /** active mtr that will be re-used for next insert/select. */ + mtr_t mtr; + + /** disable caching. (disabled when table involves blob/text.) */ + bool disable_caching; + + /** If index structure is undergoing structural change viz. + split then invalidate the cached position as it would be no more + remain valid. Will be re-cached on post-split insert. */ + bool invalid; +}; + +/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default +system clustered index when there is no primary key. */ +const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; + +/* Estimated number of offsets in records (based on columns) +to start with. */ +#define OFFS_IN_REC_NORMAL_SIZE 100 + /** Data structure for an index. Most fields will be initialized to 0, NULL or FALSE in dict_mem_index_create(). 
*/ struct dict_index_t{ index_id_t id; /*!< id of the index */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< index name */ + id_name_t name; /*!< index name */ const char* table_name;/*!< table name */ dict_table_t* table; /*!< back pointer to table */ #ifndef UNIV_HOTBACKUP unsigned space:32; /*!< space where the index tree is placed */ unsigned page:32;/*!< index tree root page number */ + unsigned merge_threshold:6; + /*!< In the pessimistic delete, if the page + data size drops below this limit in percent, + merging it to a neighbor is tried */ +# define DICT_INDEX_MERGE_THRESHOLD_DEFAULT 50 #endif /* !UNIV_HOTBACKUP */ unsigned type:DICT_IT_BITS; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE, - DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */ + DICT_IBUF, DICT_CORRUPT) */ #define MAX_KEY_LENGTH_BITS 12 unsigned trx_id_offset:MAX_KEY_LENGTH_BITS; /*!< position of the trx id column @@ -685,6 +990,18 @@ struct dict_index_t{ /*!< number of columns the user defined to be in the index: in the internal representation we add more columns */ + unsigned allow_duplicates:1; + /*!< if true, allow duplicate values + even if index is created with unique + constraint */ + unsigned nulls_equal:1; + /*!< if true, SQL NULL == SQL NULL */ + unsigned disable_ahi:1; + /*!< in true, then disable AHI. + Currently limited to intrinsic + temporary table as index id is not + unqiue for such table which is one of the + validation criterion for ahi. */ unsigned n_uniq:10;/*!< number of fields from the beginning which are enough to determine an index entry uniquely */ @@ -703,7 +1020,24 @@ struct dict_index_t{ by dict_operation_lock and dict_sys->mutex. Other changes are protected by index->lock. 
*/ + unsigned uncommitted:1; + /*!< a flag that is set for secondary indexes + that have not been committed to the + data dictionary yet */ + +#ifdef UNIV_DEBUG + uint32_t magic_n;/*!< magic number */ +/** Value of dict_index_t::magic_n */ +# define DICT_INDEX_MAGIC_N 76789786 +#endif dict_field_t* fields; /*!< array of field descriptions */ + st_mysql_ftparser* + parser; /*!< fulltext parser plugin */ + bool is_ngram; + /*!< true if it's ngram parser */ + bool has_new_v_col; + /*!< whether it has a newly added virtual + column in ALTER */ #ifndef UNIV_HOTBACKUP UT_LIST_NODE_T(dict_index_t) indexes;/*!< list of indexes of the table */ @@ -764,26 +1098,48 @@ struct dict_index_t{ /* in which slot the next sample should be saved. */ /* @} */ - rw_lock_t lock; /*!< read-write lock protecting the - upper levels of the index tree */ + last_ops_cur_t* last_ins_cur; + /*!< cache the last insert position. + Currently limited to auto-generated + clustered index on intrinsic table only. */ + last_ops_cur_t* last_sel_cur; + /*!< cache the last selected position + Currently limited to intrinsic table only. */ + rec_cache_t rec_cache; + /*!< cache the field that needs to be + re-computed on each insert. + Limited to intrinsic table as this is common + share and can't be used without protection + if table is accessible to multiple-threads. */ + rtr_ssn_t rtr_ssn;/*!< Node sequence number for RTree */ + rtr_info_track_t* + rtr_track;/*!< tracking all R-Tree search cursors */ trx_id_t trx_id; /*!< id of the transaction that created this index, or 0 if the index existed when InnoDB was started up */ zip_pad_info_t zip_pad;/*!< Information about state of compression failures and successes */ + rw_lock_t lock; /*!< read-write lock protecting the + upper levels of the index tree */ + + /** Determine if the index has been committed to the + data dictionary. 
+ @return whether the index definition has been committed */ + bool is_committed() const + { + ut_ad(!uncommitted || !(type & DICT_CLUSTERED)); + return(UNIV_LIKELY(!uncommitted)); + } + + /** Flag an index committed or uncommitted. + @param[in] committed whether the index is committed */ + void set_committed(bool committed) + { + ut_ad(!to_be_dropped); + ut_ad(committed || !(type & DICT_CLUSTERED)); + uncommitted = !committed; + } #endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_BLOB_DEBUG - ib_mutex_t blobs_mutex; - /*!< mutex protecting blobs */ - ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no) - to first_blob_page_no; protected by - blobs_mutex; @see btr_blob_dbg_t */ -#endif /* UNIV_BLOB_DEBUG */ -#ifdef UNIV_DEBUG - ulint magic_n;/*!< magic number */ -/** Value of dict_index_t::magic_n */ -# define DICT_INDEX_MAGIC_N 76789786 -#endif }; /** The status of online index creation */ @@ -806,6 +1162,11 @@ enum online_index_status { ONLINE_INDEX_ABORTED_DROPPED }; +/** Set to store the virtual columns which are affected by Foreign +key constraint. */ +typedef std::set, + ut_allocator > dict_vcol_set; + /** Data structure for a foreign key constraint; an example: FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */ @@ -841,6 +1202,9 @@ struct dict_foreign_t{ does not generate new indexes implicitly */ dict_index_t* referenced_index;/*!< referenced index */ + + dict_vcol_set* v_cols; /*!< set of virtual columns affected + by foreign key constraint. */ }; std::ostream& @@ -889,6 +1253,24 @@ struct dict_foreign_with_index { const dict_index_t* m_index; }; +#ifdef WITH_WSREP +/** A function object to find a foreign key with the given index as the +foreign index. 
Return the foreign key with matching criteria or NULL */ +struct dict_foreign_with_foreign_index { + + dict_foreign_with_foreign_index(const dict_index_t* index) + : m_index(index) + {} + + bool operator()(const dict_foreign_t* foreign) const + { + return(foreign->foreign_index == m_index); + } + + const dict_index_t* m_index; +}; +#endif + /* A function object to check if the foreign constraint is between different tables. Returns true if foreign key constraint is between different tables, false otherwise. */ @@ -926,7 +1308,10 @@ struct dict_foreign_matches_id { const char* m_id; }; -typedef std::set dict_foreign_set; +typedef std::set< + dict_foreign_t*, + dict_foreign_compare, + ut_allocator > dict_foreign_set; std::ostream& operator<< (std::ostream& out, const dict_foreign_set& fk_set); @@ -970,6 +1355,10 @@ dict_foreign_free( /*==============*/ dict_foreign_t* foreign) /*!< in, own: foreign key struct */ { + if (foreign->v_cols != NULL) { + UT_DELETE(foreign->v_cols); + } + mem_heap_free(foreign->heap); } @@ -1004,6 +1393,62 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */ #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ /* @} */ +/** Display an identifier. +@param[in,out] s output stream +@param[in] id_name SQL identifier (other than table name) +@return the output stream */ +std::ostream& +operator<<( + std::ostream& s, + const id_name_t& id_name); + +/** Display a table name. +@param[in,out] s output stream +@param[in] table_name table name +@return the output stream */ +std::ostream& +operator<<( + std::ostream& s, + const table_name_t& table_name); + +/** List of locks that different transactions have acquired on a table. This +list has a list node that is embedded in a nested union/structure. We have to +generate a specific template for it. 
*/ + +typedef ut_list_base lock_table_t::*> + table_lock_list_t; + +/** mysql template structure defined in row0mysql.cc */ +struct mysql_row_templ_t; + +/** Structure defines template related to virtual columns and +their base columns */ +struct dict_vcol_templ_t { + /** number of regular columns */ + ulint n_col; + + /** number of virtual columns */ + ulint n_v_col; + + /** array of templates for virtual col and their base columns */ + mysql_row_templ_t** vtempl; + + /** table's database name */ + std::string db_name; + + /** table name */ + std::string tb_name; + + /** share->table_name */ + std::string share_name; + + /** MySQL record length */ + ulint rec_len; + + /** default column value if any */ + byte* default_rec; +}; + /* This flag is for sync SQL DDL and memcached DML. if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on the table, DML from memcached will be blocked. */ @@ -1023,60 +1468,143 @@ typedef enum { /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ -struct dict_table_t{ +struct dict_table_t { + /** Get reference count. + @return current value of n_ref_count */ + inline ulint get_ref_count() const; + + /** Acquire the table handle. */ + inline void acquire(); - table_id_t id; /*!< id of the table */ - mem_heap_t* heap; /*!< memory heap */ - char* name; /*!< table name */ void* thd; /*!< thd */ fil_space_crypt_t *crypt_data; /*!< crypt data if present */ - const char* dir_path_of_temp_table;/*!< NULL or the directory path - where a TEMPORARY table that was explicitly - created by a user should be placed if - innodb_file_per_table is defined in my.cnf; - in Unix this is usually /tmp/..., in Windows - temp\... */ - char* data_dir_path; /*!< NULL or the directory path - specified by DATA DIRECTORY */ - unsigned space:32; - /*!< space where the clustered index of the - table is placed */ - unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... 
*/ - unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */ - unsigned ibd_file_missing:1; - /*!< TRUE if this is in a single-table - tablespace and the .ibd file is missing; then - we must return in ha_innodb.cc an error if the - user tries to query such an orphaned table */ - unsigned cached:1;/*!< TRUE if the table object has been added - to the dictionary cache */ - unsigned to_be_dropped:1; - /*!< TRUE if the table is to be dropped, but - not yet actually dropped (could in the bk - drop list); It is turned on at the beginning - of row_drop_table_for_mysql() and turned off - just before we start to update system tables - for the drop. It is protected by - dict_operation_lock */ - unsigned n_def:10;/*!< number of columns defined so far */ - unsigned n_cols:10;/*!< number of columns */ - unsigned can_be_evicted:1; - /*!< TRUE if it's not an InnoDB system table - or a table that has no FK relationships */ - unsigned corrupted:1; - /*!< TRUE if table is corrupted */ - unsigned drop_aborted:1; - /*!< TRUE if some indexes should be dropped - after ONLINE_INDEX_ABORTED - or ONLINE_INDEX_ABORTED_DROPPED */ - dict_col_t* cols; /*!< array of column descriptions */ - const char* col_names; - /*!< Column names packed in a character string - "name1\0name2\0...nameN\0". Until - the string contains n_cols, it will be - allocated from a temporary heap. The final - string will be allocated from table->heap. */ + + /** Release the table handle. */ + inline void release(); + + /** Id of the table. */ + table_id_t id; + + /** Memory heap. If you allocate from this heap after the table has + been created then be sure to account the allocation into + dict_sys->size. When closing the table we do something like + dict_sys->size -= mem_heap_get_size(table->heap) and if that is going + to become negative then we would assert. Something like this should do: + old_size = mem_heap_get_size() + mem_heap_alloc() + new_size = mem_heap_get_size() + dict_sys->size += new_size - old_size. 
*/ + mem_heap_t* heap; + + /** Table name. */ + table_name_t name; + + /** NULL or the directory path where a TEMPORARY table that was + explicitly created by a user should be placed if innodb_file_per_table + is defined in my.cnf. In Unix this is usually "/tmp/...", + in Windows "temp\...". */ + const char* dir_path_of_temp_table; + + /** NULL or the directory path specified by DATA DIRECTORY. */ + char* data_dir_path; + + /** NULL or the tablespace name that this table is assigned to, + specified by the TABLESPACE option.*/ + id_name_t tablespace; + + /** Space where the clustered index of the table is placed. */ + uint32_t space; + + /** Stores information about: + 1 row format (redundant or compact), + 2 compressed page size (zip shift size), + 3 whether using atomic blobs, + 4 whether the table has been created with the option DATA DIRECTORY. + Use DICT_TF_GET_COMPACT(), DICT_TF_GET_ZIP_SSIZE(), + DICT_TF_HAS_ATOMIC_BLOBS() and DICT_TF_HAS_DATA_DIR() to parse this + flag. */ + unsigned flags:DICT_TF_BITS; + + /** Stores information about: + 1 whether the table has been created using CREATE TEMPORARY TABLE, + 2 whether the table has an internally defined DOC ID column, + 3 whether the table has a FTS index, + 4 whether DOC ID column need to be added to the FTS index, + 5 whether the table is being created its own tablespace, + 6 whether the table has been DISCARDed, + 7 whether the aux FTS tables names are in hex. + 8 whether the table is instinc table. + 9 whether the table has encryption setting. + Use DICT_TF2_FLAG_IS_SET() to parse this flag. */ + unsigned flags2:DICT_TF2_BITS; + + /** TRUE if this is in a single-table tablespace and the .ibd file is + missing. Then we must return in ha_innodb.cc an error if the user + tries to query such an orphaned table. */ + unsigned ibd_file_missing:1; + + /** TRUE if the table object has been added to the dictionary cache. 
*/ + unsigned cached:1; + + /** TRUE if the table is to be dropped, but not yet actually dropped + (could in the background drop list). It is turned on at the beginning + of row_drop_table_for_mysql() and turned off just before we start to + update system tables for the drop. It is protected by + dict_operation_lock. */ + unsigned to_be_dropped:1; + + /** Number of non-virtual columns defined so far. */ + unsigned n_def:10; + + /** Number of non-virtual columns. */ + unsigned n_cols:10; + + /** Number of total columns (inlcude virtual and non-virtual) */ + unsigned n_t_cols:10; + + /** Number of total columns defined so far. */ + unsigned n_t_def:10; + + /** Number of virtual columns defined so far. */ + unsigned n_v_def:10; + + /** Number of virtual columns. */ + unsigned n_v_cols:10; + + /** TRUE if it's not an InnoDB system table or a table that has no FK + relationships. */ + unsigned can_be_evicted:1; + + /** TRUE if table is corrupted. */ + unsigned corrupted:1; + + /** TRUE if some indexes should be dropped after ONLINE_INDEX_ABORTED + or ONLINE_INDEX_ABORTED_DROPPED. */ + unsigned drop_aborted:1; + + /** Array of column descriptions. */ + dict_col_t* cols; + + /** Array of virtual column descriptions. */ + dict_v_col_t* v_cols; + + /** List of stored column descriptions. It is used only for foreign key + check during create table and copy alter operations. + During copy alter, s_cols list is filled during create table operation + and need to preserve till rename table operation. That is the + reason s_cols is a part of dict_table_t */ + dict_s_col_list* s_cols; + + /** Column names packed in a character string + "name1\0name2\0...nameN\0". Until the string contains n_cols, it will + be allocated from a temporary heap. The final string will be allocated + from table->heap. 
*/ + const char* col_names; + + /** Virtual column names */ + const char* v_col_names; + bool is_system_db; /*!< True if the table belongs to a system database (mysql, information_schema or @@ -1086,276 +1614,307 @@ struct dict_table_t{ dictionary information and MySQL FRM information mismatch. */ #ifndef UNIV_HOTBACKUP - hash_node_t name_hash; /*!< hash chain node */ - hash_node_t id_hash; /*!< hash chain node */ - UT_LIST_BASE_NODE_T(dict_index_t) - indexes; /*!< list of indexes of the table */ + /** Hash chain node. */ + hash_node_t name_hash; - dict_foreign_set foreign_set; - /*!< set of foreign key constraints - in the table; these refer to columns - in other tables */ + /** Hash chain node. */ + hash_node_t id_hash; - dict_foreign_set referenced_set; - /*!< list of foreign key constraints - which refer to this table */ + /** The FTS_DOC_ID_INDEX, or NULL if no fulltext indexes exist */ + dict_index_t* fts_doc_id_index; + + /** List of indexes of the table. */ + UT_LIST_BASE_NODE_T(dict_index_t) indexes; + + /** List of foreign key constraints in the table. These refer to + columns in other tables. */ + UT_LIST_BASE_NODE_T(dict_foreign_t) foreign_list; + + /** List of foreign key constraints which refer to this table. */ + UT_LIST_BASE_NODE_T(dict_foreign_t) referenced_list; + + /** Node of the LRU list of tables. */ + UT_LIST_NODE_T(dict_table_t) table_LRU; + + /** Maximum recursive level we support when loading tables chained + together with FK constraints. If exceeds this level, we will stop + loading child table into memory along with its parent table. */ + unsigned fk_max_recusive_level:8; + + /** Count of how many foreign key check operations are currently being + performed on the table. We cannot drop the table while there are + foreign key checks running on it. */ + ulint n_foreign_key_checks_running; + + /** Transactions whose view low limit is greater than this number are + not allowed to store to the MySQL query cache or retrieve from it. 
+ When a trx with undo logs commits, it sets this to the value of the + current time. */ + trx_id_t query_cache_inv_id; + + /** Transaction id that last touched the table definition. Either when + loading the definition or CREATE TABLE, or ALTER TABLE (prepare, + commit, and rollback phases). */ + trx_id_t def_trx_id; + + /*!< set of foreign key constraints in the table; these refer to + columns in other tables */ + dict_foreign_set foreign_set; + + /*!< set of foreign key constraints which refer to this table */ + dict_foreign_set referenced_set; - UT_LIST_NODE_T(dict_table_t) - table_LRU; /*!< node of the LRU list of tables */ - unsigned fk_max_recusive_level:8; - /*!< maximum recursive level we support when - loading tables chained together with FK - constraints. If exceeds this level, we will - stop loading child table into memory along with - its parent table */ - ulint n_foreign_key_checks_running; - /*!< count of how many foreign key check - operations are currently being performed - on the table: we cannot drop the table while - there are foreign key checks running on - it! 
*/ - trx_id_t def_trx_id; - /*!< transaction id that last touched - the table definition, either when - loading the definition or CREATE - TABLE, or ALTER TABLE (prepare, - commit, and rollback phases) */ - trx_id_t query_cache_inv_trx_id; - /*!< transactions whose trx id is - smaller than this number are not - allowed to store to the MySQL query - cache or retrieve from it; when a trx - with undo logs commits, it sets this - to the value of the trx id counter for - the tables it had an IX lock on */ #ifdef UNIV_DEBUG - /*----------------------*/ - ibool does_not_fit_in_memory; - /*!< this field is used to specify in - simulations tables which are so big - that disk should be accessed: disk - access is simulated by putting the - thread to sleep for a while; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about value TRUE if it has - to reload the table definition from - disk */ + /** This field is used to specify in simulations tables which are so + big that disk should be accessed. Disk access is simulated by putting + the thread to sleep for a while. NOTE that this flag is not stored to + the data dictionary on disk, and the database will forget about value + TRUE if it has to reload the table definition from disk. */ + ibool does_not_fit_in_memory; #endif /* UNIV_DEBUG */ - /*----------------------*/ - unsigned big_rows:1; - /*!< flag: TRUE if the maximum length of - a single row exceeds BIG_ROW_SIZE; - initialized in dict_table_add_to_cache() */ - /** Statistics for query optimization */ - /* @{ */ - volatile os_once::state_t stats_latch_created; - /*!< Creation state of 'stats_latch'. */ + /** TRUE if the maximum length of a single row exceeds BIG_ROW_SIZE. + Initialized in dict_table_add_to_cache(). 
*/ + unsigned big_rows:1; - rw_lock_t* stats_latch; /*!< this latch protects: - dict_table_t::stat_initialized - dict_table_t::stat_n_rows (*) - dict_table_t::stat_clustered_index_size - dict_table_t::stat_sum_of_other_index_sizes - dict_table_t::stat_modified_counter (*) - dict_table_t::indexes*::stat_n_diff_key_vals[] - dict_table_t::indexes*::stat_index_size - dict_table_t::indexes*::stat_n_leaf_pages - (*) those are not always protected for - performance reasons */ - unsigned stat_initialized:1; /*!< TRUE if statistics have - been calculated the first time - after database startup or table creation */ -#define DICT_TABLE_IN_USED -1 - lint memcached_sync_count; - /*!< count of how many handles are opened - to this table from memcached; DDL on the - table is NOT allowed until this count - goes to zero. If it's -1, means there's DDL - on the table, DML from memcached will be - blocked. */ - ib_time_t stats_last_recalc; - /*!< Timestamp of last recalc of the stats */ - ib_uint32_t stat_persistent; - /*!< The two bits below are set in the - ::stat_persistent member and have the following - meaning: - 1. _ON=0, _OFF=0, no explicit persistent stats - setting for this table, the value of the global - srv_stats_persistent is used to determine - whether the table has persistent stats enabled - or not - 2. _ON=0, _OFF=1, persistent stats are - explicitly disabled for this table, regardless - of the value of the global srv_stats_persistent - 3. _ON=1, _OFF=0, persistent stats are - explicitly enabled for this table, regardless - of the value of the global srv_stats_persistent - 4. _ON=1, _OFF=1, not allowed, we assert if - this ever happens. */ -#define DICT_STATS_PERSISTENT_ON (1 << 1) -#define DICT_STATS_PERSISTENT_OFF (1 << 2) - ib_uint32_t stats_auto_recalc; - /*!< The two bits below are set in the - ::stats_auto_recalc member and have - the following meaning: - 1. 
_ON=0, _OFF=0, no explicit auto recalc - setting for this table, the value of the global - srv_stats_persistent_auto_recalc is used to - determine whether the table has auto recalc - enabled or not - 2. _ON=0, _OFF=1, auto recalc is explicitly - disabled for this table, regardless of the - value of the global - srv_stats_persistent_auto_recalc - 3. _ON=1, _OFF=0, auto recalc is explicitly - enabled for this table, regardless of the - value of the global - srv_stats_persistent_auto_recalc - 4. _ON=1, _OFF=1, not allowed, we assert if - this ever happens. */ -#define DICT_STATS_AUTO_RECALC_ON (1 << 1) -#define DICT_STATS_AUTO_RECALC_OFF (1 << 2) - ulint stats_sample_pages; - /*!< the number of pages to sample for this - table during persistent stats estimation; - if this is 0, then the value of the global - srv_stats_persistent_sample_pages will be - used instead. */ - ib_uint64_t stat_n_rows; - /*!< approximate number of rows in the table; - we periodically calculate new estimates */ - ulint stat_clustered_index_size; - /*!< approximate clustered index size in - database pages */ - ulint stat_sum_of_other_index_sizes; - /*!< other indexes in database pages */ - ib_uint64_t stat_modified_counter; - /*!< when a row is inserted, updated, - or deleted, - we add 1 to this number; we calculate new - estimates for the stat_... values for the - table and the indexes when about 1 / 16 of - table has been modified; - also when the estimate operation is - called for MySQL SHOW TABLE STATUS; the - counter is reset to zero at statistics - calculation; this counter is not protected by - any latch, because this is only used for - heuristics */ + /** Statistics for query optimization. @{ */ -#define BG_STAT_IN_PROGRESS ((byte)(1 << 0)) - /*!< BG_STAT_IN_PROGRESS is set in - stats_bg_flag when the background - stats code is working on this table. The DROP - TABLE code waits for this to be cleared - before proceeding. 
*/ -#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1)) - /*!< BG_STAT_SHOULD_QUIT is set in - stats_bg_flag when DROP TABLE starts - waiting on BG_STAT_IN_PROGRESS to be cleared, - the background stats thread will detect this - and will eventually quit sooner */ -#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2)) + /** Creation state of 'stats_latch'. */ + volatile os_once::state_t stats_latch_created; + + /** This latch protects: + dict_table_t::stat_initialized, + dict_table_t::stat_n_rows (*), + dict_table_t::stat_clustered_index_size, + dict_table_t::stat_sum_of_other_index_sizes, + dict_table_t::stat_modified_counter (*), + dict_table_t::indexes*::stat_n_diff_key_vals[], + dict_table_t::indexes*::stat_index_size, + dict_table_t::indexes*::stat_n_leaf_pages. + (*) Those are not always protected for + performance reasons. */ + rw_lock_t* stats_latch; + + /** TRUE if statistics have been calculated the first time after + database startup or table creation. */ + unsigned stat_initialized:1; + + /** Timestamp of last recalc of the stats. */ + ib_time_t stats_last_recalc; + + /** The two bits below are set in the 'stat_persistent' member. They + have the following meaning: + 1. _ON=0, _OFF=0, no explicit persistent stats setting for this table, + the value of the global srv_stats_persistent is used to determine + whether the table has persistent stats enabled or not + 2. _ON=0, _OFF=1, persistent stats are explicitly disabled for this + table, regardless of the value of the global srv_stats_persistent + 3. _ON=1, _OFF=0, persistent stats are explicitly enabled for this + table, regardless of the value of the global srv_stats_persistent + 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */ + #define DICT_STATS_PERSISTENT_ON (1 << 1) + #define DICT_STATS_PERSISTENT_OFF (1 << 2) + + /** Indicates whether the table uses persistent stats or not. See + DICT_STATS_PERSISTENT_ON and DICT_STATS_PERSISTENT_OFF. 
*/ + ib_uint32_t stat_persistent; + + /** The two bits below are set in the 'stats_auto_recalc' member. They + have the following meaning: + 1. _ON=0, _OFF=0, no explicit auto recalc setting for this table, the + value of the global srv_stats_persistent_auto_recalc is used to + determine whether the table has auto recalc enabled or not + 2. _ON=0, _OFF=1, auto recalc is explicitly disabled for this table, + regardless of the value of the global srv_stats_persistent_auto_recalc + 3. _ON=1, _OFF=0, auto recalc is explicitly enabled for this table, + regardless of the value of the global srv_stats_persistent_auto_recalc + 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */ + #define DICT_STATS_AUTO_RECALC_ON (1 << 1) + #define DICT_STATS_AUTO_RECALC_OFF (1 << 2) + + /** Indicates whether the table uses automatic recalc for persistent + stats or not. See DICT_STATS_AUTO_RECALC_ON and + DICT_STATS_AUTO_RECALC_OFF. */ + ib_uint32_t stats_auto_recalc; + + /** The number of pages to sample for this table during persistent + stats estimation. If this is 0, then the value of the global + srv_stats_persistent_sample_pages will be used instead. */ + ulint stats_sample_pages; + + /** Approximate number of rows in the table. We periodically calculate + new estimates. */ + ib_uint64_t stat_n_rows; + + /** Approximate clustered index size in database pages. */ + ulint stat_clustered_index_size; + + /** Approximate size of other indexes in database pages. */ + ulint stat_sum_of_other_index_sizes; + + /** How many rows are modified since last stats recalc. When a row is + inserted, updated, or deleted, we add 1 to this number; we calculate + new estimates for the table and the indexes if the table has changed + too much, see row_update_statistics_if_needed(). The counter is reset + to zero at statistics calculation. This counter is not protected by + any latch, because this is only used for heuristics. 
*/ + ib_uint64_t stat_modified_counter; + + /** Background stats thread is not working on this table. */ + #define BG_STAT_NONE 0 + + /** Set in 'stats_bg_flag' when the background stats code is working + on this table. The DROP TABLE code waits for this to be cleared before + proceeding. */ + #define BG_STAT_IN_PROGRESS (1 << 0) + + /** Set in 'stats_bg_flag' when DROP TABLE starts waiting on + BG_STAT_IN_PROGRESS to be cleared. The background stats thread will + detect this and will eventually quit sooner. */ + #define BG_STAT_SHOULD_QUIT (1 << 1) + + /** The state of the background stats thread wrt this table. + See BG_STAT_NONE, BG_STAT_IN_PROGRESS and BG_STAT_SHOULD_QUIT. + Writes are covered by dict_sys->mutex. Dirty reads are possible. */ + + #define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2)) /*!< BG_SCRUB_IN_PROGRESS is set in stats_bg_flag when the background scrub code is working on this table. The DROP TABLE code waits for this to be cleared before proceeding. */ -#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS) + #define BG_STAT_SHOULD_QUIT (1 << 1) + + #define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS) + + + /** The state of the background stats thread wrt this table. + See BG_STAT_NONE, BG_STAT_IN_PROGRESS and BG_STAT_SHOULD_QUIT. + Writes are covered by dict_sys->mutex. Dirty reads are possible. */ + byte stats_bg_flag; - byte stats_bg_flag; - /*!< see BG_STAT_* above. - Writes are covered by dict_sys->mutex. - Dirty reads are possible. */ bool stats_error_printed; /*!< Has persistent stats error beein already printed for this table ? */ - /* @} */ - /*----------------------*/ - /**!< The following fields are used by the - AUTOINC code. The actual collection of - tables locked during AUTOINC read/write is - kept in trx_t. In order to quickly determine - whether a transaction has locked the AUTOINC - lock we keep a pointer to the transaction - here in the autoinc_trx variable. 
This is to - avoid acquiring the lock_sys_t::mutex and - scanning the vector in trx_t. + /* @} */ - When an AUTOINC lock has to wait, the - corresponding lock instance is created on - the trx lock heap rather than use the - pre-allocated instance in autoinc_lock below.*/ - /* @{ */ - lock_t* autoinc_lock; - /*!< a buffer for an AUTOINC lock - for this table: we allocate the memory here - so that individual transactions can get it - and release it without a need to allocate - space from the lock heap of the trx: - otherwise the lock heap would grow rapidly - if we do a large insert from a select */ - ib_mutex_t* autoinc_mutex; - /*!< mutex protecting the autoincrement - counter */ + /** AUTOINC related members. @{ */ + + /* The actual collection of tables locked during AUTOINC read/write is + kept in trx_t. In order to quickly determine whether a transaction has + locked the AUTOINC lock we keep a pointer to the transaction here in + the 'autoinc_trx' member. This is to avoid acquiring the + lock_sys_t::mutex and scanning the vector in trx_t. + When an AUTOINC lock has to wait, the corresponding lock instance is + created on the trx lock heap rather than use the pre-allocated instance + in autoinc_lock below. */ + + /** A buffer for an AUTOINC lock for this table. We allocate the + memory here so that individual transactions can get it and release it + without a need to allocate space from the lock heap of the trx: + otherwise the lock heap would grow rapidly if we do a large insert + from a select. */ + lock_t* autoinc_lock; /** Creation state of autoinc_mutex member */ - volatile os_once::state_t - autoinc_mutex_created; + volatile os_once::state_t autoinc_mutex_created; - ib_uint64_t autoinc;/*!< autoinc counter value to give to the - next inserted row */ - ulong n_waiting_or_granted_auto_inc_locks; - /*!< This counter is used to track the number - of granted and pending autoinc locks on this - table. 
This value is set after acquiring the - lock_sys_t::mutex but we peek the contents to - determine whether other transactions have - acquired the AUTOINC lock or not. Of course - only one transaction can be granted the - lock but there can be multiple waiters. */ - const trx_t* autoinc_trx; - /*!< The transaction that currently holds the - the AUTOINC lock on this table. - Protected by lock_sys->mutex. */ - fts_t* fts; /* FTS specific state variables */ - /* @} */ - /*----------------------*/ + /** Mutex protecting the autoincrement counter. */ + ib_mutex_t* autoinc_mutex; - ib_quiesce_t quiesce;/*!< Quiescing states, protected by the - dict_index_t::lock. ie. we can only change - the state if we acquire all the latches - (dict_index_t::lock) in X mode of this table's - indexes. */ + /** Autoinc counter value to give to the next inserted row. */ + ib_uint64_t autoinc; + + /** This counter is used to track the number of granted and pending + autoinc locks on this table. This value is set after acquiring the + lock_sys_t::mutex but we peek the contents to determine whether other + transactions have acquired the AUTOINC lock or not. Of course only one + transaction can be granted the lock but there can be multiple + waiters. */ + ulong n_waiting_or_granted_auto_inc_locks; + + /** The transaction that currently holds the the AUTOINC lock on this + table. Protected by lock_sys->mutex. */ + const trx_t* autoinc_trx; + + /* @} */ + + /** Count of how many handles are opened to this table from memcached. + DDL on the table is NOT allowed until this count goes to zero. If + it is -1, then there's DDL on the table, DML from memcached will be + blocked. */ + lint memcached_sync_count; + + /** FTS specific state variables. */ + fts_t* fts; + + /** Quiescing states, protected by the dict_index_t::lock. ie. we can + only change the state if we acquire all the latches (dict_index_t::lock) + in X mode of this table's indexes. 
*/ + ib_quiesce_t quiesce; + + /** Count of the number of record locks on this table. We use this to + determine whether we can evict the table from the dictionary cache. + It is protected by lock_sys->mutex. */ + ulint n_rec_locks; + +#ifndef UNIV_DEBUG +private: +#endif + /** Count of how many handles are opened to this table. Dropping of the + table is NOT allowed until this count gets to zero. MySQL does NOT + itself check the number of open handles at DROP. */ + ulint n_ref_count; + +public: + /** List of locks on the table. Protected by lock_sys->mutex. */ + table_lock_list_t locks; + + /** Timestamp of the last modification of this table. */ + time_t update_time; + + /** row-id counter for use by intrinsic table for getting row-id. + Given intrinsic table semantics, row-id can be locally maintained + instead of getting it from central generator which involves mutex + locking. */ + ib_uint64_t sess_row_id; + + /** trx_id counter for use by intrinsic table for getting trx-id. + Intrinsic table are not shared so don't need a central trx-id + but just need a increased counter to track consistent view while + proceeding SELECT as part of UPDATE. */ + ib_uint64_t sess_trx_id; - /*----------------------*/ - ulint n_rec_locks; - /*!< Count of the number of record locks on - this table. We use this to determine whether - we can evict the table from the dictionary - cache. It is protected by lock_sys->mutex. */ - ulint n_ref_count; - /*!< count of how many handles are opened - to this table; dropping of the table is - NOT allowed until this count gets to zero; - MySQL does NOT itself check the number of - open handles at drop */ - UT_LIST_BASE_NODE_T(lock_t) - locks; /*!< list of locks on the table; protected - by lock_sys->mutex */ #endif /* !UNIV_HOTBACKUP */ - ibool is_encrypted; + + bool is_encrypted; #ifdef UNIV_DEBUG - ulint magic_n;/*!< magic number */ -/** Value of dict_table_t::magic_n */ -# define DICT_TABLE_MAGIC_N 76333786 + /** Value of 'magic_n'. 
*/ + #define DICT_TABLE_MAGIC_N 76333786 + + /** Magic number. */ + ulint magic_n; #endif /* UNIV_DEBUG */ + /** mysql_row_templ_t for base columns used for compute the virtual + columns */ + dict_vcol_templ_t* vc_templ; + + /** encryption key, it's only for export/import */ + byte* encryption_key; + + /** encryption iv, it's only for export/import */ + byte* encryption_iv; }; +/*******************************************************************//** +Initialise the table lock list. */ +void +lock_table_lock_list_init( +/*======================*/ + table_lock_list_t* locks); /*!< List to initialise */ + /** A function object to add the foreign key constraint to the referenced set of the referenced table, if it exists in the dictionary cache. */ struct dict_foreign_add_to_referenced_table { @@ -1381,24 +1940,10 @@ dict_table_autoinc_destroy( if (table->autoinc_mutex_created == os_once::DONE && table->autoinc_mutex != NULL) { mutex_free(table->autoinc_mutex); - delete table->autoinc_mutex; + UT_DELETE(table->autoinc_mutex); } } -/** Allocate and init the autoinc latch of a given table. -This function must not be called concurrently on the same table object. -@param[in,out] table_void table whose autoinc latch to create */ -void -dict_table_autoinc_alloc( - void* table_void); - -/** Allocate and init the zip_pad_mutex of a given index. -This function must not be called concurrently on the same index object. -@param[in,out] index_void index whose zip_pad_mutex to create */ -void -dict_index_zip_pad_alloc( - void* index_void); - /** Request for lazy creation of the autoinc latch of a given table. This function is only called from either single threaded environment or from a thread that has not shared the table object with other threads. 
@@ -1408,13 +1953,8 @@ void dict_table_autoinc_create_lazy( dict_table_t* table) { -#ifdef HAVE_ATOMIC_BUILTINS table->autoinc_mutex = NULL; table->autoinc_mutex_created = os_once::NEVER_DONE; -#else /* HAVE_ATOMIC_BUILTINS */ - dict_table_autoinc_alloc(table); - table->autoinc_mutex_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ } /** Request a lazy creation of dict_index_t::zip_pad::mutex. @@ -1426,13 +1966,8 @@ void dict_index_zip_pad_mutex_create_lazy( dict_index_t* index) { -#ifdef HAVE_ATOMIC_BUILTINS index->zip_pad.mutex = NULL; index->zip_pad.mutex_created = os_once::NEVER_DONE; -#else /* HAVE_ATOMIC_BUILTINS */ - dict_index_zip_pad_alloc(index); - index->zip_pad.mutex_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ } /** Destroy the zip_pad_mutex of the given index. @@ -1446,8 +1981,8 @@ dict_index_zip_pad_mutex_destroy( { if (index->zip_pad.mutex_created == os_once::DONE && index->zip_pad.mutex != NULL) { - os_fast_mutex_free(index->zip_pad.mutex); - delete index->zip_pad.mutex; + mutex_free(index->zip_pad.mutex); + UT_DELETE(index->zip_pad.mutex); } } @@ -1458,7 +1993,7 @@ void dict_index_zip_pad_unlock( dict_index_t* index) { - os_fast_mutex_unlock(index->zip_pad.mutex); + mutex_exit(index->zip_pad.mutex); } #ifdef UNIV_DEBUG @@ -1474,8 +2009,36 @@ dict_table_autoinc_own( } #endif /* UNIV_DEBUG */ +/** Check whether the col is used in spatial index or regular index. +@param[in] col column to check +@return spatial status */ +inline +spatial_status_t +dict_col_get_spatial_status( + const dict_col_t* col) +{ + spatial_status_t spatial_status = SPATIAL_NONE; + + /* Column is not a part of any index. */ + if (!col->ord_part) { + return(spatial_status); + } + + if (DATA_GEOMETRY_MTYPE(col->mtype)) { + if (col->max_prefix == 0) { + spatial_status = SPATIAL_ONLY; + } else { + /* Any regular index on a geometry column + should have a prefix. 
*/ + spatial_status = SPATIAL_MIXED; + } + } + + return(spatial_status); +} + #ifndef UNIV_NONINL #include "dict0mem.ic" #endif -#endif +#endif /* dict0mem_h */ diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic index 38d51f61789..3269596feb7 100644 --- a/storage/innobase/include/dict0mem.ic +++ b/storage/innobase/include/dict0mem.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,11 +63,42 @@ dict_mem_fill_index_struct( #ifndef UNIV_HOTBACKUP index->space = (unsigned int) space; index->page = FIL_NULL; + index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; #endif /* !UNIV_HOTBACKUP */ index->table_name = table_name; index->n_fields = (unsigned int) n_fields; /* The '1 +' above prevents allocation of an empty mem block */ + index->allow_duplicates = false; + index->nulls_equal = false; + index->disable_ahi = false; + + new (&index->rec_cache) rec_cache_t(); + + if (heap != NULL) { + index->last_ins_cur = + static_cast(mem_heap_alloc( + heap, sizeof(last_ops_cur_t))); + + new (index->last_ins_cur) last_ops_cur_t(); + + index->last_sel_cur = + static_cast(mem_heap_alloc( + heap, sizeof(last_ops_cur_t))); + + new (index->last_sel_cur) last_ops_cur_t(); + + index->rec_cache.offsets = + static_cast(mem_heap_alloc( + heap, sizeof(ulint) * OFFS_IN_REC_NORMAL_SIZE)); + + index->rec_cache.sz_of_offsets = OFFS_IN_REC_NORMAL_SIZE; + } else { + index->last_ins_cur = NULL; + index->last_sel_cur = NULL; + index->rec_cache.offsets = NULL; + } + #ifdef UNIV_DEBUG index->magic_n = DICT_INDEX_MAGIC_N; #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/include/dict0priv.h 
b/storage/innobase/include/dict0priv.h index e034662aba0..35548faeb93 100644 --- a/storage/innobase/include/dict0priv.h +++ b/storage/innobase/include/dict0priv.h @@ -26,10 +26,12 @@ Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains #ifndef dict0priv_h #define dict0priv_h +#include "univ.i" + /**********************************************************************//** Gets a table; loads it to the dictionary cache if necessary. A low-level function. Note: Not to be called from outside dict0*c functions. -@return table, NULL if not found */ +@return table, NULL if not found */ UNIV_INLINE dict_table_t* dict_table_get_low( @@ -38,7 +40,7 @@ dict_table_get_low( /**********************************************************************//** Checks if a table is in the dictionary cache. -@return table, NULL if not found */ +@return table, NULL if not found */ UNIV_INLINE dict_table_t* dict_table_check_if_in_cache_low( @@ -47,7 +49,7 @@ dict_table_check_if_in_cache_low( /**********************************************************************//** Returns a table object based on table id. -@return table, NULL if does not exist */ +@return table, NULL if does not exist */ UNIV_INLINE dict_table_t* dict_table_open_on_id_low( diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic index 983218af78a..fd10c566be6 100644 --- a/storage/innobase/include/dict0priv.ic +++ b/storage/innobase/include/dict0priv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,7 +31,7 @@ Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains /**********************************************************************//** Gets a table; loads it to the dictionary cache if necessary. A low-level function. -@return table, NULL if not found */ +@return table, NULL if not found */ UNIV_INLINE dict_table_t* dict_table_get_low( @@ -41,24 +41,22 @@ dict_table_get_low( dict_table_t* table; ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); table = dict_table_check_if_in_cache_low(table_name); if (table && table->corrupted) { - fprintf(stderr, "InnoDB: table"); - ut_print_name(stderr, NULL, TRUE, table->name); + ib::error error; + error << "Table " << table->name << "is corrupted"; if (srv_load_corrupted) { - fputs(" is corrupted, but" - " innodb_force_load_corrupted is set\n", stderr); + error << ", but innodb_force_load_corrupted is set"; } else { - fputs(" is corrupted\n", stderr); return(NULL); } } if (table == NULL) { - table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); + table = dict_load_table(table_name, true, DICT_ERR_IGNORE_NONE); } ut_ad(!table || table->cached); @@ -68,7 +66,7 @@ dict_table_get_low( /**********************************************************************//** Returns a table object based on table id. 
-@return table, NULL if does not exist */ +@return table, NULL if does not exist */ UNIV_INLINE dict_table_t* dict_table_open_on_id_low( @@ -81,7 +79,7 @@ dict_table_open_on_id_low( dict_table_t* table; ulint fold; - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); /* Look for the table name in the hash table */ fold = ut_fold_ull(table_id); @@ -102,7 +100,7 @@ dict_table_open_on_id_low( /**********************************************************************//** Checks if a table is in the dictionary cache. -@return table, NULL if not found */ +@return table, NULL if not found */ UNIV_INLINE dict_table_t* dict_table_check_if_in_cache_low( @@ -112,15 +110,19 @@ dict_table_check_if_in_cache_low( dict_table_t* table; ulint table_fold; + DBUG_ENTER("dict_table_check_if_in_cache_low"); + DBUG_PRINT("dict_table_check_if_in_cache_low", + ("table: '%s'", table_name)); + ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(mutex_own(&dict_sys->mutex)); /* Look for the table name in the hash table */ table_fold = ut_fold_string(table_name); HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, dict_table_t*, table, ut_ad(table->cached), - !strcmp(table->name, table_name)); - return(table); + !strcmp(table->name.m_name, table_name)); + DBUG_RETURN(table); } #endif /*! UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h index 72501bf9429..8941b399f7d 100644 --- a/storage/innobase/include/dict0stats.h +++ b/storage/innobase/include/dict0stats.h @@ -28,7 +28,6 @@ Created Jan 06, 2010 Vasil Dimov #include "univ.i" -#include "db0err.h" #include "dict0types.h" #include "trx0types.h" @@ -60,7 +59,6 @@ is relatively quick and is used to calculate transient statistics that are not saved on disk. This was the only way to calculate statistics before the Persistent Statistics feature was introduced. 
*/ -UNIV_INTERN void dict_stats_update_transient( /*========================*/ @@ -133,7 +131,6 @@ dict_stats_deinit( Calculates new estimates for table and index statistics. The statistics are used in query optimization. @return DB_* error code or DB_SUCCESS */ -UNIV_INTERN dberr_t dict_stats_update( /*==============*/ @@ -148,7 +145,6 @@ Removes the information for a particular index's stats from the persistent storage if it exists and if there is data stored for this index. This function creates its own trx and commits it. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_drop_index( /*==================*/ @@ -163,7 +159,6 @@ Removes the statistics for a table and all of its indexes from the persistent storage if it exists and if there is data stored for the table. This function creates its own transaction and commits it. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_drop_table( /*==================*/ @@ -174,7 +169,6 @@ dict_stats_drop_table( /*********************************************************************//** Fetches or calculates new estimates for index statistics. */ -UNIV_INTERN void dict_stats_update_for_index( /*========================*/ @@ -185,7 +179,6 @@ dict_stats_update_for_index( Renames a table in InnoDB persistent stats storage. This function creates its own transaction and commits it. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t dict_stats_rename_table( /*====================*/ @@ -194,7 +187,19 @@ dict_stats_rename_table( char* errstr, /*!< out: error string if != DB_SUCCESS is returned */ size_t errstr_sz); /*!< in: errstr size */ - +/*********************************************************************//** +Renames an index in InnoDB persistent stats storage. +This function creates its own transaction and commits it. +@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned +if the persistent stats do not exist. 
*/ +dberr_t +dict_stats_rename_index( +/*====================*/ + const dict_table_t* table, /*!< in: table whose index + is renamed */ + const char* old_index_name, /*!< in: old index name */ + const char* new_index_name) /*!< in: new index name */ + __attribute__((warn_unused_result)); /*********************************************************************//** Save defragmentation result. @return DB_SUCCESS or error code */ @@ -228,8 +233,48 @@ dict_stats_empty_defrag_stats( dict_index_t* index); /*!< in: index to clear defragmentation stats */ +/*********************************************************************//** +Renames an index in InnoDB persistent stats storage. +This function creates its own transaction and commits it. +@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned +if the persistent stats do not exist. */ +dberr_t +dict_stats_rename_index( +/*====================*/ + const dict_table_t* table, /*!< in: table whose index + is renamed */ + const char* old_index_name, /*!< in: old index name */ + const char* new_index_name) /*!< in: new index name */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Save an individual index's statistic into the persistent statistics +storage. +@param[in] index index to be updated +@param[in] last_update timestamp of the stat +@param[in] stat_name name of the stat +@param[in] stat_value value of the stat +@param[in] sample_size n pages sampled or NULL +@param[in] stat_description description of the stat +@param[in,out] trx in case of NULL the function will +allocate and free the trx object. If it is not NULL then it will be +rolled back only in the case of error, but not freed. 
+@return DB_SUCCESS or error code */ +dberr_t +dict_stats_save_index_stat( + dict_index_t* index, + lint last_update, + const char* stat_name, + ib_uint64_t stat_value, + ib_uint64_t* sample_size, + const char* stat_description, + trx_t* trx); + #ifndef UNIV_NONINL #include "dict0stats.ic" #endif +#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS +void test_dict_stats_all(); +#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */ + #endif /* dict0stats_h */ diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic index ec9a9065470..80709091734 100644 --- a/storage/innobase/include/dict0stats.ic +++ b/storage/innobase/include/dict0stats.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,10 +23,9 @@ Code used for calculating and manipulating table statistics. Created Jan 23, 2012 Vasil Dimov *******************************************************/ -#include "univ.i" -#include "dict0dict.h" /* dict_table_stats_lock() */ -#include "dict0types.h" /* dict_table_t */ -#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */ +#include "dict0dict.h" +#include "dict0types.h" +#include "srv0srv.h" /*********************************************************************//** Set the persistent statistics flag for a given table. 
This is set only @@ -183,9 +182,9 @@ dict_stats_deinit( /*==============*/ dict_table_t* table) /*!< in/out: table */ { - ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(mutex_own(&dict_sys->mutex) || dict_table_is_intrinsic(table)); - ut_a(table->n_ref_count == 0); + ut_a(table->get_ref_count() == 0); dict_table_stats_lock(table, RW_X_LATCH); diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h index 34dc4657829..50c2591332e 100644 --- a/storage/innobase/include/dict0stats_bg.h +++ b/storage/innobase/include/dict0stats_bg.h @@ -28,20 +28,28 @@ Created Apr 26, 2012 Vasil Dimov #include "univ.i" -#include "dict0types.h" /* dict_table_t, table_id_t */ -#include "os0sync.h" /* os_event_t */ -#include "os0thread.h" /* DECLARE_THREAD */ +#include "dict0types.h" +#include "os0event.h" +#include "os0thread.h" /** Event to wake up the stats thread */ extern os_event_t dict_stats_event; +#ifdef HAVE_PSI_INTERFACE +extern mysql_pfs_key_t dict_stats_recalc_pool_mutex_key; +#endif /* HAVE_PSI_INTERFACE */ + +#ifdef UNIV_DEBUG +/** Value of MySQL global used to disable dict_stats thread. */ +extern my_bool innodb_dict_stats_disabled_debug; +#endif /* UNIV_DEBUG */ + /*****************************************************************//** Add a table to the recalc pool, which is processed by the background stats gathering thread. Only the table id is added to the list, so the table can be closed after being enqueued and it will be opened when needed. If the table does not exist later (has been DROPped), then it will be removed from the pool and skipped. */ -UNIV_INTERN void dict_stats_recalc_pool_add( /*=======================*/ @@ -50,37 +58,14 @@ dict_stats_recalc_pool_add( /*****************************************************************//** Delete a given table from the auto recalc pool. 
dict_stats_recalc_pool_del() */ -UNIV_INTERN void dict_stats_recalc_pool_del( /*=======================*/ const dict_table_t* table); /*!< in: table to remove */ -/*****************************************************************//** -Add an index in a table to the defrag pool, which is processed by the -background stats gathering thread. Only the table id and index id are -added to the list, so the table can be closed after being enqueued and -it will be opened when needed. If the table or index does not exist later -(has been DROPped), then it will be removed from the pool and skipped. */ -UNIV_INTERN -void -dict_stats_defrag_pool_add( -/*=======================*/ - const dict_index_t* index); /*!< in: table to add */ - -/*****************************************************************//** -Delete a given index from the auto defrag pool. */ -UNIV_INTERN -void -dict_stats_defrag_pool_del( -/*=======================*/ - const dict_table_t* table, /*!stats_bg_flag under dict_sys->mutex. */ -UNIV_INTERN void dict_stats_wait_bg_to_stop_using_table( /*===================================*/ @@ -117,7 +101,6 @@ dict_stats_wait_bg_to_stop_using_table( /*****************************************************************//** Initialize global variables needed for the operation of dict_stats_thread(). Must be called before dict_stats_thread() is started. */ -UNIV_INTERN void dict_stats_thread_init(); /*====================*/ @@ -125,23 +108,41 @@ dict_stats_thread_init(); /*****************************************************************//** Free resources allocated by dict_stats_thread_init(), must be called after dict_stats_thread() has exited. */ -UNIV_INTERN void dict_stats_thread_deinit(); /*======================*/ +#ifdef UNIV_DEBUG +/** Disables dict stats thread. It's used by: + SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0). 
+@param[in] thd thread handle +@param[in] var pointer to system variable +@param[out] var_ptr where the formal string goes +@param[in] save immediate result from check function */ +void +dict_stats_disabled_debug_update( + THD* thd, + struct st_mysql_sys_var* var, + void* var_ptr, + const void* save); +#endif /* UNIV_DEBUG */ + /*****************************************************************//** This is the thread for background stats gathering. It pops tables, from the auto recalc list and proceeds them, eventually recalculating their statistics. @return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN +extern "C" os_thread_ret_t DECLARE_THREAD(dict_stats_thread)( /*==============================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/** Shutdown the dict stats thread. */ +void +dict_stats_shutdown(); + # ifndef UNIV_NONINL # include "dict0stats_bg.ic" # endif diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index 35430e8ea62..ae002dd9487 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,15 +27,19 @@ Created 1/8/1996 Heikki Tuuri #ifndef dict0types_h #define dict0types_h +#include + struct dict_sys_t; struct dict_col_t; struct dict_field_t; struct dict_index_t; struct dict_table_t; struct dict_foreign_t; +struct dict_v_col_t; struct ind_node_t; struct tab_node_t; +struct dict_add_v_col_t; /* Space id and page no where the dictionary header resides */ #define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ @@ -83,10 +87,14 @@ typedef enum { ATOMIC_WRITES_OFF = 2 } atomic_writes_t; +#ifndef UNIV_INNOCHECKSUM +typedef ib_mutex_t DictSysMutex; +#endif /* !UNIV_INNOCHECKSUM */ + /** Prefix for tmp tables, adopted from sql/table.h */ -#define tmp_file_prefix "#sql" -#define tmp_file_prefix_length 4 -#define TEMP_FILE_PREFIX_INNODB "#sql-ib" +#define TEMP_FILE_PREFIX "#sql" +#define TEMP_FILE_PREFIX_LENGTH 4 +#define TEMP_FILE_PREFIX_INNODB "#sql-ib" #define TEMP_TABLE_PREFIX "#sql" #define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX @@ -96,4 +104,31 @@ typedef enum { extern uint ibuf_debug; #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +/** Shift for spatial status */ +#define SPATIAL_STATUS_SHIFT 12 + +/** Mask to encode/decode spatial status. */ +#define SPATIAL_STATUS_MASK (3 << SPATIAL_STATUS_SHIFT) + +#if SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN +# error SPATIAL_STATUS_MASK < REC_VERSION_56_MAX_INDEX_COL_LEN +#endif + +/** whether a col is used in spatial index or regular index +Note: the spatial status is part of persistent undo log, +so we should not modify the values in MySQL 5.7 */ +enum spatial_status_t { + /* Unkown status (undo format in 5.7.9) */ + SPATIAL_UNKNOWN = 0, + + /** Not used in gis index. */ + SPATIAL_NONE = 1, + + /** Used in both spatial index and regular index. */ + SPATIAL_MIXED = 2, + + /** Only used in spatial index. 
*/ + SPATIAL_ONLY = 3 +}; + #endif diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h new file mode 100644 index 00000000000..3126c8e4683 --- /dev/null +++ b/storage/innobase/include/dyn0buf.h @@ -0,0 +1,505 @@ +/***************************************************************************** + +Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/dyn0buf.h +The dynamically allocated buffer implementation + +Created 2013-03-16 Sunny Bains +*******************************************************/ + +#ifndef dyn0buf_h +#define dyn0buf_h + +#include "univ.i" +#include "ut0lst.h" +#include "mem0mem.h" +#include "dyn0types.h" + +/** Class that manages dynamic buffers. It uses a UT_LIST of +dyn_buf_t::block_t instances. We don't use STL containers in +order to avoid the overhead of heap calls. Using a custom memory +allocator doesn't solve the problem either because we have to get +the memory from somewhere. We can't use the block_t::m_data as the +backend for the custom allocator because we would like the data in +the blocks to be contiguous. 
*/ +template +class dyn_buf_t { +public: + + class block_t; + + typedef UT_LIST_NODE_T(block_t) block_node_t; + typedef UT_LIST_BASE_NODE_T(block_t) block_list_t; + + class block_t { + public: + + block_t() + { + ut_ad(MAX_DATA_SIZE <= (2 << 15)); + init(); + } + + ~block_t() { } + + /** + Gets the number of used bytes in a block. + @return number of bytes used */ + ulint used() const + MY_ATTRIBUTE((warn_unused_result)) + { + return(static_cast(m_used & ~DYN_BLOCK_FULL_FLAG)); + } + + /** + Gets pointer to the start of data. + @return pointer to data */ + byte* start() + MY_ATTRIBUTE((warn_unused_result)) + { + return(m_data); + } + + /** + @return start of data - non const version */ + byte* begin() + MY_ATTRIBUTE((warn_unused_result)) + { + return(m_data); + } + + /** + @return end of used data - non const version */ + byte* end() + MY_ATTRIBUTE((warn_unused_result)) + { + return(begin() + m_used); + } + + /** + @return start of data - const version */ + const byte* begin() const + MY_ATTRIBUTE((warn_unused_result)) + { + return(m_data); + } + + /** + @return end of used data - const version */ + const byte* end() const + MY_ATTRIBUTE((warn_unused_result)) + { + return(begin() + m_used); + } + + private: + /** + @return pointer to start of reserved space */ + template + Type push(ib_uint32_t size) + { + Type ptr = reinterpret_cast(end()); + + m_used += size; + ut_ad(m_used <= static_cast(MAX_DATA_SIZE)); + + return(ptr); + } + + /** + Grow the stack. 
*/ + void close(const byte* ptr) + { + /* Check that it is within bounds */ + ut_ad(ptr >= begin()); + ut_ad(ptr <= begin() + m_buf_end); + + /* We have done the boundary check above */ + m_used = static_cast(ptr - begin()); + + ut_ad(m_used <= MAX_DATA_SIZE); + ut_d(m_buf_end = 0); + } + + /** + Initialise the block */ + void init() + { + m_used = 0; + ut_d(m_buf_end = 0); + ut_d(m_magic_n = DYN_BLOCK_MAGIC_N); + } + private: +#ifdef UNIV_DEBUG + /** If opened then this is the buffer end offset, else 0 */ + ulint m_buf_end; + + /** Magic number (DYN_BLOCK_MAGIC_N) */ + ulint m_magic_n; +#endif /* UNIV_DEBUG */ + + /** SIZE - sizeof(m_node) + sizeof(m_used) */ + enum { + MAX_DATA_SIZE = SIZE + - sizeof(block_node_t) + + sizeof(ib_uint32_t) + }; + + /** Storage */ + byte m_data[MAX_DATA_SIZE]; + + /** Doubly linked list node. */ + block_node_t m_node; + + /** number of data bytes used in this block; + DYN_BLOCK_FULL_FLAG is set when the block becomes full */ + ib_uint32_t m_used; + + friend class dyn_buf_t; + }; + + enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE}; + + /** Default constructor */ + dyn_buf_t() + : + m_heap(), + m_size() + { + UT_LIST_INIT(m_list, &block_t::m_node); + push_back(&m_first_block); + } + + /** Destructor */ + ~dyn_buf_t() + { + erase(); + } + + /** Reset the buffer vector */ + void erase() + { + if (m_heap != NULL) { + mem_heap_free(m_heap); + m_heap = NULL; + + /* Initialise the list and add the first block. */ + UT_LIST_INIT(m_list, &block_t::m_node); + push_back(&m_first_block); + } else { + m_first_block.init(); + ut_ad(UT_LIST_GET_LEN(m_list) == 1); + } + + m_size = 0; + } + + /** + Makes room on top and returns a pointer to a buffer in it. After + copying the elements, the caller must close the buffer using close(). + @param size in bytes of the buffer; MUST be <= MAX_DATA_SIZE! 
+ @return pointer to the buffer */ + byte* open(ulint size) + MY_ATTRIBUTE((warn_unused_result)) + { + ut_ad(size > 0); + ut_ad(size <= MAX_DATA_SIZE); + + block_t* block; + + block = has_space(size) ? back() : add_block(); + + ut_ad(block->m_used <= MAX_DATA_SIZE); + ut_d(block->m_buf_end = block->m_used + size); + + return(block->end()); + } + + /** + Closes the buffer returned by open. + @param ptr end of used space */ + void close(const byte* ptr) + { + ut_ad(UT_LIST_GET_LEN(m_list) > 0); + block_t* block = back(); + + m_size -= block->used(); + + block->close(ptr); + + m_size += block->used(); + } + + /** + Makes room on top and returns a pointer to the added element. + The caller must copy the element to the pointer returned. + @param size in bytes of the element + @return pointer to the element */ + template + Type push(ib_uint32_t size) + { + ut_ad(size > 0); + ut_ad(size <= MAX_DATA_SIZE); + + block_t* block; + + block = has_space(size) ? back() : add_block(); + + m_size += size; + + /* See ISO C++03 14.2/4 for why "template" is required. */ + + return(block->template push(size)); + } + + /** + Pushes n bytes. + @param str string to write + @param len string length */ + void push(const byte* ptr, ib_uint32_t len) + { + while (len > 0) { + ib_uint32_t n_copied; + + if (len >= MAX_DATA_SIZE) { + n_copied = MAX_DATA_SIZE; + } else { + n_copied = len; + } + + ::memmove(push(n_copied), ptr, n_copied); + + ptr += n_copied; + len -= n_copied; + } + } + + /** + Returns a pointer to an element in the buffer. const version. + @param pos position of element in bytes from start + @return pointer to element */ + template + const Type at(ulint pos) const + { + block_t* block = const_cast( + const_cast(this)->find(pos)); + + return(reinterpret_cast(block->begin() + pos)); + } + + /** + Returns a pointer to an element in the buffer. non const version. 
+ @param pos position of element in bytes from start + @return pointer to element */ + template + Type at(ulint pos) + { + block_t* block = const_cast(find(pos)); + + return(reinterpret_cast(block->begin() + pos)); + } + + /** + Returns the size of the total stored data. + @return data size in bytes */ + ulint size() const + MY_ATTRIBUTE((warn_unused_result)) + { +#ifdef UNIV_DEBUG + ulint total_size = 0; + + for (const block_t* block = UT_LIST_GET_FIRST(m_list); + block != NULL; + block = UT_LIST_GET_NEXT(m_node, block)) { + + total_size += block->used(); + } + + ut_ad(total_size == m_size); +#endif /* UNIV_DEBUG */ + return(m_size); + } + + /** + Iterate over each block and call the functor. + @return false if iteration was terminated. */ + template + bool for_each_block(Functor& functor) const + { + for (const block_t* block = UT_LIST_GET_FIRST(m_list); + block != NULL; + block = UT_LIST_GET_NEXT(m_node, block)) { + + if (!functor(block)) { + return(false); + } + } + + return(true); + } + + /** + Iterate over all the blocks in reverse and call the iterator + @return false if iteration was terminated. 
*/ + template + bool for_each_block_in_reverse(Functor& functor) const + { + for (block_t* block = UT_LIST_GET_LAST(m_list); + block != NULL; + block = UT_LIST_GET_PREV(m_node, block)) { + + if (!functor(block)) { + return(false); + } + } + + return(true); + } + + /** + @return the first block */ + block_t* front() + MY_ATTRIBUTE((warn_unused_result)) + { + ut_ad(UT_LIST_GET_LEN(m_list) > 0); + return(UT_LIST_GET_FIRST(m_list)); + } + + /** + @return true if m_first_block block was not filled fully */ + bool is_small() const + MY_ATTRIBUTE((warn_unused_result)) + { + return(m_heap == NULL); + } + +private: + // Disable copying + dyn_buf_t(const dyn_buf_t&); + dyn_buf_t& operator=(const dyn_buf_t&); + + /** + Add the block to the end of the list*/ + void push_back(block_t* block) + { + block->init(); + + UT_LIST_ADD_LAST(m_list, block); + } + + /** @return the last block in the list */ + block_t* back() + { + return(UT_LIST_GET_LAST(m_list)); + } + + /* + @return true if request can be fullfilled */ + bool has_space(ulint size) const + { + return(back()->m_used + size <= MAX_DATA_SIZE); + } + + /* + @return true if request can be fullfilled */ + bool has_space(ulint size) + { + return(back()->m_used + size <= MAX_DATA_SIZE); + } + + /** Find the block that contains the pos. + @param pos absolute offset, it is updated to make it relative + to the block + @return the block containing the pos. 
*/ + block_t* find(ulint& pos) + { + block_t* block; + + ut_ad(UT_LIST_GET_LEN(m_list) > 0); + + for (block = UT_LIST_GET_FIRST(m_list); + block != NULL; + block = UT_LIST_GET_NEXT(m_node, block)) { + + if (pos < block->used()) { + break; + } + + pos -= block->used(); + } + + ut_ad(block != NULL); + ut_ad(block->used() >= pos); + + return(block); + } + + /** + Allocate and add a new block to m_list */ + block_t* add_block() + { + block_t* block; + + if (m_heap == NULL) { + m_heap = mem_heap_create(sizeof(*block)); + } + + block = reinterpret_cast( + mem_heap_alloc(m_heap, sizeof(*block))); + + push_back(block); + + return(block); + } + +private: + /** Heap to use for memory allocation */ + mem_heap_t* m_heap; + + /** Allocated blocks */ + block_list_t m_list; + + /** Total size used by all blocks */ + ulint m_size; + + /** The default block, should always be the first element. This + is for backwards compatibility and to avoid an extra heap allocation + for small REDO log records */ + block_t m_first_block; +}; + +typedef dyn_buf_t mtr_buf_t; + +/** mtr_buf_t copier */ +struct mtr_buf_copy_t { + /** The copied buffer */ + mtr_buf_t m_buf; + + /** Append a block to the redo log buffer. + @return whether the appending should continue (always true here) */ + bool operator()(const mtr_buf_t::block_t* block) + { + byte* buf = m_buf.open(block->used()); + memcpy(buf, block->begin(), block->used()); + m_buf.close(buf + block->used()); + return(true); + } +}; + +#endif /* dyn0buf_h */ diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h deleted file mode 100644 index 1bd10b6bf58..00000000000 --- a/storage/innobase/include/dyn0dyn.h +++ /dev/null @@ -1,199 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dyn0dyn.h -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dyn0dyn_h -#define dyn0dyn_h - -#include "univ.i" -#include "ut0lst.h" -#include "mem0mem.h" - -/** A block in a dynamically allocated array */ -struct dyn_block_t; -/** Dynamically allocated array */ -typedef dyn_block_t dyn_array_t; - -/** This is the initial 'payload' size of a dynamic array; -this must be > MLOG_BUF_MARGIN + 30! */ -#define DYN_ARRAY_DATA_SIZE 512 - -/*********************************************************************//** -Initializes a dynamic array. -@return initialized dyn array */ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - dyn_array_t* arr) /*!< in/out memory buffer of - size sizeof(dyn_array_t) */ - MY_ATTRIBUTE((nonnull)); -/************************************************************//** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr) /*!< in,own: dyn array */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to a buffer in it. 
-After copying the elements, the caller must close the buffer using -dyn_array_close. -@return pointer to the buffer */ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - dyn_array_t* arr, /*!< in: dynamic array */ - ulint size) /*!< in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Closes the buffer returned by dyn_array_open. */ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /*!< in: dynamic array */ - const byte* ptr) /*!< in: end of used space */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to -the added element. The caller must copy the element to -the pointer returned. -@return pointer to the element */ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - ulint size) /*!< in: size in bytes of the element */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/************************************************************//** -Returns pointer to an element in dyn array. -@return pointer to element */ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - const dyn_array_t* arr, /*!< in: dyn array */ - ulint pos) /*!< in: position of element - in bytes from array start */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/************************************************************//** -Returns the size of stored data in a dyn array. -@return data size in bytes */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - const dyn_array_t* arr) /*!< in: dyn array */ - MY_ATTRIBUTE((nonnull, warn_unused_result, pure)); -/************************************************************//** -Gets the first block in a dyn array. 
-@param arr dyn array -@return first block */ -#define dyn_array_get_first_block(arr) (arr) -/************************************************************//** -Gets the last block in a dyn array. -@param arr dyn array -@return last block */ -#define dyn_array_get_last_block(arr) \ - ((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr)) -/********************************************************************//** -Gets the next block in a dyn array. -@param arr dyn array -@param block dyn array block -@return pointer to next, NULL if end of list */ -#define dyn_array_get_next_block(arr, block) \ - ((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL) -/********************************************************************//** -Gets the previous block in a dyn array. -@param arr dyn array -@param block dyn array block -@return pointer to previous, NULL if end of list */ -#define dyn_array_get_prev_block(arr, block) \ - ((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL) -/********************************************************************//** -Gets the number of used bytes in a dyn array block. -@return number of bytes used */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ - MY_ATTRIBUTE((nonnull, warn_unused_result, pure)); -/********************************************************************//** -Gets pointer to the start of data in a dyn array block. -@return pointer to data */ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ - MY_ATTRIBUTE((nonnull, warn_unused_result, pure)); -/********************************************************//** -Pushes n bytes to a dyn array. 
*/ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /*!< in/out: dyn array */ - const byte* str, /*!< in: string to write */ - ulint len) /*!< in: string length */ - MY_ATTRIBUTE((nonnull)); - -/*#################################################################*/ - -/** @brief A block in a dynamically allocated array. -NOTE! Do not access the fields of the struct directly: the definition -appears here only for the compiler to know its size! */ -struct dyn_block_t{ - mem_heap_t* heap; /*!< in the first block this is != NULL - if dynamic allocation has been needed */ - ulint used; /*!< number of data bytes used in this block; - DYN_BLOCK_FULL_FLAG is set when the block - becomes full */ - byte data[DYN_ARRAY_DATA_SIZE]; - /*!< storage for array elements */ - UT_LIST_BASE_NODE_T(dyn_block_t) base; - /*!< linear list of dyn blocks: this node is - used only in the first block */ - UT_LIST_NODE_T(dyn_block_t) list; - /*!< linear list node: used in all blocks */ -#ifdef UNIV_DEBUG - ulint buf_end;/*!< only in the debug version: if dyn - array is opened, this is the buffer - end offset, else this is 0 */ - ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */ -#endif -}; - - -#ifndef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -#endif diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic deleted file mode 100644 index f18f2e6dff9..00000000000 --- a/storage/innobase/include/dyn0dyn.ic +++ /dev/null @@ -1,306 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dyn0dyn.ic -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -/** Value of dyn_block_t::magic_n */ -#define DYN_BLOCK_MAGIC_N 375767 -/** Flag for dyn_block_t::used that indicates a full block */ -#define DYN_BLOCK_FULL_FLAG 0x1000000UL - -/************************************************************//** -Adds a new block to a dyn array. -@return created block */ -UNIV_INTERN -dyn_block_t* -dyn_array_add_block( -/*================*/ - dyn_array_t* arr) /*!< in/out: dyn array */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/********************************************************************//** -Gets the number of used bytes in a dyn array block. -@return number of bytes used */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ -{ - ut_ad(block); - - return((block->used) & ~DYN_BLOCK_FULL_FLAG); -} - -/********************************************************************//** -Gets pointer to the start of data in a dyn array block. 
-@return pointer to data */ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ -{ - ut_ad(block); - - return(const_cast(block->data)); -} - -/*********************************************************************//** -Initializes a dynamic array. -@return initialized dyn array */ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - dyn_array_t* arr) /*!< in/out: memory buffer of - size sizeof(dyn_array_t) */ -{ - ut_ad(arr); -#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG -# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG" -#endif - - arr->heap = NULL; - arr->used = 0; - - ut_d(arr->buf_end = 0); - ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N); - - return(arr); -} - -/************************************************************//** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr) /*!< in: dyn array */ -{ - if (arr->heap != NULL) { - mem_heap_free(arr->heap); - } - - ut_d(arr->magic_n = 0); -} - -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to the added element. -The caller must copy the element to the pointer returned. 
-@return pointer to the element */ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - ulint size) /*!< in: size in bytes of the element */ -{ - dyn_block_t* block; - ulint used; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - } - } - - used = block->used; - - block->used = used + size; - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - - return(block->data + used); -} - -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to a buffer in it. -After copying the elements, the caller must close the buffer using -dyn_array_close. -@return pointer to the buffer */ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - dyn_array_t* arr, /*!< in: dynamic array */ - ulint size) /*!< in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -{ - dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - ut_a(size <= DYN_ARRAY_DATA_SIZE); - } - } - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - ut_ad(arr->buf_end == 0); - ut_d(arr->buf_end = block->used + size); - - return(block->data + block->used); -} - -/*********************************************************************//** -Closes the buffer returned by dyn_array_open. 
*/ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - const byte* ptr) /*!< in: end of used space */ -{ - dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - block = dyn_array_get_last_block(arr); - - ut_ad(arr->buf_end + block->data >= ptr); - - block->used = ptr - block->data; - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - - ut_d(arr->buf_end = 0); -} - -/************************************************************//** -Returns pointer to an element in dyn array. -@return pointer to element */ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - const dyn_array_t* arr, /*!< in: dyn array */ - ulint pos) /*!< in: position of element - in bytes from array start */ -{ - const dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - if (arr->heap != NULL) { - for (;;) { - ulint used = dyn_block_get_used(block); - - if (pos < used) { - break; - } - - pos -= used; - block = UT_LIST_GET_NEXT(list, block); - ut_ad(block); - } - } - - ut_ad(block); - ut_ad(dyn_block_get_used(block) >= pos); - - return(const_cast(block->data) + pos); -} - -/************************************************************//** -Returns the size of stored data in a dyn array. 
-@return data size in bytes */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - const dyn_array_t* arr) /*!< in: dyn array */ -{ - const dyn_block_t* block; - ulint sum = 0; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - - return(arr->used); - } - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - while (block != NULL) { - sum += dyn_block_get_used(block); - block = dyn_array_get_next_block(arr, block); - } - - return(sum); -} - -/********************************************************//** -Pushes n bytes to a dyn array. */ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /*!< in/out: dyn array */ - const byte* str, /*!< in: string to write */ - ulint len) /*!< in: string length */ -{ - ulint n_copied; - - while (len > 0) { - if (len > DYN_ARRAY_DATA_SIZE) { - n_copied = DYN_ARRAY_DATA_SIZE; - } else { - n_copied = len; - } - - memcpy(dyn_array_push(arr, n_copied), str, n_copied); - - str += n_copied; - len -= n_copied; - } -} diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/dyn0types.h similarity index 54% rename from storage/innobase/include/mem0pool.ic rename to storage/innobase/include/dyn0types.h index f4bafb8ba63..058a22f46e1 100644 --- a/storage/innobase/include/mem0pool.ic +++ b/storage/innobase/include/dyn0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -16,9 +16,24 @@ this program; if not, write to the Free Software Foundation, Inc., *****************************************************************************/ -/********************************************************************//** -@file include/mem0pool.ic -The lowest-level memory management +/**************************************************//** +@file include/dyn0types.h +The dynamically allocated buffer types and constants -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ +Created 2013-03-16 Sunny Bains +*******************************************************/ + +#ifndef dyn0types_h +#define dyn0types_h + +/** Value of dyn_block_t::magic_n */ +#define DYN_BLOCK_MAGIC_N 375767 + +/** This is the initial 'payload' size of a dynamic array; +this must be > MLOG_BUF_MARGIN + 30! */ +#define DYN_ARRAY_DATA_SIZE 512 + +/** Flag for dyn_block_t::used that indicates a full block */ +#define DYN_BLOCK_FULL_FLAG 0x1000000UL + +#endif /* dyn0types_h */ diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h index e3b1e6c16b6..f0e5b4006b6 100644 --- a/storage/innobase/include/eval0eval.h +++ b/storage/innobase/include/eval0eval.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -36,7 +36,6 @@ Created 12/29/1997 Heikki Tuuri Free the buffer from global dynamic memory for a value of a que_node, if it has been allocated in the above function. 
The freeing for pushed column values is done in sel_col_prefetch_buf_free. */ -UNIV_INTERN void eval_node_free_val_buf( /*===================*/ @@ -65,7 +64,7 @@ eval_node_set_int_val( lint val); /*!< in: value to set */ /*****************************************************************//** Gets an integer value from an expression node. -@return integer value */ +@return integer value */ UNIV_INLINE lint eval_node_get_int_val( @@ -91,7 +90,7 @@ eval_node_copy_val( que_node_t* node2); /*!< in: node to copy from */ /*****************************************************************//** Gets a iboolean value from a query node. -@return iboolean value */ +@return iboolean value */ UNIV_INLINE ibool eval_node_get_ibool_val( @@ -99,8 +98,7 @@ eval_node_get_ibool_val( que_node_t* node); /*!< in: query graph node */ /*****************************************************************//** Evaluates a comparison node. -@return the result of the comparison */ -UNIV_INTERN +@return the result of the comparison */ ibool eval_cmp( /*=====*/ diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic index e4b1dd08017..2f759301c03 100644 --- a/storage/innobase/include/eval0eval.ic +++ b/storage/innobase/include/eval0eval.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,7 +30,6 @@ Created 12/29/1997 Heikki Tuuri /*****************************************************************//** Evaluates a function node. */ -UNIV_INTERN void eval_func( /*======*/ @@ -41,8 +40,7 @@ NOTE that this memory must be explicitly freed when the query graph is freed. 
If the node already has allocated buffer, that buffer is freed here. NOTE that this is the only function where dynamic memory should be allocated for a query node val field. -@return pointer to allocated buffer */ -UNIV_INTERN +@return pointer to allocated buffer */ byte* eval_node_alloc_val_buf( /*====================*/ @@ -54,7 +52,7 @@ eval_node_alloc_val_buf( /*****************************************************************//** Allocates a new buffer if needed. -@return pointer to buffer */ +@return pointer to buffer */ UNIV_INLINE byte* eval_node_ensure_val_buf( @@ -145,7 +143,7 @@ eval_node_set_int_val( /*****************************************************************//** Gets an integer non-SQL null value from an expression node. -@return integer value */ +@return integer value */ UNIV_INLINE lint eval_node_get_int_val( @@ -165,7 +163,7 @@ eval_node_get_int_val( /*****************************************************************//** Gets a iboolean value from a query node. -@return iboolean value */ +@return iboolean value */ UNIV_INLINE ibool eval_node_get_ibool_val( diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h index 7755fb10343..6705c2c7b64 100644 --- a/storage/innobase/include/eval0proc.h +++ b/storage/innobase/include/eval0proc.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1998, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -33,7 +33,7 @@ Created 1/20/1998 Heikki Tuuri /**********************************************************************//** Performs an execution step of a procedure node. 
-@return query thread to run next or NULL */ +@return query thread to run next or NULL */ UNIV_INLINE que_thr_t* proc_step( @@ -41,39 +41,35 @@ proc_step( que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of an if-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* if_step( /*====*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of a while-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* while_step( /*=======*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of a for-loop node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* for_step( /*=====*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of an assignment statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* assign_step( /*========*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of a procedure call node. -@return query thread to run next or NULL */ +@return query thread to run next or NULL */ UNIV_INLINE que_thr_t* proc_eval_step( @@ -81,16 +77,14 @@ proc_eval_step( que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of an exit statement node. 
-@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* exit_step( /*======*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** Performs an execution step of a return-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN +@return query thread to run next or NULL */ que_thr_t* return_step( /*========*/ diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic index 81418bae2c9..cda3fd7b874 100644 --- a/storage/innobase/include/eval0proc.ic +++ b/storage/innobase/include/eval0proc.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1998, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,7 @@ Created 1/20/1998 Heikki Tuuri /**********************************************************************//** Performs an execution step of a procedure node. -@return query thread to run next or NULL */ +@return query thread to run next or NULL */ UNIV_INLINE que_thr_t* proc_step( @@ -64,7 +64,7 @@ proc_step( /**********************************************************************//** Performs an execution step of a procedure call node. 
-@return query thread to run next or NULL */ +@return query thread to run next or NULL */ UNIV_INLINE que_thr_t* proc_eval_step( diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h index fdc413e7520..acac155ef3f 100644 --- a/storage/innobase/include/fil0crypt.h +++ b/storage/innobase/include/fil0crypt.h @@ -128,7 +128,8 @@ fil_space_crypt_t * fil_space_create_crypt_data( /*========================*/ fil_encryption_t encrypt_mode, /*!< in: encryption mode */ - uint key_id); /*!< in: encryption key id */ + uint key_id) /*!< in: encryption key id */ + __attribute__((warn_unused_result)); /********************************************************************* Destroy crypt data */ @@ -211,7 +212,8 @@ UNIV_INTERN bool fil_space_check_encryption_read( /*============================*/ - ulint space); /*!< in: tablespace id */ + ulint space) /*!< in: tablespace id */ + __attribute__((warn_unused_result)); /****************************************************************** Decrypt a page @@ -222,10 +224,11 @@ fil_space_decrypt( /*==============*/ fil_space_crypt_t* crypt_data, /*!< in: crypt data */ byte* tmp_frame, /*!< in: temporary buffer */ - ulint page_size, /*!< in: page size */ + const page_size_t& page_size, /*!< in: page size */ byte* src_frame, /*!< in:out: page buffer */ - dberr_t* err); /*!< in: out: DB_SUCCESS or + dberr_t* err) /*!< in: out: DB_SUCCESS or error code */ + __attribute__((warn_unused_result)); /********************************************************************* Encrypt buffer page @@ -239,8 +242,9 @@ fil_space_encrypt( ulint offset, /*!< in: page no */ lsn_t lsn, /*!< in: page lsn */ byte* src_frame, /*!< in: page frame */ - ulint size, /*!< in: size of data to encrypt */ - byte* dst_frame); /*!< in: where to encrypt to */ + const page_size_t& page_size, /*!< in: page size */ + byte* dst_frame) /*!< in: where to encrypt to */ + __attribute__((warn_unused_result)); 
/********************************************************************* Decrypt buffer page @@ -250,10 +254,10 @@ UNIV_INTERN byte* fil_space_decrypt( /*==============*/ - ulint space, /*!< in: tablespace id */ - byte* src_frame, /*!< in: page frame */ - ulint page_size, /*!< in: size of data to encrypt */ - byte* dst_frame) /*!< in: where to decrypt to */ + ulint space, /*!< in: tablespace id */ + byte* src_frame, /*!< in: page frame */ + const page_size_t& page_size, /*!< in: page size */ + byte* dst_frame) /*!< in: where to decrypt to */ __attribute__((warn_unused_result)); /********************************************************************* @@ -265,8 +269,9 @@ UNIV_INTERN bool fil_space_verify_crypt_checksum( /*============================*/ - const byte* src_frame,/*!< in: page frame */ - ulint zip_size); /*!< in: size of data to encrypt */ + const byte* src_frame,/*!< in: page frame */ + const page_size_t& page_size) /*!< in: page size */ + __attribute__((warn_unused_result)); /********************************************************************* Init threads for key rotation */ @@ -408,9 +413,9 @@ fil_encrypt_buf( ulint offset, /*!< in: Page offset */ lsn_t lsn, /*!< in: lsn */ byte* src_frame, /*!< in: Source page to be encrypted */ - ulint zip_size, /*!< in: compressed size if - row_format compressed */ - byte* dst_frame); /*!< in: outbut buffer */ + const page_size_t& page_size, /*!< in: page size */ + byte* dst_frame) /*!< in: outbut buffer */ + __attribute__((warn_unused_result)); /****************************************************************** Calculate post encryption checksum @@ -420,8 +425,9 @@ UNIV_INTERN ulint fil_crypt_calculate_checksum( /*=========================*/ - ulint zip_size, /*!< in: zip_size or 0 */ - byte* dst_frame); /*!< in: page where to calculate */ + const page_size_t& page_size, /*!< in: page size */ + byte* dst_frame) /*!< in: page where to calculate */ + __attribute__((warn_unused_result)); #ifndef UNIV_NONINL #include 
"fil0crypt.ic" diff --git a/storage/innobase/include/fil0crypt.ic b/storage/innobase/include/fil0crypt.ic index 5fafa6cd3f0..65ca4def85f 100644 --- a/storage/innobase/include/fil0crypt.ic +++ b/storage/innobase/include/fil0crypt.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -17,7 +17,7 @@ this program; if not, write to the Free Software Foundation, Inc., *****************************************************************************/ /**************************************************//** -@file include/fil0fil.h +@file include/fil0crypt.ic The low-level file system encryption support functions Created 04/01/2015 Jan Lindström @@ -66,3 +66,54 @@ fil_page_encryption_status( } return 0; } + +/*******************************************************************//** +Get current encryption mode from crypt_data. +@return string representation */ +UNIV_INLINE +const char * +fil_crypt_get_mode( +/*===============*/ + const fil_space_crypt_t* crypt_data) +{ + ut_ad(crypt_data != NULL); + + switch(crypt_data->encryption) { + case FIL_SPACE_ENCRYPTION_DEFAULT: + return("Default tablespace encryption mode"); + break; + case FIL_SPACE_ENCRYPTION_ON: + return("Tablespace encrypted"); + break; + case FIL_SPACE_ENCRYPTION_OFF: + return("Tablespace not encrypted"); + break; + default: + ut_error; + } + + return ("NULL"); +} + +/*******************************************************************//** +Get current encryption type from crypt_data. 
+@return string representation */ +UNIV_INLINE +const char * +fil_crypt_get_type( + const fil_space_crypt_t* crypt_data) +{ + ut_ad(crypt_data != NULL); + switch (crypt_data->type) { + case CRYPT_SCHEME_UNENCRYPTED: + return("scheme unencrypted"); + break; + case CRYPT_SCHEME_1: + return("scheme encrypted"); + break; + default: + ut_error; + } + + return ("NULL"); +} diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index c97143235bc..4171bed1611 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -30,28 +30,395 @@ Created 10/25/1995 Heikki Tuuri #ifndef UNIV_INNOCHECKSUM +#include "log0recv.h" #include "dict0types.h" -#include "ut0byte.h" -#include "os0file.h" -#include "hash0hash.h" +#include "page0size.h" #ifndef UNIV_HOTBACKUP -#include "sync0rw.h" #include "ibuf0types.h" +#else #include "log0log.h" +#include "os0file.h" +#include "m_string.h" #endif /* !UNIV_HOTBACKUP */ #include +#include + +#ifdef UNIV_HOTBACKUP +#include +/** determine if file is intermediate / temporary. These files are created during +reorganize partition, rename tables, add / drop columns etc.
+@param[in] filepath absolute / relative or simply file name +@retval true if it is intermediate file +@retval false if it is normal file */ +inline +bool +is_intermediate_file(const std::string& filepath) +{ + std::string file_name = filepath; + + // extract file name from relative or absolute file name + std::size_t pos = file_name.rfind(OS_PATH_SEPARATOR); + if (pos != std::string::npos) + file_name = file_name.substr(++pos); + + transform(file_name.begin(), file_name.end(), + file_name.begin(), ::tolower); + + if (file_name[0] != '#') { + pos = file_name.rfind("#tmp#.ibd"); + if (pos != std::string::npos) + return true; + else + return false; /* normal file name */ + } + + std::vector file_name_patterns = {"#sql-", "#sql2-", + "#tmp#", "#ren#"}; + + /* search for the unsupported patterns */ + for (auto itr = file_name_patterns.begin(); + itr != file_name_patterns.end(); + itr++) { + + if (0 == std::strncmp(file_name.c_str(), + itr->c_str(), itr->length())){ + return true; + } + } + + return false; +} +#endif /* UNIV_HOTBACKUP */ + +extern const char general_space_name[]; // Forward declaration struct trx_t; +class page_id_t; +class truncate_t; +struct fil_node_t; struct fil_space_t; +struct btr_create_t; -typedef std::list space_name_list_t; +/* structure containing encryption specification */ +typedef struct fil_space_crypt_struct fil_space_crypt_t; + +typedef std::list > space_name_list_t; + +/** File types */ +enum fil_type_t { + /** temporary tablespace (temporary undo log or tables) */ + FIL_TYPE_TEMPORARY, + /** a tablespace that is being imported (no logging until finished) */ + FIL_TYPE_IMPORT, + /** persistent tablespace (for system, undo log or tables) */ + FIL_TYPE_TABLESPACE, + /** redo log covering changes to files of FIL_TYPE_TABLESPACE */ + FIL_TYPE_LOG +}; + +/** Check if fil_type is any of FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT +or FIL_TYPE_TABLESPACE.
+@param[in] type variable of type fil_type_t +@return true if any of FIL_TYPE_TEMPORARY, FIL_TYPE_IMPORT +or FIL_TYPE_TABLESPACE */ +inline +bool +fil_type_is_data( + fil_type_t type) +{ + return(type == FIL_TYPE_TEMPORARY + || type == FIL_TYPE_IMPORT + || type == FIL_TYPE_TABLESPACE); +} + +struct fil_node_t; + +/** Tablespace or log data space */ +struct fil_space_t { + char* name; /*!< Tablespace name */ + ulint id; /*!< space id */ + lsn_t max_lsn; + /*!< LSN of the most recent + fil_names_write_if_was_clean(). + Reset to 0 by fil_names_clear(). + Protected by log_sys->mutex. + If and only if this is nonzero, the + tablespace will be in named_spaces. */ + bool stop_ios;/*!< true if we want to rename the + .ibd file of tablespace and want to + stop temporarily posting of new i/o + requests on the file */ + bool stop_new_ops; + /*!< we set this true when we start + deleting a single-table tablespace. + When this is set following new ops + are not allowed: + * read IO request + * ibuf merge + * file flush + Note that we can still possibly have + new write operations because we don't + check this flag when doing flush + batches. */ + bool is_being_truncated; + /*!< this is set to true when we prepare to + truncate a single-table tablespace and its + .ibd file */ +#ifdef UNIV_DEBUG + ulint redo_skipped_count; + /*!< reference count for operations who want + to skip redo log in the file space in order + to make fsp_space_modify_check pass. 
*/ +#endif + fil_type_t purpose;/*!< purpose */ + UT_LIST_BASE_NODE_T(fil_node_t) chain; + /*!< base node for the file chain */ + ulint size; /*!< tablespace file size in pages; + 0 if not known yet */ + ulint size_in_header; + /* FSP_SIZE in the tablespace header; + 0 if not known yet */ + ulint free_len; + /*!< length of the FSP_FREE list */ + ulint free_limit; + /*!< contents of FSP_FREE_LIMIT */ + ulint flags; /*!< tablespace flags; see + fsp_flags_is_valid(), + page_size_t(ulint) (constructor) */ + ulint n_reserved_extents; + /*!< number of reserved free extents for + ongoing operations like B-tree page split */ + ulint n_pending_flushes; /*!< this is positive when flushing + the tablespace to disk; dropping of the + tablespace is forbidden if this is positive */ + ulint n_pending_ops;/*!< this is positive when we + have pending operations against this + tablespace. The pending operations can + be ibuf merges or lock validation code + trying to read a block. + Dropping of the tablespace is forbidden + if this is positive. + Protected by fil_system->mutex. 
*/ + hash_node_t hash; /*!< hash chain node */ + hash_node_t name_hash;/*!< hash chain the name_hash table */ +#ifndef UNIV_HOTBACKUP + rw_lock_t latch; /*!< latch protecting the file space storage + allocation */ +#endif /* !UNIV_HOTBACKUP */ + UT_LIST_NODE_T(fil_space_t) unflushed_spaces; + /*!< list of spaces with at least one unflushed + file we have written to */ + UT_LIST_NODE_T(fil_space_t) named_spaces; + /*!< list of spaces for which MLOG_FILE_NAME + records have been issued */ + bool is_in_unflushed_spaces; + /*!< true if this space is currently in + unflushed_spaces */ + UT_LIST_NODE_T(fil_space_t) space_list; + /*!< list of all spaces */ + + /** Compression algorithm */ + Compression::Type compression_type; + + /** Encryption algorithm */ + Encryption::Type encryption_type; + + /** Encrypt key */ + byte encryption_key[ENCRYPTION_KEY_LEN]; + + /** Encrypt key length*/ + ulint encryption_klen; + + /** Encrypt initial vector */ + byte encryption_iv[ENCRYPTION_KEY_LEN]; + + /** MariaDB encryption data */ + fil_space_crypt_t* crypt_data; + + /** Space file block size */ + ulint file_block_size; + + /** True if we have already printed compression failure */ + bool printed_compression_failure; + + /** True if page 0 of tablespace is read */ + bool read_page0; + + /** Release the reserved free extents. + @param[in] n_reserved number of reserved extents */ + void release_free_extents(ulint n_reserved); + + ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ +}; + +/** Value of fil_space_t::magic_n */ +#define FIL_SPACE_MAGIC_N 89472 + +/** File node of a tablespace or the log data space */ +struct fil_node_t { + /** tablespace containing this file */ + fil_space_t* space; + /** file name; protected by fil_system->mutex and log_sys->mutex. 
*/ + char* name; + /** whether this file is open */ + bool is_open; + /** file handle (valid if is_open) */ + os_file_t handle; + /** event that groups and serializes calls to fsync */ + os_event_t sync_event; + /** whether the file actually is a raw device or disk partition */ + bool is_raw_disk; + /** size of the file in database pages (0 if not known yet); + the possible last incomplete megabyte may be ignored + if space->id == 0 */ + ulint size; + /** initial size of the file in database pages; + FIL_IBD_FILE_INITIAL_SIZE by default */ + ulint init_size; + /** maximum size of the file in database pages (0 if unlimited) */ + ulint max_size; + /** count of pending i/o's; is_open must be true if nonzero */ + ulint n_pending; + /** count of pending flushes; is_open must be true if nonzero */ + ulint n_pending_flushes; + /** whether the file is currently being extended */ + bool being_extended; + /** number of writes to the file since the system was started */ + int64_t modification_counter; + /** the modification_counter of the latest flush to disk */ + int64_t flush_counter; + /** link to other files in this tablespace */ + UT_LIST_NODE_T(fil_node_t) chain; + /** link to the fil_system->LRU list (keeping track of open files) */ + UT_LIST_NODE_T(fil_node_t) LRU; + + /** whether the file system of this file supports PUNCH HOLE */ + bool punch_hole; + + /** block size to use for punching holes */ + ulint block_size; + + /** whether atomic write is enabled for this file */ + bool atomic_write; + + /** FIL_NODE_MAGIC_N */ + ulint magic_n; +}; + +/** Value of fil_node_t::magic_n */ +#define FIL_NODE_MAGIC_N 89389 + +/** Common InnoDB file extensions */ +enum ib_extention { + NO_EXT = 0, + IBD = 1, + ISL = 2, + CFG = 3, + CFP = 4 +}; +extern const char* dot_ext[]; +#define DOT_IBD dot_ext[IBD] +#define DOT_ISL dot_ext[ISL] +#define DOT_CFG dot_ext[CFG] +#define DOT_CPF dot_ext[CFP] + +/** Wrapper for a path to a directory. +This folder may or may not yet exist.
Since not all directory paths +end in "/", we should only use this for a directory path or a filepath +that has a ".ibd" extension. */ +class Folder +{ +public: + /** Default constructor */ + Folder() : m_folder(NULL) {} + + /** Constructor + @param[in] path pathname (not necessarily NUL-terminated) + @param[in] len length of the path, in bytes */ + Folder(const char* path, size_t len); + + /** Assignment operator + @param[in] folder folder string provided */ + class Folder& operator=(const char* path); + + /** Destructor */ + ~Folder() + { + ut_free(m_folder); + } + + /** Implicit type conversion + @return the wrapped object */ + operator const char*() const + { + return(m_folder); + } + + /** Explicit type conversion + @return the wrapped object */ + const char* operator()() const + { + return(m_folder); + } + + /** return the length of m_folder + @return the length of m_folder */ + size_t len() + { + return m_folder_len; + } + + /** Determine if two folders are equal + @param[in] other folder to compare to + @return whether the folders are equal */ + bool operator==(const Folder& other) const; + + /** Determine if the left folder is the same or an ancestor of + (contains) the right folder. + @param[in] other folder to compare to + @return whether this is the same or an ancestor or the other folder. */ + bool operator>=(const Folder& other) const; + + /** Determine if the left folder is an ancestor of (contains) + the right folder. + @param[in] other folder to compare to + @return whether this is an ancestor of the other folder */ + bool operator>(const Folder& other) const; + + /** Determine if the directory referenced by m_folder exists. 
+ @return whether the directory exists */ + bool exists(); + +private: + /** Build the basic folder name from the path and length provided + @param[in] path pathname (not necessarily NUL-terminated) + @param[in] len length of the path, in bytes */ + void make_path(const char* path, size_t len); + + /** Resolve a relative path in m_folder to an absolute path + in m_abs_path setting m_abs_len. */ + void make_abs_path(); + + /** The wrapped folder string */ + char* m_folder; + + /** Length of m_folder */ + size_t m_folder_len; + + /** A full absolute path to the same file. */ + char m_abs_path[FN_REFLEN + 2]; + + /** Length of m_abs_path to the deepest folder */ + size_t m_abs_len; +}; /** When mysqld is run, the default directory "." is the mysqld datadir, but in the MySQL Embedded Server Library and mysqlbackup it is not the default directory, and we must set the base file path explicitly */ extern const char* fil_path_to_mysql_datadir; +extern Folder folder_mysql_datadir; /** Initial size of a single-table tablespace in pages */ #define FIL_IBD_FILE_INITIAL_SIZE 4 @@ -66,17 +433,15 @@ of the address is FIL_NULL, the address is considered undefined. 
*/ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address stored in a file page is a string of bytes */ -#endif /* !UNIV_INNOCHECKSUM */ - #define FIL_ADDR_PAGE 0 /* first in address is the page offset */ #define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/ - +#endif /* !UNIV_INNOCHECKSUM */ #define FIL_ADDR_SIZE 6 /* address size is 6 bytes */ #ifndef UNIV_INNOCHECKSUM /** File space address */ -struct fil_addr_t{ +struct fil_addr_t { ulint page; /*!< page number within a space */ ulint boffset; /*!< byte offset within the page */ }; @@ -135,11 +500,34 @@ extern fil_addr_t fil_addr_null; used to encrypt the page + 32-bit checksum or 64 bits of zero if no encryption */ -#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this - contains the space id of the page */ +/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at +FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */ + +/** Control information version format (u8) */ +static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION; + +/** Compression algorithm (u8) */ +static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1; + +/** Original page type (u16) */ +static const ulint FIL_PAGE_ORIGINAL_TYPE_V1 = FIL_PAGE_ALGORITHM_V1 + 1; + +/** Original data size in bytes (u16)*/ +static const ulint FIL_PAGE_ORIGINAL_SIZE_V1 = FIL_PAGE_ORIGINAL_TYPE_V1 + 2; + +/** Size after compression (u16) */ +static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; + +/** This overloads FIL_PAGE_FILE_FLUSH_LSN for RTREE Split Sequence Number */ +#define FIL_RTREE_SPLIT_SEQ_NUM FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + +/** starting from 4.1.x this contains the space id of the page */ +#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 + #define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID -#define FIL_PAGE_DATA 38 /*!< start of the data on the page */ +#define FIL_PAGE_DATA 38U /*!< start of the data on the page */ + /* 
Following are used when page compression is used */ #define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store actual payload data size on @@ -161,6 +549,7 @@ extern fil_addr_t fil_addr_null; then encrypted */ #define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ +#define FIL_PAGE_RTREE 17854 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ #define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */ @@ -174,21 +563,31 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ -#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */ -#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED +#define FIL_PAGE_TYPE_UNKNOWN 13 /*!< In old tablespaces, garbage + in FIL_PAGE_TYPE is replaced with this + value when flushing pages. */ +#define FIL_PAGE_COMPRESSED 14 /*!< Compressed page */ +#define FIL_PAGE_ENCRYPTED 15 /*!< Encrypted page */ +#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16 + /*!< Compressed and Encrypted page */ +#define FIL_PAGE_ENCRYPTED_RTREE 17 /*!< Encrypted R-tree page */ + +/** Used by i_s.cc to index into the text description. 
*/ +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_UNKNOWN /*!< Last page type */ /* @} */ -/** Space types @{ */ -#define FIL_TABLESPACE 501 /*!< tablespace */ -#define FIL_LOG 502 /*!< redo log */ -/* @} */ +/** macro to check whether the page type is index (Btree or Rtree) type */ +#define fil_page_type_is_index(page_type) \ + (page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE) + +/** Check whether the page is index page (either regular Btree index or Rtree +index */ +#define fil_page_index_page_check(page) \ + fil_page_type_is_index(fil_page_get_type(page)) #ifndef UNIV_INNOCHECKSUM -/* structure containing encryption specification */ -typedef struct fil_space_crypt_struct fil_space_crypt_t; - /** The number of fsyncs done to the log */ extern ulint fil_n_log_flushes; @@ -200,152 +599,26 @@ extern ulint fil_n_pending_tablespace_flushes; /** Number of files currently open */ extern ulint fil_n_file_opened; -struct fsp_open_info { - ibool success; /*!< Has the tablespace been opened? */ - const char* check_msg; /*!< fil_check_first_page() message */ - ibool valid; /*!< Is the tablespace valid? 
*/ - os_file_t file; /*!< File handle */ - char* filepath; /*!< File path to open */ - lsn_t lsn; /*!< Flushed LSN from header page */ - ulint id; /*!< Space ID */ - ulint flags; /*!< Tablespace flags */ - ulint encryption_error; /*!< if an encryption error occurs */ -#ifdef UNIV_LOG_ARCHIVE - ulint arch_log_no; /*!< latest archived log file number */ -#endif /* UNIV_LOG_ARCHIVE */ - fil_space_crypt_t* crypt_data; /*!< crypt data */ - dict_table_t* table; /*!< table */ -}; - -struct fil_space_t; - -/** File node of a tablespace or the log data space */ -struct fil_node_t { - fil_space_t* space; /*!< backpointer to the space where this node - belongs */ - char* name; /*!< path to the file */ - ibool open; /*!< TRUE if file open */ - os_file_t handle; /*!< OS handle to the file, if file open */ - os_event_t sync_event;/*!< Condition event to group and - serialize calls to fsync */ - ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw - device or a raw disk partition */ - ulint size; /*!< size of the file in database pages, 0 if - not known yet; the possible last incomplete - megabyte may be ignored if space == 0 */ - ulint n_pending; - /*!< count of pending i/o's on this file; - closing of the file is not allowed if - this is > 0 */ - ulint n_pending_flushes; - /*!< count of pending flushes on this file; - closing of the file is not allowed if - this is > 0 */ - ibool being_extended; - /*!< TRUE if the node is currently - being extended. 
*/ - ib_int64_t modification_counter;/*!< when we write to the file we - increment this by one */ - ib_int64_t flush_counter;/*!< up to what - modification_counter value we have - flushed the modifications to disk */ - ulint file_block_size;/*!< file system block size */ - UT_LIST_NODE_T(fil_node_t) chain; - /*!< link field for the file chain */ - UT_LIST_NODE_T(fil_node_t) LRU; - /*!< link field for the LRU list */ - ulint magic_n;/*!< FIL_NODE_MAGIC_N */ -}; - -/** Value of fil_node_t::magic_n */ -#define FIL_NODE_MAGIC_N 89389 - -/** Tablespace or log data space: let us call them by a common name space */ -struct fil_space_t { - char* name; /*!< space name = the path to the first file in - it */ - ulint id; /*!< space id */ - ib_int64_t tablespace_version; - /*!< in DISCARD/IMPORT this timestamp - is used to check if we should ignore - an insert buffer merge request for a - page because it actually was for the - previous incarnation of the space */ - ibool mark; /*!< this is set to TRUE at database startup if - the space corresponds to a table in the InnoDB - data dictionary; so we can print a warning of - orphaned tablespaces */ - ibool stop_ios;/*!< TRUE if we want to rename the - .ibd file of tablespace and want to - stop temporarily posting of new i/o - requests on the file */ - ibool stop_new_ops; - /*!< we set this TRUE when we start - deleting a single-table tablespace. - When this is set following new ops - are not allowed: - * read IO request - * ibuf merge - * file flush - Note that we can still possibly have - new write operations because we don't - check this flag when doing flush - batches. 
*/ - ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or - FIL_ARCH_LOG */ - UT_LIST_BASE_NODE_T(fil_node_t) chain; - /*!< base node for the file chain */ - ulint size; /*!< space size in pages; 0 if a single-table - tablespace whose size we do not know yet; - last incomplete megabytes in data files may be - ignored if space == 0 */ - ulint flags; /*!< tablespace flags; see - fsp_flags_is_valid(), - fsp_flags_get_zip_size() */ - ulint n_reserved_extents; - /*!< number of reserved free extents for - ongoing operations like B-tree page split */ - ulint n_pending_flushes; /*!< this is positive when flushing - the tablespace to disk; dropping of the - tablespace is forbidden if this is positive */ - ulint n_pending_ops;/*!< this is positive when we - have pending operations against this - tablespace. The pending operations can - be ibuf merges or lock validation code - trying to read a block. - Dropping of the tablespace is forbidden - if this is positive */ - hash_node_t hash; /*!< hash chain node */ - hash_node_t name_hash;/*!< hash chain the name_hash table */ -#ifndef UNIV_HOTBACKUP - rw_lock_t latch; /*!< latch protecting the file space storage - allocation */ -#endif /* !UNIV_HOTBACKUP */ - UT_LIST_NODE_T(fil_space_t) unflushed_spaces; - /*!< list of spaces with at least one unflushed - file we have written to */ - bool is_in_unflushed_spaces; - /*!< true if this space is currently in - unflushed_spaces */ - bool printed_compression_failure; - /*!< true if we have already printed - compression failure */ - UT_LIST_NODE_T(fil_space_t) space_list; - /*!< list of all spaces */ - fil_space_crypt_t* crypt_data; - ulint file_block_size;/*!< file system block size */ - ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ -}; - -/** Value of fil_space_t::magic_n */ -#define FIL_SPACE_MAGIC_N 89472 +/** Look up a tablespace. 
+The caller should hold an InnoDB table lock or a MDL that prevents +the tablespace from being dropped during the operation, +or the caller should be in single-threaded crash recovery mode +(no user connections that could drop tablespaces). +If this is not the case, fil_space_acquire() and fil_space_release() +should be used instead. +@param[in] id tablespace ID +@return tablespace, or NULL if not found */ +fil_space_t* +fil_space_get( + ulint id) + MY_ATTRIBUTE((warn_unused_result)); /** The tablespace memory cache; also the totality of logs (the log data space) is stored here; below we talk about tablespaces, but also the ib_logfiles form a 'space' and it is handled here */ struct fil_system_t { #ifndef UNIV_HOTBACKUP - ib_mutex_t mutex; /*!< The mutex protecting the cache */ + ib_mutex_t mutex; /*!< The mutex protecting the cache */ #endif /* !UNIV_HOTBACKUP */ hash_table_t* spaces; /*!< The hash table of spaces in the system; they are hashed on the space @@ -372,7 +645,7 @@ struct fil_system_t { ulint n_open; /*!< number of files currently open */ ulint max_n_open; /*!< n_open is not allowed to exceed this */ - ib_int64_t modification_counter;/*!< when we write to a file we + int64_t modification_counter;/*!< when we write to a file we increment this by one */ ulint max_assigned_id;/*!< maximum space id in the existing tables, or assigned during the time @@ -380,7 +653,7 @@ struct fil_system_t { startup we scan the data dictionary and set here the maximum of the space id's of the tables there */ - ib_int64_t tablespace_version; + int64_t tablespace_version; /*!< a counter which is incremented for every space object memory creation; every space mem object gets a @@ -390,6 +663,12 @@ struct fil_system_t { request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /*!< list of all file spaces */ + UT_LIST_BASE_NODE_T(fil_space_t) named_spaces; + /*!< list of all file spaces + for which a MLOG_FILE_NAME + record has been written since + the latest redo log checkpoint. 
+ Protected only by log_sys->mutex. */ ibool space_id_reuse_warned; /* !< TRUE if fil_space_create() has issued a warning about @@ -400,103 +679,118 @@ struct fil_system_t { initialized. */ extern fil_system_t* fil_system; +#include "fil0crypt.h" + #ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Returns the version number of a tablespace, -1 if not found. -@return version number, -1 if the tablespace does not exist in the -memory cache */ -UNIV_INTERN -ib_int64_t -fil_space_get_version( -/*==================*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Returns the latch of a file space. -@return latch protecting storage allocation */ -UNIV_INTERN +/** Returns the latch of a file space. +@param[in] id space id +@param[out] flags tablespace flags +@return latch protecting storage allocation */ rw_lock_t* fil_space_get_latch( -/*================*/ - ulint id, /*!< in: space id */ - ulint* zip_size);/*!< out: compressed page size, or - 0 for uncompressed tablespaces */ -/*******************************************************************//** -Returns the type of a file space. -@return FIL_TABLESPACE or FIL_LOG */ -UNIV_INTERN -ulint -fil_space_get_type( -/*===============*/ - ulint id); /*!< in: space id */ + ulint id, + ulint* flags); +/** Gets the type of a file space. +@param[in] id tablespace identifier +@return file type */ +fil_type_t +fil_space_get_type( + ulint id); + +/** Note that a tablespace has been imported. +It is initially marked as FIL_TYPE_IMPORT so that no logging is +done during the import process when the space ID is stamped to each page. +Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging. +NOTE: temporary tablespaces are never imported. +@param[in] id tablespace identifier */ +void +fil_space_set_imported( + ulint id); + +# ifdef UNIV_DEBUG +/** Determine if a tablespace is temporary. 
+@param[in] id tablespace identifier +@return whether it is a temporary tablespace */ +bool +fsp_is_temporary(ulint id) +MY_ATTRIBUTE((warn_unused_result, pure)); +# endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Appends a new file to the chain of files of a space. File must be closed. -@return pointer to the file name, or NULL on error */ -UNIV_INTERN + +/** Append a file to the chain of files of a space. +@param[in] name file name of a file that is not open +@param[in] size file size in entire database blocks +@param[in,out] space tablespace from fil_space_create() +@param[in] is_raw whether this is a raw device or partition +@param[in] atomic_write true if atomic write enabled +@param[in] max_pages maximum number of pages in file, +ULINT_MAX means the file size is unlimited. +@return pointer to the file name +@retval NULL if error */ char* fil_node_create( -/*============*/ - const char* name, /*!< in: file name (file must be closed) */ - ulint size, /*!< in: file size in database blocks, rounded - downwards to an integer */ - ulint id, /*!< in: space id where to append */ - ibool is_raw) /*!< in: TRUE if a raw device or - a raw disk partition */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifdef UNIV_LOG_ARCHIVE -/****************************************************************//** -Drops files from the start of a file space, so that its size is cut by -the amount given. */ -UNIV_INTERN -void -fil_space_truncate_start( -/*=====================*/ - ulint id, /*!< in: space id */ - ulint trunc_len); /*!< in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ -#endif /* UNIV_LOG_ARCHIVE */ -/*******************************************************************//** -Creates a space memory object and puts it to the 'fil system' hash table. -If there is an error, prints an error message to the .err log. 
-@return TRUE if success */ -UNIV_INTERN -ibool + const char* name, + ulint size, + fil_space_t* space, + bool is_raw, + bool atomic_write, + ulint max_pages = ULINT_MAX) + MY_ATTRIBUTE((warn_unused_result)); + +/** Create a space memory object and put it to the fil_system hash table. +The tablespace name is independent from the tablespace file-name. +Error messages are issued to the server log. +@param[in] name tablespace name +@param[in] id tablespace identifier +@param[in] flags tablespace flags +@param[in] purpose tablespace purpose +@return pointer to created tablespace, to be filled in with fil_node_create() +@retval NULL on failure (such as when the same tablespace exists) */ +fil_space_t* fil_space_create( -/*=============*/ - const char* name, /*!< in: space name */ - ulint id, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or - 0 for uncompressed tablespaces */ - ulint purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */ - fil_space_crypt_t* crypt_data); /*!< in: crypt data */ + const char* name, + ulint id, + ulint flags, + fil_type_t purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ + MY_ATTRIBUTE((warn_unused_result)); /*******************************************************************//** Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. -@return TRUE if assigned, FALSE if not */ -UNIV_INTERN -ibool +@return true if assigned, false if not */ +bool fil_assign_new_space_id( /*====================*/ ulint* space_id); /*!< in/out: space id */ -/*******************************************************************//** -Returns the path from the first fil_node_t found for the space ID sent. + +/** Frees a space object from the tablespace memory cache. +Closes the files in the chain but does not delete them. 
+There must not be any pending i/o's or flushes on the files. +@param[in] id tablespace identifier +@param[in] x_latched whether the caller holds X-mode space->latch +@return true if success */ +bool +fil_space_free( + ulint id, + bool x_latched); + +/** Returns the path from the first fil_node_t found with this space ID. The caller is responsible for freeing the memory allocated here for the value returned. -@return a copy of fil_node_t::path, NULL if space is zero or not found. */ -UNIV_INTERN +@param[in] id Tablespace ID +@return own: A copy of fil_node_t::path, NULL if space ID is zero +or not found. */ char* fil_space_get_first_path( -/*=====================*/ - ulint id); /*!< in: space id */ + ulint id); + /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. -@return space size, 0 if space not found */ -UNIV_INTERN +@return space size, 0 if space not found */ ulint fil_space_get_size( /*===============*/ @@ -504,34 +798,44 @@ fil_space_get_size( /*******************************************************************//** Returns the flags of the space. The tablespace must be cached in the memory cache. -@return flags, ULINT_UNDEFINED if space not found */ -UNIV_INTERN +@return flags, ULINT_UNDEFINED if space not found */ ulint fil_space_get_flags( /*================*/ ulint id); /*!< in: space id */ -/*******************************************************************//** -Returns the compressed page size of the space, or 0 if the space -is not compressed. The tablespace must be cached in the memory cache. -@return compressed page size, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_zip_size( -/*===================*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. 
The tablespace must be cached in the memory cache. -@return TRUE if the address is meaningful */ -UNIV_INTERN -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint page_no);/*!< in: page number */ + +/** Check if table is mark for truncate. +@param[in] id space id +@return true if tablespace is marked for truncate. */ +bool +fil_space_is_being_truncated( + ulint id); + +/** Open each fil_node_t of a named fil_space_t if not already open. +@param[in] name Tablespace name +@return true if all file nodes are opened. */ +bool +fil_space_open( + const char* name); + +/** Close each fil_node_t of a named fil_space_t if open. +@param[in] name Tablespace name */ +void +fil_space_close( + const char* name); + +/** Returns the page size of the space and whether it is compressed or not. +The tablespace must be cached in the memory cache. +@param[in] id space id +@param[out] found true if tablespace was found +@return page size */ +const page_size_t +fil_space_get_page_size( + ulint id, + bool* found); + /****************************************************************//** Initializes the tablespace memory cache. */ -UNIV_INTERN void fil_init( /*=====*/ @@ -539,7 +843,6 @@ fil_init( ulint max_n_open); /*!< in: max number of open files */ /*******************************************************************//** Initializes the tablespace memory cache. */ -UNIV_INTERN void fil_close(void); /*===========*/ @@ -549,21 +852,18 @@ database server shutdown. This should be called at a server startup after the space objects for the log and the system tablespace have been created. The purpose of this operation is to make sure we never run out of file descriptors if we need to read from the insert buffer or to write to the log. 
*/ -UNIV_INTERN void fil_open_log_and_system_tablespace_files(void); /*==========================================*/ /*******************************************************************//** Closes all open files. There must not be any pending i/o's or not flushed modifications in the files. */ -UNIV_INTERN void fil_close_all_files(void); /*=====================*/ /*******************************************************************//** Closes the redo log files. There must not be any pending i/o's or not flushed modifications in the files. */ -UNIV_INTERN void fil_close_log_files( /*================*/ @@ -571,112 +871,198 @@ fil_close_log_files( /*******************************************************************//** Sets the max tablespace id counter if the given number is bigger than the previous value. */ -UNIV_INTERN void fil_set_max_space_id_if_bigger( /*===========================*/ ulint max_id);/*!< in: maximum known id */ #ifndef UNIV_HOTBACKUP -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. -@return DB_SUCCESS or error number */ -UNIV_INTERN + +/** Write the flushed LSN to the page header of the first page in the +system tablespace. +@param[in] lsn flushed LSN +@return DB_SUCCESS or error number */ dberr_t -fil_write_flushed_lsn_to_data_files( -/*================================*/ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no); /*!< in: latest archived log file number */ -/*******************************************************************//** -Reads the flushed lsn, arch no, and tablespace flag fields from a data -file at database startup. 
-@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ -UNIV_INTERN -const char* -fil_read_first_page( -/*================*/ - os_file_t data_file, /*!< in: open data file */ - ibool one_read_already, /*!< in: TRUE if min and max - parameters below already - contain sensible data */ - ulint* flags, /*!< out: tablespace flags */ - ulint* space_id, /*!< out: tablespace ID */ -#ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /*!< out: min of archived - log numbers in data files */ - ulint* max_arch_log_no, /*!< out: max of archived - log numbers in data files */ -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t* min_flushed_lsn, /*!< out: min of flushed - lsn values in data files */ - lsn_t* max_flushed_lsn, /*!< out: max of flushed - lsn values in data files */ - fil_space_crypt_t** crypt_data) /*!< out: crypt data */ +fil_write_flushed_lsn( + lsn_t lsn) +MY_ATTRIBUTE((warn_unused_result)); - __attribute__((warn_unused_result)); -/*******************************************************************//** -Increments the count of pending operation, if space is not being deleted. -@return TRUE if being deleted, and operation should be skipped */ -UNIV_INTERN -ibool -fil_inc_pending_ops( -/*================*/ - ulint id, /*!< in: space id */ - ibool print_err); /*!< in: need to print error or not */ -/*******************************************************************//** -Decrements the count of pending operations. */ -UNIV_INTERN +/** Acquire a tablespace when it could be dropped concurrently. +Used by background threads that do not necessarily hold proper locks +for concurrency control. +@param[in] id tablespace ID +@return the tablespace, or NULL if missing or being deleted */ +fil_space_t* +fil_space_acquire( + ulint id) + MY_ATTRIBUTE((warn_unused_result)); + +/** Acquire a tablespace that may not exist. +Used by background threads that do not necessarily hold proper locks +for concurrency control. 
+@param[in]	id	tablespace ID
+@return the tablespace, or NULL if missing or being deleted */
+fil_space_t*
+fil_space_acquire_silent(
+	ulint	id)
+	MY_ATTRIBUTE((warn_unused_result));
+
+/** Release a tablespace acquired with fil_space_acquire().
+@param[in,out]	space	tablespace to release */
 void
-fil_decr_pending_ops(
-/*=================*/
-	ulint	id);	/*!< in: space id */
+fil_space_release(
+	fil_space_t*	space);
+
+/** Wrapper with reference-counting for a fil_space_t. */
+class FilSpace
+{
+public:
+	/** Default constructor: Use this when reference counting
+	is done outside this wrapper. */
+	FilSpace() : m_space(NULL) {}
+
+	/** Constructor: Look up the tablespace and increment the
+	reference count if found.
+	@param[in]	space_id	tablespace ID */
+	explicit FilSpace(ulint space_id)
+		: m_space(fil_space_acquire(space_id)) {}
+
+	/** Assignment operator: This assumes that fil_space_acquire()
+	has already been done for the fil_space_t. The caller must
+	assign NULL if it calls fil_space_release().
+	@param[in]	space	tablespace to assign */
+	class FilSpace& operator=(
+		fil_space_t*	space)
+	{
+		/* fil_space_acquire() must have been invoked. */
+		ut_ad(space == NULL || space->n_pending_ops > 0);
+		m_space = space;
+		return(*this);
+	}
+
+	/** Destructor - Decrement the reference count if a fil_space_t
+	is still assigned. */
+	~FilSpace()
+	{
+		if (m_space != NULL) {
+			fil_space_release(m_space);
+		}
+	}
+
+	/** Implicit type conversion
+	@return the wrapped object */
+	operator const fil_space_t*() const
+	{
+		return(m_space);
+	}
+
+	/** Explicit type conversion
+	@return the wrapped object */
+	const fil_space_t* operator()() const
+	{
+		return(m_space);
+	}
+
+private:
+	/** The wrapped pointer */
+	fil_space_t*	m_space;
+};
+
 #endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. 
That is, -the log record part after the standard (type, space id, page no) header of the -log record. -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. +/********************************************************//** +Creates the database directory for a table if it does not exist yet. */ +void +fil_create_directory_for_tablename( +/*===============================*/ + const char* name); /*!< in: name in the standard + 'databasename/tablename' format */ +/********************************************************//** +Recreates table indexes by applying +TRUNCATE log record during recovery. +@return DB_SUCCESS or error code */ +dberr_t +fil_recreate_table( +/*===============*/ + ulint space_id, /*!< in: space id */ + ulint format_flags, /*!< in: page format */ + ulint flags, /*!< in: tablespace flags */ + const char* name, /*!< in: table name */ + truncate_t& truncate); /*!< in/out: The information of + TRUNCATE log record */ +/********************************************************//** +Recreates the tablespace and table indexes by applying +TRUNCATE log record during recovery. +@return DB_SUCCESS or error code */ +dberr_t +fil_recreate_tablespace( +/*====================*/ + ulint space_id, /*!< in: space id */ + ulint format_flags, /*!< in: page format */ + ulint flags, /*!< in: tablespace flags */ + const char* name, /*!< in: table name */ + truncate_t& truncate, /*!< in/out: The information of + TRUNCATE log record */ + lsn_t recv_lsn); /*!< in: the end LSN of + the log record */ +/** Replay a file rename operation if possible. 
+@param[in] space_id tablespace identifier +@param[in] first_page_no first page number in the file +@param[in] name old file name +@param[in] new_name new file name +@return whether the operation was successfully applied +(the name did not exist, or new_name did not exist and +name was successfully renamed to new_name) */ +bool +fil_op_replay_rename( + ulint space_id, + ulint first_page_no, + const char* name, + const char* new_name) + MY_ATTRIBUTE((warn_unused_result)); -Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to -the datadir that we should use in replaying the file operations. -@return end of log record, or NULL if the record was not completely -contained between ptr and end_ptr */ -UNIV_INTERN -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - byte* ptr, /*!< in: buffer containing the log record body, - or an initial segment of it, if the record does - not fir completely between ptr and end_ptr */ - byte* end_ptr, /*!< in: buffer end */ - ulint type, /*!< in: the type of this log record */ - ulint space_id, /*!< in: the space id of the tablespace in - question, or 0 if the log record should - only be parsed but not replayed */ - ulint log_flags); /*!< in: redo log flags - (stored in the page number parameter) */ -/*******************************************************************//** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. -@return TRUE if success */ -UNIV_INTERN +/** Deletes an IBD tablespace, either general or single-table. +The tablespace must be cached in the memory cache. This will delete the +datafile, fil_space_t & fil_node_t entries from the file_system_t cache. +@param[in] space_id Tablespace id +@param[in] buf_remove Specify the action to take on the pages +for this table in the buffer pool. 
+@return DB_SUCCESS or error */
 dberr_t
 fil_delete_tablespace(
-/*==================*/
-	ulint		id,		/*!< in: space id */
-	buf_remove_t	buf_remove);	/*!< in: specify the action to take
-					on the tables pages in the buffer
-					pool */
+	ulint		id,
+	buf_remove_t	buf_remove);
+
+/** Truncate the tablespace to needed size.
+@param[in]	space_id	id of tablespace to truncate
+@param[in]	size_in_pages	truncate size.
+@return true if truncate was successful. */
+bool
+fil_truncate_tablespace(
+	ulint		space_id,
+	ulint		size_in_pages);
+
+/*******************************************************************//**
+Prepare for truncating a single-table tablespace. The tablespace
+must be cached in the memory cache.
+1) Check pending operations on a tablespace;
+2) Remove all insert buffer entries for the tablespace;
+@return DB_SUCCESS or error */
+dberr_t
+fil_prepare_for_truncate(
+/*=====================*/
+	ulint	id);		/*!< in: space id */
+/**********************************************************************//**
+Reinitialize the original tablespace header with the same space id
+for single tablespace */
+void
+fil_reinit_space_header(
+/*====================*/
+	ulint	id,	/*!< in: space id */
+	ulint	size);	/*!< in: size in blocks */
 /*******************************************************************//**
 Closes a single-table tablespace. The tablespace must be cached in the
 memory cache. Free all pages used by the tablespace.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 dberr_t
 fil_close_tablespace(
 /*=================*/
@@ -695,9 +1081,8 @@ memory cache. Discarding is like deleting a tablespace, but
  3. When the user does IMPORT TABLESPACE, the tablespace will have the
     same id as it originally had.
 
- 4. Free all the pages in use by the tablespace if rename=TRUE.
-@return DB_SUCCESS or error */
-UNIV_INTERN
+ 4. Free all the pages in use by the tablespace if rename=true. 
+@return DB_SUCCESS or error */
 dberr_t
 fil_discard_tablespace(
 /*===================*/
@@ -719,105 +1104,56 @@ fil_rename_tablespace_check(
 	const char*	new_path,
 	bool		is_discarded);
 
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
+/** Rename a single-table tablespace.
+The tablespace must exist in the memory cache.
+@param[in]	id		tablespace identifier
+@param[in]	old_path	old file name
+@param[in]	new_name	new table name in the
+databasename/tablename format
+@param[in]	new_path_in	new file name,
+or NULL if it is located in the normal data directory
+@return true if success */
+bool
 fil_rename_tablespace(
-/*==================*/
-	const char*	old_name_in,	/*!< in: old table name in the
-					standard databasename/tablename
-					format of InnoDB, or NULL if we
-					do the rename based on the space
-					id only */
-	ulint		id,		/*!< in: space id */
-	const char*	new_name,	/*!< in: new table name in the
-					standard databasename/tablename
-					format of InnoDB */
-	const char*	new_path);	/*!< in: new full datafile path
-					if the tablespace is remotely
-					located, or NULL if it is located
-					in the normal data directory. */
+	ulint		id,
+	const char*	old_path,
+	const char*	new_name,
+	const char*	new_path_in);
 
 /*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
+Allocates and builds a file name from a path, a table or tablespace name
+and a suffix. The string must be freed by caller with ut_free().
+@param[in] path NULL or the directory path or the full path and filename.
+@param[in] name NULL if path is full, or Table/Tablespace name
+@param[in] suffix NULL or the file extension to use. 
+@return own: file name */ char* -fil_make_ibd_name( -/*==============*/ - const char* name, /*!< in: table name or a dir path */ - bool is_full_path); /*!< in: TRUE if it is a dir path */ -/*******************************************************************//** -Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link). -The string must be freed by caller with mem_free(). -@return own: file name */ -UNIV_INTERN -char* -fil_make_isl_name( -/*==============*/ - const char* name); /*!< in: table name */ -/*******************************************************************//** -Creates a new InnoDB Symbolic Link (ISL) file. It is always created -under the 'datadir' of MySQL. The datadir is the directory of a -running mysqld program. We can refer to it by simply using the path '.'. -@return DB_SUCCESS or error code */ -UNIV_INTERN +fil_make_filepath( + const char* path, + const char* name, + ib_extention suffix, + bool strip_name); + +/** Creates a new General or Single-Table tablespace +@param[in] space_id Tablespace ID +@param[in] name Tablespace name in dbname/tablename format. +For general tablespaces, the 'dbname/' part may be missing. +@param[in] path Path and filename of the datafile to create. +@param[in] flags Tablespace flags +@param[in] size Initial size of the tablespace file in pages, +must be >= FIL_IBD_FILE_INITIAL_SIZE +@return DB_SUCCESS or error code */ dberr_t -fil_create_link_file( -/*=================*/ - const char* tablename, /*!< in: tablename */ - const char* filepath); /*!< in: pathname of tablespace */ -/*******************************************************************//** -Deletes an InnoDB Symbolic Link (ISL) file. */ -UNIV_INTERN -void -fil_delete_link_file( -/*==================*/ - const char* tablename); /*!< in: name of table */ -/*******************************************************************//** -Reads an InnoDB Symbolic Link (ISL) file. -It is always created under the 'datadir' of MySQL. 
The name is of the -form {databasename}/{tablename}. and the isl file is expected to be in a -'{databasename}' directory called '{tablename}.isl'. The caller must free -the memory of the null-terminated path returned if it is not null. -@return own: filepath found in link file, NULL if not found. */ -UNIV_INTERN -char* -fil_read_link_file( -/*===============*/ - const char* name); /*!< in: tablespace name */ - -#include "fil0crypt.h" - -/*******************************************************************//** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_new_single_table_tablespace( -/*===================================*/ - ulint space_id, /*!< in: space id */ - const char* tablename, /*!< in: the table name in the usual - databasename/tablename format - of InnoDB */ - const char* dir_path, /*!< in: NULL or a dir path */ - ulint flags, /*!< in: tablespace flags */ - ulint flags2, /*!< in: table flags2 */ - ulint size, /*!< in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ +fil_ibd_create( + ulint space_id, + const char* name, + const char* path, + ulint flags, + ulint size, fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ - __attribute__((nonnull, warn_unused_result)); -#ifndef UNIV_HOTBACKUP + ulint key_id) /*!< in: encryption key_id */ + MY_ATTRIBUTE((warn_unused_result)); + /********************************************************************//** Tries to open a single-table tablespace and optionally checks the space id is right in it. If does not succeed, prints an error message to the .err log. 
This @@ -832,80 +1168,85 @@ If the validate boolean is set, we read the first page of the file and check that the space id in the file is what we expect. We assume that this function runs much faster if no check is made, since accessing the file inode probably is much faster (the OS caches them) than accessing -the first page of the file. This boolean may be initially FALSE, but if +the first page of the file. This boolean may be initially false, but if a remote tablespace is found it will be changed to true. If the fix_dict boolean is set, then it is safe to use an internal SQL statement to update the dictionary tables if they are incorrect. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@param[in] validate true if we should validate the tablespace +@param[in] fix_dict true if the dictionary is available to be fixed +@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY +@param[in] id tablespace ID +@param[in] flags tablespace flags +@param[in] space_name tablespace name of the datafile +If file-per-table, it is the table name in the databasename/tablename format +@param[in] path_in expected filepath, usually read from dictionary +@return DB_SUCCESS or error code */ dberr_t -fil_open_single_table_tablespace( -/*=============================*/ - bool validate, /*!< in: Do we validate tablespace? */ - bool fix_dict, /*!< in: Can we fix the dictionary? 
*/ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: tablespace flags */ - const char* tablename, /*!< in: table name in the - databasename/tablename format */ - const char* filepath, /*!< in: tablespace filepath */ - dict_table_t* table) /*!< in: table */ - __attribute__((nonnull(5), warn_unused_result)); +fil_ibd_open( + bool validate, + bool fix_dict, + fil_type_t purpose, + ulint id, + ulint flags, + const char* tablename, + const char* path_in, + dict_table_t* table) /*!< in: table */ + MY_ATTRIBUTE((warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fil_load_single_table_tablespaces(void); -/*===================================*/ -/*******************************************************************//** -Returns TRUE if a single-table tablespace does not exist in the memory cache, -or is being deleted there. -@return TRUE if does not exist or is being deleted */ -UNIV_INTERN -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - ulint id, /*!< in: space id */ - ib_int64_t version);/*!< in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -/*******************************************************************//** -Returns TRUE if a single-table tablespace exists in the memory cache. 
-@return TRUE if exists */ -UNIV_INTERN -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - ulint id); /*!< in: space id */ +enum fil_load_status { + /** The tablespace file(s) were found and valid. */ + FIL_LOAD_OK, + /** The name no longer matches space_id */ + FIL_LOAD_ID_CHANGED, + /** The file(s) were not found */ + FIL_LOAD_NOT_FOUND, + /** The file(s) were not valid */ + FIL_LOAD_INVALID +}; + +/** Open a single-file tablespace and add it to the InnoDB data structures. +@param[in] space_id tablespace ID +@param[in] filename path/to/databasename/tablename.ibd +@param[out] space the tablespace, or NULL on error +@return status of the operation */ +enum fil_load_status +fil_ibd_load( + ulint space_id, + const char* filename, + fil_space_t*& space) + MY_ATTRIBUTE((warn_unused_result)); + + +/***********************************************************************//** +A fault-tolerant function that tries to read the next file name in the +directory. We retry 100 times if os_file_readdir_next_file() returns -1. The +idea is to read as much good data as we can and jump over bad data. +@return 0 if ok, -1 if error even after the retries, 1 if at the end +of the directory */ +int +fil_file_readdir_next_file( +/*=======================*/ + dberr_t* err, /*!< out: this is set to DB_ERROR if an error + was encountered, otherwise not changed */ + const char* dirname,/*!< in: directory name or path */ + os_file_dir_t dir, /*!< in: directory stream */ + os_file_stat_t* info); /*!< in/out: buffer where the + info is returned */ #ifndef UNIV_HOTBACKUP /*******************************************************************//** -Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory +Returns true if a matching tablespace exists in the InnoDB tablespace memory cache. Note that if we have not done a crash recovery at the database startup, there may be many tablespaces which are not yet in the memory cache. 
-@return TRUE if a matching tablespace exists in the memory cache */ -UNIV_INTERN -ibool +@return true if a matching tablespace exists in the memory cache */ +bool fil_space_for_table_exists_in_mem( /*==============================*/ ulint id, /*!< in: space id */ const char* name, /*!< in: table name in the standard 'databasename/tablename' format */ - ibool mark_space, /*!< in: in crash recovery, at database - startup we mark all spaces which have - an associated table in the InnoDB - data dictionary, so that - we can print a warning about orphaned - tablespaces */ - ibool print_error_if_does_not_exist, + bool print_error_if_does_not_exist, /*!< in: print detailed error information to the .err log if a matching tablespace is not found from @@ -913,39 +1254,30 @@ fil_space_for_table_exists_in_mem( bool adjust_space, /*!< in: whether to adjust space id when find table space mismatch */ mem_heap_t* heap, /*!< in: heap memory */ - table_id_t table_id); /*!< in: table id */ + table_id_t table_id, /*!< in: table id */ + dict_table_t* table); /*!< in: table or NULL */ #else /* !UNIV_HOTBACKUP */ /********************************************************************//** Extends all tablespaces to the size stored in the space header. During the mysqlbackup --apply-log phase we extended the spaces on-demand so that log records could be appllied, but that may have left spaces still too small compared to the size stored in the space header. */ -UNIV_INTERN void fil_extend_tablespaces_to_stored_len(void); /*======================================*/ #endif /* !UNIV_HOTBACKUP */ -/**********************************************************************//** -Tries to extend a data file so that it would accommodate the number of pages -given. The tablespace must be cached in the memory cache. If the space is big -enough already, does nothing. 
-@return TRUE if success */ -UNIV_INTERN -ibool -fil_extend_space_to_desired_size( -/*=============================*/ - ulint* actual_size, /*!< out: size of the space after extension; - if we ran out of disk space this may be lower - than the desired size */ - ulint space_id, /*!< in: space id */ - ulint size_after_extend);/*!< in: desired size in pages after the - extension; if the current space size is bigger - than this already, the function does nothing */ +/** Try to extend a tablespace if it is smaller than the specified size. +@param[in,out] space tablespace +@param[in] size desired size in pages +@return whether the tablespace is at least as big as requested */ +bool +fil_space_extend( + fil_space_t* space, + ulint size); /*******************************************************************//** Tries to reserve free extents in a file space. -@return TRUE if succeed */ -UNIV_INTERN -ibool +@return true if succeed */ +bool fil_space_reserve_free_extents( /*===========================*/ ulint id, /*!< in: space id */ @@ -953,7 +1285,6 @@ fil_space_reserve_free_extents( ulint n_to_reserve); /*!< in: how many one wants to reserve */ /*******************************************************************//** Releases free extents in a file space. */ -UNIV_INTERN void fil_space_release_free_extents( /*===========================*/ @@ -962,56 +1293,51 @@ fil_space_release_free_extents( /*******************************************************************//** Gets the number of reserved extents. If the database is silent, this number should be zero. */ -UNIV_INTERN ulint fil_space_get_n_reserved_extents( /*=============================*/ ulint id); /*!< in: space id */ -/********************************************************************//** -Reads or writes data. This operation is asynchronous (aio). -@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do -i/o on a tablespace which does not exist */ -UNIV_INTERN + +/** Reads or writes data. 
This operation could be asynchronous (aio). + +@param[in] type IO context +@param[in] sync true if synchronous aio is desired +@param[in] page_id page id +@param[in] page_size page size +@param[in] byte_offset remainder of offset in bytes; in aio this + must be divisible by the OS block size +@param[in] len how many bytes to read or write; this must + not cross a file boundary; in aio this must + be a block size multiple +@param[in,out] buf buffer where to store read data or from where + to write; in aio this must be appropriately + aligned +@param[in] message message for aio handler if non-sync aio + used, else ignored +@param[in,out] write_size Actual write size initialized + after fist successfull trim + operation for this page and if + nitialized we do not trim again if + Actual page + +@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED +if we are trying to do i/o on a tablespace which does not exist */ dberr_t fil_io( -/*===*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, - ORed to OS_FILE_LOG, if a log i/o - and ORed to OS_AIO_SIMULATED_WAKE_LATER - if simulated aio and we want to post a - batch of i/os; NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len, /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ - void* buf, /*!< in/out: buffer where to store read data - or from where to write; in aio this must be - appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - __attribute__((nonnull(8))); + const IORequest& type, + bool sync, + const page_id_t& page_id, + const page_size_t& page_size, + ulint byte_offset, + ulint len, + void* buf, + void* message, + ulint* write_size); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided into segments (see os0file.cc for more info). The thread specifies which segment it wants to wait for. */ -UNIV_INTERN void fil_aio_wait( /*=========*/ @@ -1020,82 +1346,135 @@ fil_aio_wait( /**********************************************************************//** Flushes to disk possible writes cached by the OS. If the space does not exist or is being dropped, does not do anything. 
*/ -UNIV_INTERN void fil_flush( /*======*/ ulint space_id); /*!< in: file space id (this can be a group of log files or a tablespace of the database) */ -/**********************************************************************//** -Flushes to disk writes in file spaces of the given type possibly cached by -the OS. */ -UNIV_INTERN +/** Flush to disk the writes in file spaces of the given type +possibly cached by the OS. +@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */ void fil_flush_file_spaces( -/*==================*/ - ulint purpose); /*!< in: FIL_TABLESPACE, FIL_LOG */ + fil_type_t purpose); /******************************************************************//** Checks the consistency of the tablespace cache. -@return TRUE if ok */ -UNIV_INTERN -ibool +@return true if ok */ +bool fil_validate(void); /*==============*/ /********************************************************************//** -Returns TRUE if file address is undefined. -@return TRUE if undefined */ -UNIV_INTERN -ibool +Returns true if file address is undefined. +@return true if undefined */ +bool fil_addr_is_null( /*=============*/ fil_addr_t addr); /*!< in: address */ /********************************************************************//** Get the predecessor of a file page. -@return FIL_PAGE_PREV */ -UNIV_INTERN +@return FIL_PAGE_PREV */ ulint fil_page_get_prev( /*==============*/ const byte* page); /*!< in: file page */ /********************************************************************//** Get the successor of a file page. -@return FIL_PAGE_NEXT */ -UNIV_INTERN +@return FIL_PAGE_NEXT */ ulint fil_page_get_next( /*==============*/ const byte* page); /*!< in: file page */ /*********************************************************************//** Sets the file page type. 
*/ -UNIV_INTERN void fil_page_set_type( /*==============*/ byte* page, /*!< in/out: file page */ ulint type); /*!< in: type */ -/*********************************************************************//** -Gets the file page type. -@return type; NOTE that if the type has not been written to page, the -return value not defined */ -UNIV_INTERN +/** Reset the page type. +Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE. +In MySQL 3.23.53, only undo log pages and index pages were tagged. +Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. +@param[in] page_id page number +@param[in,out] page page with invalid FIL_PAGE_TYPE +@param[in] type expected page type +@param[in,out] mtr mini-transaction */ +void +fil_page_reset_type( + const page_id_t& page_id, + byte* page, + ulint type, + mtr_t* mtr); +/** Get the file page type. +@param[in] page file page +@return page type */ +inline ulint fil_page_get_type( -/*==============*/ - const byte* page); /*!< in: file page */ + const byte* page) +{ + return(mach_read_from_2(page + FIL_PAGE_TYPE)); +} +/** Check (and if needed, reset) the page type. +Data files created before MySQL 5.1 may contain +garbage in the FIL_PAGE_TYPE field. +In MySQL 3.23.53, only undo log pages and index pages were tagged. +Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. +@param[in] page_id page number +@param[in,out] page page with possibly invalid FIL_PAGE_TYPE +@param[in] type expected page type +@param[in,out] mtr mini-transaction */ +inline +void +fil_page_check_type( + const page_id_t& page_id, + byte* page, + ulint type, + mtr_t* mtr) +{ + ulint page_type = fil_page_get_type(page); + + if (page_type != type) { + fil_page_reset_type(page_id, page, type, mtr); + } +} + +/** Check (and if needed, reset) the page type. +Data files created before MySQL 5.1 may contain +garbage in the FIL_PAGE_TYPE field. +In MySQL 3.23.53, only undo log pages and index pages were tagged. 
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE. +@param[in,out] block block with possibly invalid FIL_PAGE_TYPE +@param[in] type expected page type +@param[in,out] mtr mini-transaction */ +#define fil_block_check_type(block, type, mtr) \ + fil_page_check_type(block->page.id, block->frame, type, mtr) + +#ifdef UNIV_DEBUG +/** Increase redo skipped of a tablespace. +@param[in] id space id */ +void +fil_space_inc_redo_skipped_count( + ulint id); + +/** Decrease redo skipped of a tablespace. +@param[in] id space id */ +void +fil_space_dec_redo_skipped_count( + ulint id); /*******************************************************************//** -Returns TRUE if a single-table tablespace is being deleted. -@return TRUE if being deleted */ -UNIV_INTERN -ibool -fil_tablespace_is_being_deleted( -/*============================*/ +Check whether a single-table tablespace is redo skipped. +@return true if redo skipped */ +bool +fil_space_is_redo_skipped( +/*======================*/ ulint id); /*!< in: space id */ +#endif /********************************************************************//** Delete the tablespace file and any related files like .cfg. This should not be called for temporary tables. */ -UNIV_INTERN void fil_delete_file( /*============*/ @@ -1104,42 +1483,37 @@ fil_delete_file( /** Callback functor. */ struct PageCallback { - /** - Default constructor */ + /** Default constructor */ PageCallback() : - m_zip_size(), - m_page_size(), + m_page_size(0, 0, false), m_filepath() UNIV_NOTHROW {} virtual ~PageCallback() UNIV_NOTHROW {} - /** - Called for page 0 in the tablespace file at the start. - @param file_size - size of the file in bytes - @param block - contents of the first page in the tablespace file - @retval DB_SUCCESS or error code.*/ + /** Called for page 0 in the tablespace file at the start. 
+ @param file_size size of the file in bytes + @param block contents of the first page in the tablespace file + @retval DB_SUCCESS or error code. */ virtual dberr_t init( os_offset_t file_size, const buf_block_t* block) UNIV_NOTHROW = 0; - /** - Called for every page in the tablespace. If the page was not + /** Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: - block->frame + UNIV_PAGE_SIZE; - @param offset - physical offset within the file - @param block - block read from file, note it is not from the buffer pool + block->frame + UNIV_PAGE_SIZE; + @param offset physical offset within the file + @param block block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ virtual dberr_t operator()( os_offset_t offset, buf_block_t* block) UNIV_NOTHROW = 0; - /** - Set the name of the physical file and the file handle that is used + /** Set the name of the physical file and the file handle that is used to open it for the file that is being iterated over. - @param filename - then physical name of the tablespace file. - @param file - OS file handle */ + @param filename then physical name of the tablespace file. + @param file OS file handle */ void set_file(const char* filename, os_file_t file) UNIV_NOTHROW { m_file = file; @@ -1150,30 +1524,23 @@ struct PageCallback { @return the space id of the tablespace */ virtual ulint get_space_id() const UNIV_NOTHROW = 0; - /** The compressed page size - @return the compressed page size */ - ulint get_zip_size() const - { - return(m_zip_size); - } - /** - Set the tablespace compressed table size. 
- @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */ - dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW; + @retval the space flags of the tablespace being iterated over */ + virtual ulint get_space_flags() const UNIV_NOTHROW = 0; + + /** Set the tablespace table size. + @param[in] page a page belonging to the tablespace */ + void set_page_size(const buf_frame_t* page) UNIV_NOTHROW; /** The compressed page size @return the compressed page size */ - ulint get_page_size() const + const page_size_t& get_page_size() const { return(m_page_size); } - /** Compressed table page size */ - ulint m_zip_size; - /** The tablespace page size. */ - ulint m_page_size; + page_size_t m_page_size; /** File handle to the tablespace */ os_file_t m_file; @@ -1189,36 +1556,52 @@ protected: /********************************************************************//** Iterate over all the pages in the tablespace. -@param table - the table definiton in the server -@param n_io_buffers - number of blocks to read and write together -@param callback - functor that will do the page updates -@return DB_SUCCESS or error code */ -UNIV_INTERN +@param table the table definiton in the server +@param n_io_buffers number of blocks to read and write together +@param callback functor that will do the page updates +@return DB_SUCCESS or error code */ dberr_t fil_tablespace_iterate( /*===================*/ dict_table_t* table, ulint n_io_buffers, PageCallback& callback) - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************************//** -Checks if a single-table tablespace for a given table name exists in the -tablespace memory cache. 
-@return space id, ULINT_UNDEFINED if not found */ -UNIV_INTERN +/********************************************************************//** +Looks for a pre-existing fil_space_t with the given tablespace ID +and, if found, returns the name and filepath in newly allocated buffers that the caller must free. +@param[in] space_id The tablespace ID to search for. +@param[out] name Name of the tablespace found. +@param[out] fileapth The filepath of the first datafile for thtablespace found. +@return true if tablespace is found, false if not. */ +bool +fil_space_read_name_and_filepath( + ulint space_id, + char** name, + char** filepath); + +/** Convert a file name to a tablespace name. +@param[in] filename directory/databasename/tablename.ibd +@return database/tablename string, to be freed with ut_free() */ +char* +fil_path_to_space_name( + const char* filename); + +/** Returns the space ID based on the tablespace name. +The tablespace must be found in the tablespace memory cache. +This call is made from external to this module, so the mutex is not owned. +@param[in] tablespace Tablespace name +@return space ID if tablespace found, ULINT_UNDEFINED if space not. */ ulint -fil_get_space_id_for_table( -/*=======================*/ - const char* name); /*!< in: table name in the standard - 'databasename/tablename' format */ +fil_space_get_id_by_name( + const char* tablespace); /** Iterate over all the spaces in the space list and fetch the tablespace names. It will return a copy of the name that must be freed by the caller using: delete[]. @return DB_SUCCESS if all OK. */ -UNIV_INTERN dberr_t fil_get_space_names( /*================*/ @@ -1226,50 +1609,31 @@ fil_get_space_names( /*!< in/out: Vector for collecting the names. */ MY_ATTRIBUTE((warn_unused_result)); +/** Return the next fil_node_t in the current or next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. 
+fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_node Pointer to the previous fil_node_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_node_t. +@retval NULL if this was the last file node */ +const fil_node_t* +fil_node_next( + const fil_node_t* prev_node); + /** Generate redo log for swapping two .ibd files @param[in] old_table old table @param[in] new_table new table @param[in] tmp_name temporary table name @param[in,out] mtr mini-transaction @return innodb error code */ -UNIV_INTERN dberr_t fil_mtr_rename_log( const dict_table_t* old_table, const dict_table_t* new_table, const char* tmp_name, mtr_t* mtr) - MY_ATTRIBUTE((nonnull)); - -/*******************************************************************//** -Finds the given page_no of the given space id from the double write buffer, -and copies it to the corresponding .ibd file. -@return true if copy was successful, or false. */ -bool -fil_user_tablespace_restore_page( -/*==============================*/ - fsp_open_info* fsp, /* in: contains space id and .ibd - file information */ - ulint page_no); /* in: page_no to obtain from double - write buffer */ - -/*******************************************************************//** -Return space flags */ -UNIV_INLINE -ulint -fil_space_flags( -/*===========*/ - fil_space_t* space); /*!< in: space */ - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache -associated with a space id. 
-@return file_space_t pointer, NULL if space not found */ -fil_space_t* -fil_space_get( -/*==========*/ - ulint id); /*!< in: space id */ -#endif /* !UNIV_INNOCHECKSUM */ + MY_ATTRIBUTE((warn_unused_result)); /****************************************************************//** Acquire fil_system mutex */ @@ -1282,7 +1646,6 @@ void fil_system_exit(void); /*==================*/ -#ifndef UNIV_INNOCHECKSUM /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ fil_space_t* @@ -1309,7 +1672,6 @@ Get id of next tablespace or ULINT_UNDEFINED if none */ UNIV_INTERN ulint fil_get_next_space( -/*===============*/ ulint id); /*!< in: space id */ /****************************************************************** @@ -1328,6 +1690,143 @@ fil_get_next_space_safe( ulint id); /*!< in: previous space id */ +/*******************************************************************//** +by redo log. +@param[in,out] space tablespace */ +void +fil_names_dirty( + fil_space_t* space); + +/** Write MLOG_FILE_NAME records when a non-predefined persistent +tablespace was modified for the first time since the latest +fil_names_clear(). 
+@param[in,out] space tablespace +@param[in,out] mtr mini-transaction */ +void +fil_names_dirty_and_write( + fil_space_t* space, + mtr_t* mtr); + +/** Set the compression type for the tablespace of a table +@param[in] table Table that should be compressesed +@param[in] algorithm Text representation of the algorithm +@return DB_SUCCESS or error code */ +dberr_t +fil_set_compression( + dict_table_t* table, + const char* algorithm) + MY_ATTRIBUTE((warn_unused_result)); + +/** Get the compression type for the tablespace +@param[in] space_id Space ID to check +@return the compression algorithm */ +Compression::Type +fil_get_compression( + ulint space_id) + MY_ATTRIBUTE((warn_unused_result)); + +/** Set the encryption type for the tablespace +@param[in] space Space ID of tablespace for which to set +@param[in] algorithm Encryption algorithm +@param[in] key Encryption key +@param[in] iv Encryption iv +@return DB_SUCCESS or error code */ +dberr_t +fil_set_encryption( + ulint space_id, + Encryption::Type algorithm, + byte* key, + byte* iv) + MY_ATTRIBUTE((warn_unused_result)); + +/** +@return true if the re-encrypt success */ +bool +fil_encryption_rotate(); + +/** Write MLOG_FILE_NAME records if a persistent tablespace was modified +for the first time since the latest fil_names_clear(). +@param[in,out] space tablespace +@param[in,out] mtr mini-transaction +@return whether any MLOG_FILE_NAME record was written */ +inline MY_ATTRIBUTE((warn_unused_result)) +bool +fil_names_write_if_was_clean( + fil_space_t* space, + mtr_t* mtr) +{ + ut_ad(log_mutex_own()); + + if (space == NULL) { + return(false); + } + + const bool was_clean = space->max_lsn == 0; + ut_ad(space->max_lsn <= log_sys->lsn); + space->max_lsn = log_sys->lsn; + + if (was_clean) { + fil_names_dirty_and_write(space, mtr); + } + + return(was_clean); +} + +extern volatile bool recv_recovery_on; + +/** During crash recovery, open a tablespace if it had not been opened +yet, to get valid size and flags. 
+@param[in,out] space tablespace */ +inline +void +fil_space_open_if_needed( + fil_space_t* space) +{ + ut_ad(recv_recovery_on); + + if (space->size == 0) { + /* Initially, size and flags will be set to 0, + until the files are opened for the first time. + fil_space_get_size() will open the file + and adjust the size and flags. */ +#ifdef UNIV_DEBUG + ulint size = +#endif /* UNIV_DEBUG */ + fil_space_get_size(space->id); + ut_ad(size == space->size); + } +} + +/** On a log checkpoint, reset fil_names_dirty_and_write() flags +and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed. +@param[in] lsn checkpoint LSN +@param[in] do_write whether to always write MLOG_CHECKPOINT +@return whether anything was written to the redo log +@retval false if no flags were set and nothing written +@retval true if anything was written to the redo log */ +bool +fil_names_clear( + lsn_t lsn, + bool do_write); + +#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX) +/** +Try and enable FusionIO atomic writes. +@param[in] file OS file handle +@return true if successful */ +bool +fil_fusionio_enable_atomic_write(os_file_t file); +#endif /* !NO_FALLOCATE && UNIV_LINUX */ + +/** Note that the file system where the file resides doesn't support PUNCH HOLE +@param[in,out] node Node to set */ +void fil_no_punch_hole(fil_node_t* node); + +#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH +void test_make_filepath(); +#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */ + + /*******************************************************************//** Returns the block size of the file space @return block size */ @@ -1339,6 +1838,23 @@ fil_space_get_block_size( ulint offset, /*!< in: page offset */ ulint len); /*!< in: page len */ +/*******************************************************************//** +Increments the count of pending operation, if space is not being deleted. 
+@return TRUE if being deleted, and operation should be skipped */ +UNIV_INTERN +ibool +fil_inc_pending_ops( +/*================*/ + ulint id, /*!< in: space id */ + ibool print_err); /*!< in: need to print error or not */ +/*******************************************************************//** +Decrements the count of pending operations. */ +UNIV_INTERN +void +fil_decr_pending_ops( +/*=================*/ + ulint id); /*!< in: space id */ + #endif /* UNIV_INNOCHECKSUM */ #ifndef UNIV_INNOCHECKSUM diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic index 5654d8f6178..b17bf8213ab 100644 --- a/storage/innobase/include/fil0fil.ic +++ b/storage/innobase/include/fil0fil.ic @@ -57,6 +57,8 @@ fil_get_page_type_name( ulint page_type) /*!< in: FIL_PAGE_TYPE */ { switch(page_type) { + case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: + return (const char*)"PAGE_COMPRESSED_ENRYPTED"; case FIL_PAGE_PAGE_COMPRESSED: return (const char*)"PAGE_COMPRESSED"; case FIL_PAGE_INDEX: @@ -85,7 +87,7 @@ fil_get_page_type_name( return (const char*)"ZBLOB"; case FIL_PAGE_TYPE_ZBLOB2: return (const char*)"ZBLOB2"; - case FIL_PAGE_TYPE_COMPRESSED: + case FIL_PAGE_COMPRESSED: return (const char*)"ORACLE PAGE COMPRESSED"; default: return (const char*)"PAGE TYPE CORRUPTED"; @@ -102,7 +104,7 @@ fil_node_get_block_size( fil_node_t* node) /*!< in: Node where to get block size */ { - return (node->file_block_size); + return (node->block_size); } /****************************************************************//** @@ -131,7 +133,7 @@ fil_page_type_validate( page_type == FIL_PAGE_TYPE_XDES || page_type == FIL_PAGE_TYPE_BLOB || page_type == FIL_PAGE_TYPE_ZBLOB || - page_type == FIL_PAGE_TYPE_COMPRESSED))) { + page_type == FIL_PAGE_COMPRESSED))) { uint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED); @@ -165,7 +167,7 @@ fil_page_type_validate( page_type == FIL_PAGE_TYPE_XDES || 
page_type == FIL_PAGE_TYPE_BLOB || page_type == FIL_PAGE_TYPE_ZBLOB || - page_type == FIL_PAGE_TYPE_COMPRESSED); + page_type == FIL_PAGE_COMPRESSED); return false; } diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h index 99a05f14ffe..4d27a61be64 100644 --- a/storage/innobase/include/fil0pagecompress.h +++ b/storage/innobase/include/fil0pagecompress.h @@ -44,7 +44,7 @@ Returns the page compression flag of the space, or false if the space is not compressed. The tablespace must be cached in the memory cache. @return true if page compressed, false if not or space not found */ UNIV_INTERN -ibool +bool fil_space_is_page_compressed( /*=========================*/ ulint id); /*!< in: space id */ @@ -53,7 +53,7 @@ Returns the page compression flag of the space, or false if the space is not compressed. The tablespace must be cached in the memory cache. @return true if page compressed, false if not or space not found */ UNIV_INTERN -ibool +bool fil_space_get_page_compressed( /*=========================*/ fil_space_t* space); /*!< in: space id */ @@ -66,24 +66,6 @@ atomic_writes_t fil_space_get_atomic_writes( /*=========================*/ ulint id); /*!< in: space id */ -/*******************************************************************//** -Find out wheather the page is index page or not -@return true if page type index page, false if not */ -UNIV_INTERN -ibool -fil_page_is_index_page( -/*===================*/ - byte *buf); /*!< in: page */ - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "UNKNOWN" if not known*/ -UNIV_INTERN -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg); /*! 
+ +/** Types of raw partitions in innodb_data_file_path */ +enum device_t { + SRV_NOT_RAW = 0, /*!< Not a raw partition */ + SRV_NEW_RAW, /*!< A 'newraw' partition, only to be + initialized */ + SRV_OLD_RAW /*!< An initialized raw partition */ +}; + +/** Data file control information. */ +class Datafile { + + friend class Tablespace; + friend class SysTablespace; + +public: + + Datafile() + : + m_name(), + m_filepath(), + m_filename(), + m_handle(OS_FILE_CLOSED), + m_open_flags(OS_FILE_OPEN), + m_size(), + m_order(), + m_type(SRV_NOT_RAW), + m_space_id(ULINT_UNDEFINED), + m_flags(), + m_exists(), + m_is_valid(), + m_first_page_buf(), + m_first_page(), + m_atomic_write(), + m_last_os_error(), + m_file_info(), + m_encryption_key(NULL), + m_encryption_iv(NULL), + m_crypt_info() + { + /* No op */ + } + + Datafile(const char* name, ulint flags, ulint size, ulint order) + : + m_name(mem_strdup(name)), + m_filepath(), + m_filename(), + m_handle(OS_FILE_CLOSED), + m_open_flags(OS_FILE_OPEN), + m_size(size), + m_order(order), + m_type(SRV_NOT_RAW), + m_space_id(ULINT_UNDEFINED), + m_flags(flags), + m_exists(), + m_is_valid(), + m_first_page_buf(), + m_first_page(), + m_atomic_write(), + m_last_os_error(), + m_file_info(), + m_encryption_key(NULL), + m_encryption_iv(NULL), + m_crypt_info() + { + ut_ad(m_name != NULL); + /* No op */ + } + + Datafile(const Datafile& file) + : + m_handle(file.m_handle), + m_open_flags(file.m_open_flags), + m_size(file.m_size), + m_order(file.m_order), + m_type(file.m_type), + m_space_id(file.m_space_id), + m_flags(file.m_flags), + m_exists(file.m_exists), + m_is_valid(file.m_is_valid), + m_first_page_buf(), + m_first_page(), + m_atomic_write(file.m_atomic_write), + m_last_os_error(), + m_file_info(), + m_encryption_key(NULL), + m_encryption_iv(NULL), + m_crypt_info() + { + m_name = mem_strdup(file.m_name); + ut_ad(m_name != NULL); + + if (file.m_filepath != NULL) { + m_filepath = mem_strdup(file.m_filepath); + ut_a(m_filepath != NULL); + 
set_filename(); + } else { + m_filepath = NULL; + m_filename = NULL; + } + } + + virtual ~Datafile() + { + shutdown(); + } + + Datafile& operator=(const Datafile& file) + { + ut_a(this != &file); + + ut_ad(m_name == NULL); + m_name = mem_strdup(file.m_name); + ut_a(m_name != NULL); + + m_size = file.m_size; + m_order = file.m_order; + m_type = file.m_type; + + ut_a(m_handle == OS_FILE_CLOSED); + m_handle = file.m_handle; + + m_exists = file.m_exists; + m_is_valid = file.m_is_valid; + m_open_flags = file.m_open_flags; + m_space_id = file.m_space_id; + m_flags = file.m_flags; + m_last_os_error = 0; + + if (m_filepath != NULL) { + ut_free(m_filepath); + m_filepath = NULL; + m_filename = NULL; + } + + if (file.m_filepath != NULL) { + m_filepath = mem_strdup(file.m_filepath); + ut_a(m_filepath != NULL); + set_filename(); + } + + /* Do not make a copy of the first page, + it should be reread if needed */ + m_first_page_buf = NULL; + m_first_page = NULL; + m_encryption_key = NULL; + m_encryption_iv = NULL; + /* Do not copy crypt info it is read from first page */ + m_crypt_info = NULL; + + m_atomic_write = file.m_atomic_write; + + return(*this); + } + + /** Initialize the name and flags of this datafile. + @param[in] name tablespace name, will be copied + @param[in] flags tablespace flags */ + void init(const char* name, ulint flags); + + /** Release the resources. */ + virtual void shutdown(); + + /** Open a data file in read-only mode to check if it exists + so that it can be validated. + @param[in] strict whether to issue error messages + @return DB_SUCCESS or error code */ + virtual dberr_t open_read_only(bool strict); + + /** Open a data file in read-write mode during start-up so that + doublewrite pages can be restored and then it can be validated. + @param[in] read_only_mode if true, then readonly mode checks + are enforced. 
+ @return DB_SUCCESS or error code */ + virtual dberr_t open_read_write(bool read_only_mode) + MY_ATTRIBUTE((warn_unused_result)); + + /** Initialize OS specific file info. */ + void init_file_info(); + + /** Close a data file. + @return DB_SUCCESS or error code */ + dberr_t close(); + + /** Make a full filepath from a directory path and a filename. + Prepend the dirpath to filename using the extension given. + If dirpath is NULL, prepend the default datadir to filepath. + Store the result in m_filepath. + @param[in] dirpath directory path + @param[in] filename filename or filepath + @param[in] ext filename extension */ + void make_filepath( + const char* dirpath, + const char* filename, + ib_extention ext); + + /** Set the filepath by duplicating the filepath sent in */ + void set_filepath(const char* filepath); + + /** Allocate and set the datafile or tablespace name in m_name. + If a name is provided, use it; else if the datafile is file-per-table, + extract a file-per-table tablespace name from m_filepath; else it is a + general tablespace, so just call it that for now. The value of m_name + will be freed in the destructor. + @param[in] name Tablespace Name if known, NULL if not */ + void set_name(const char* name); + + /** Validates the datafile and checks that it conforms with + the expected space ID and flags. The file should exist and be + successfully opened in order for this function to validate it. + @param[in] space_id The expected tablespace ID. + @param[in] flags The expected tablespace flags. + @param[in] for_import is it for importing + @retval DB_SUCCESS if tablespace is valid, DB_ERROR if not. + m_is_valid is also set true on success, else false. */ + dberr_t validate_to_dd( + ulint space_id, + ulint flags, + bool for_import) + MY_ATTRIBUTE((warn_unused_result)); + + /** Validates this datafile for the purpose of recovery. + The file should exist and be successfully opened. 
We initially + open it in read-only mode because we just want to read the SpaceID. + However, if the first page is corrupt and needs to be restored + from the doublewrite buffer, we will reopen it in write mode and + ry to restore that page. + @retval DB_SUCCESS if tablespace is valid, DB_ERROR if not. + m_is_valid is also set true on success, else false. */ + dberr_t validate_for_recovery() + MY_ATTRIBUTE((warn_unused_result)); + + /** Checks the consistency of the first page of a datafile when the + tablespace is opened. This occurs before the fil_space_t is created + so the Space ID found here must not already be open. + m_is_valid is set true on success, else false. + @param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN + @param[in] for_import if it is for importing + (only valid for the first file of the system tablespace) + @retval DB_SUCCESS on if the datafile is valid + @retval DB_CORRUPTION if the datafile is not readable + @retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */ + dberr_t validate_first_page(lsn_t* flush_lsn, + bool for_import) + MY_ATTRIBUTE((warn_unused_result)); + + /** Get Datafile::m_name. + @return m_name */ + const char* name() const + { + return(m_name); + } + + /** Get Datafile::m_filepath. + @return m_filepath */ + const char* filepath() const + { + return(m_filepath); + } + + /** Get Datafile::m_handle. + @return m_handle */ + os_file_t handle() const + { + return(m_handle); + } + + /** Get Datafile::m_order. + @return m_order */ + ulint order() const + { + return(m_order); + } + + /** Get Datafile::m_space_id. + @return m_space_id */ + ulint space_id() const + { + return(m_space_id); + } + + /** Get Datafile::m_flags. + @return m_flags */ + ulint flags() const + { + return(m_flags); + } + + /** + @return true if m_handle is open, false if not */ + bool is_open() const + { + return(m_handle != OS_FILE_CLOSED); + } + + /** Get Datafile::m_is_valid. 
+ @return m_is_valid */ + bool is_valid() const + { + return(m_is_valid); + } + + /** Get the last OS error reported + @return m_last_os_error */ + ulint last_os_error() const + { + return(m_last_os_error); + } + + fil_space_crypt_t* get_crypt_info() const + { + return(m_crypt_info); + } + + /** Test if the filepath provided looks the same as this filepath + by string comparison. If they are two different paths to the same + file, same_as() will be used to show that after the files are opened. + @param[in] other filepath to compare with + @retval true if it is the same filename by char comparison + @retval false if it looks different */ + bool same_filepath_as(const char* other) const; + + /** Test if another opened datafile is the same file as this object. + @param[in] other Datafile to compare with + @return true if it is the same file, else false */ + bool same_as(const Datafile& other) const; + +private: + /** Free the filepath buffer. */ + void free_filepath(); + + /** Set the filename pointer to the start of the file name + in the filepath. */ + void set_filename() + { + if (m_filepath == NULL) { + return; + } + + char* last_slash = strrchr(m_filepath, OS_PATH_SEPARATOR); + + m_filename = last_slash ? last_slash + 1 : m_filepath; + } + + /** Create/open a data file. + @param[in] read_only_mode if true, then readonly mode checks + are enforced. + @return DB_SUCCESS or error code */ + dberr_t open_or_create(bool read_only_mode) + MY_ATTRIBUTE((warn_unused_result)); + + /** Reads a few significant fields from the first page of the + datafile, which must already be open. + @param[in] read_only_mode if true, then readonly mode checks + are enforced. + @return DB_SUCCESS or DB_IO_ERROR if page cannot be read */ + dberr_t read_first_page(bool read_first_page) + MY_ATTRIBUTE((warn_unused_result)); + + /** Free the first page from memory when it is no longer needed. */ + void free_first_page(); + + /** Set the Datafile::m_open_flags. 
+ @param open_flags The Open flags to set. */ + void set_open_flags(os_file_create_t open_flags) + { + m_open_flags = open_flags; + }; + + /** Determine if this datafile is on a Raw Device + @return true if it is a RAW device. */ + bool is_raw_device() + { + return(m_type != SRV_NOT_RAW); + } + + /* DATA MEMBERS */ + + /** Datafile name at the tablespace location. + This is either the basename of the file if an absolute path + was entered, or it is the relative path to the datadir or + Tablespace::m_path. */ + char* m_name; + +protected: + /** Physical file path with base name and extension */ + char* m_filepath; + +private: + /** Determine the space id of the given file descriptor by reading + a few pages from the beginning of the .ibd file. + @return DB_SUCCESS if space id was successfully identified, + else DB_ERROR. */ + dberr_t find_space_id(); + + /** Finds a given page of the given space id from the double write + buffer and copies it to the corresponding .ibd file. + @param[in] page_no Page number to restore + @return DB_SUCCESS if page was restored, else DB_ERROR */ + dberr_t restore_from_doublewrite( + ulint restore_page_no); + + /** Points into m_filepath to the file name with extension */ + char* m_filename; + + /** Open file handle */ + os_file_t m_handle; + + /** Flags to use for opening the data file */ + os_file_create_t m_open_flags; + + /** size in database pages */ + ulint m_size; + + /** ordinal position of this datafile in the tablespace */ + ulint m_order; + + /** The type of the data file */ + device_t m_type; + + /** Tablespace ID. Contained in the datafile header. + If this is a system tablespace, FSP_SPACE_ID is only valid + in the first datafile. */ + ulint m_space_id; + + /** Tablespace flags. Contained in the datafile header. + If this is a system tablespace, FSP_SPACE_FLAGS are only valid + in the first datafile. 
*/ + ulint m_flags; + + /** true if file already existed on startup */ + bool m_exists; + + /* true if the tablespace is valid */ + bool m_is_valid; + + /** Buffer to hold first page */ + byte* m_first_page_buf; + + /** Pointer to the first page held in the buffer above */ + byte* m_first_page; + + /** true if atomic writes enabled for this file */ + bool m_atomic_write; + +protected: + /** Last OS error received so it can be reported if needed. */ + ulint m_last_os_error; + +public: + /** Use the following to determine the uniqueness of this datafile. */ +#ifdef _WIN32 + /* Use fields dwVolumeSerialNumber, nFileIndexLow, nFileIndexHigh. */ + BY_HANDLE_FILE_INFORMATION m_file_info; +#else + /* Use field st_ino. */ + struct stat m_file_info; +#endif /* WIN32 */ + + /** Encryption key read from first page */ + byte* m_encryption_key; + + /** Encryption iv read from first page */ + byte* m_encryption_iv; + + /** Encryption information */ + fil_space_crypt_t* m_crypt_info; +}; + + +/** Data file control information. */ +class RemoteDatafile : public Datafile +{ +private: + /** Link filename (full path) */ + char* m_link_filepath; + +public: + + RemoteDatafile() + : + m_link_filepath() + { + /* No op - base constructor is called. */ + } + + RemoteDatafile(const char* name, ulint size, ulint order) + : + m_link_filepath() + { + /* No op - base constructor is called. */ + } + + ~RemoteDatafile() + { + shutdown(); + } + + /** Release the resources. */ + void shutdown(); + + /** Get the link filepath. + @return m_link_filepath */ + const char* link_filepath() const + { + return(m_link_filepath); + } + + /** Set the link filepath. Use default datadir, the base name of + the path provided without its suffix, plus DOT_ISL. + @param[in] path filepath which contains a basename to use. + If NULL, use m_name as the basename. 
*/ + void set_link_filepath(const char* path); + + /** Create a link filename based on the contents of m_name, + open that file, and read the contents into m_filepath. + @retval DB_SUCCESS if remote linked tablespace file is opened and read. + @retval DB_CANNOT_OPEN_FILE if the link file does not exist. */ + dberr_t open_link_file(); + + /** Delete an InnoDB Symbolic Link (ISL) file. */ + void delete_link_file(void); + + /** Open a handle to the file linked to in an InnoDB Symbolic Link file + in read-only mode so that it can be validated. + @param[in] strict whether to issue error messages + @return DB_SUCCESS or error code */ + dberr_t open_read_only(bool strict); + + /** Opens a handle to the file linked to in an InnoDB Symbolic Link + file in read-write mode so that it can be restored from doublewrite + and validated. + @param[in] read_only_mode If true, then readonly mode checks + are enforced. + @return DB_SUCCESS or error code */ + dberr_t open_read_write(bool read_only_mode) + MY_ATTRIBUTE((warn_unused_result)); + + /****************************************************************** + Global Static Functions; Cannot refer to data members. + ******************************************************************/ + + /** Creates a new InnoDB Symbolic Link (ISL) file. It is always + created under the 'datadir' of MySQL. The datadir is the directory + of a running mysqld program. We can refer to it by simply using + the path ".". + @param[in] name tablespace name + @param[in] filepath remote filepath of tablespace datafile + @param[in] is_shared true for general tablespace, + false for file-per-table + @return DB_SUCCESS or error code */ + static dberr_t create_link_file( + const char* name, + const char* filepath, + bool is_shared = false); + + /** Delete an InnoDB Symbolic Link (ISL) file by name. + @param[in] name tablespace name */ + static void delete_link_file(const char* name); + + /** Read an InnoDB Symbolic Link (ISL) file by name. 
+ It is always created under the datadir of MySQL. + For file-per-table tablespaces, the isl file is expected to be + in a 'database' directory and called 'tablename.isl'. + For general tablespaces, there will be no 'database' directory. + The 'basename.isl' will be in the datadir. + The caller must free the memory returned if it is not null. + @param[in] link_filepath filepath of the ISL file + @return Filepath of the IBD file read from the ISL file */ + static char* read_link_file( + const char* link_filepath); +}; +#endif /* fsp0file_h */ diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index b9ff05b4bd4..3709c4a4f24 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -33,177 +33,27 @@ Created 12/18/1995 Heikki Tuuri #ifndef UNIV_INNOCHECKSUM -#include "mtr0mtr.h" +#include "fsp0space.h" #include "fut0lst.h" -#include "ut0byte.h" +#include "mtr0mtr.h" #include "page0types.h" +#include "rem0types.h" +#include "ut0byte.h" #endif /* !UNIV_INNOCHECKSUM */ +#include "fsp0types.h" -/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */ - -/** Width of the POST_ANTELOPE flag */ -#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1 -/** Number of flag bits used to indicate the tablespace zip page size */ -#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4 -/** Width of the ATOMIC_BLOBS flag. The ability to break up a long -column into an in-record prefix and an externally stored part is available -to the two Barracuda row formats COMPRESSED and DYNAMIC. */ -#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1 -/** Number of flag bits used to indicate the tablespace page size */ -#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4 -/** Width of the DATA_DIR flag. This flag indicates that the tablespace -is found in a remote location, not the default data directory. 
*/ -#define FSP_FLAGS_WIDTH_DATA_DIR 1 -/** Number of flag bits used to indicate the page compression and compression level */ -#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 -#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 - -/** Number of flag bits used to indicate atomic writes for this tablespace */ -#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 - -/** Width of all the currently known tablespace flags */ -#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ - + FSP_FLAGS_WIDTH_ZIP_SSIZE \ - + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ - + FSP_FLAGS_WIDTH_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_DATA_DIR \ - + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ - + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ - + FSP_FLAGS_WIDTH_ATOMIC_WRITES ) - -/** A mask of all the known/used bits in tablespace flags */ -#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) - -/** Zero relative shift position of the POST_ANTELOPE field */ -#define FSP_FLAGS_POS_POST_ANTELOPE 0 -/** Zero relative shift position of the ZIP_SSIZE field */ -#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \ - + FSP_FLAGS_WIDTH_POST_ANTELOPE) -/** Zero relative shift position of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ - + FSP_FLAGS_WIDTH_ZIP_SSIZE) -/** Note that these need to be before the page size to be compatible with -dictionary */ -/** Zero relative shift position of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ATOMIC_BLOBS \ - + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) -/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \ - + FSP_FLAGS_WIDTH_PAGE_COMPRESSION) -/** Zero relative shift position of the ATOMIC_WRITES field */ -#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ - + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) -/** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE 
(FSP_FLAGS_POS_ATOMIC_WRITES \ - + FSP_FLAGS_WIDTH_ATOMIC_WRITES) -/** Zero relative shift position of the start of the UNUSED bits */ -#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_PAGE_SSIZE) #define FSP_FLAGS_POS_DATA_DIR_ORACLE (FSP_FLAGS_POS_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) -/** Zero relative shift position of the start of the UNUSED bits */ -#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR \ - + FSP_FLAGS_WIDTH_DATA_DIR) - -/** Bit mask of the POST_ANTELOPE field */ -#define FSP_FLAGS_MASK_POST_ANTELOPE \ - ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \ - << FSP_FLAGS_POS_POST_ANTELOPE) -/** Bit mask of the ZIP_SSIZE field */ -#define FSP_FLAGS_MASK_ZIP_SSIZE \ - ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \ - << FSP_FLAGS_POS_ZIP_SSIZE) -/** Bit mask of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_MASK_ATOMIC_BLOBS \ - ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \ - << FSP_FLAGS_POS_ATOMIC_BLOBS) -/** Bit mask of the PAGE_SSIZE field */ -#define FSP_FLAGS_MASK_PAGE_SSIZE \ - ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \ - << FSP_FLAGS_POS_PAGE_SSIZE) -/** Bit mask of the DATA_DIR field */ -#define FSP_FLAGS_MASK_DATA_DIR \ - ((~(~0U << FSP_FLAGS_WIDTH_DATA_DIR)) \ - << FSP_FLAGS_POS_DATA_DIR) /** Bit mask of the DATA_DIR field */ #define FSP_FLAGS_MASK_DATA_DIR_ORACLE \ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \ << FSP_FLAGS_POS_DATA_DIR_ORACLE) -/** Bit mask of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_MASK_PAGE_COMPRESSION \ - ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ - << FSP_FLAGS_POS_PAGE_COMPRESSION) -/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ - ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ - << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) -/** Bit mask of the ATOMIC_WRITES field */ -#define FSP_FLAGS_MASK_ATOMIC_WRITES \ - ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ - << FSP_FLAGS_POS_ATOMIC_WRITES) -/** Return the 
value of the POST_ANTELOPE field */ -#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ - ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ - >> FSP_FLAGS_POS_POST_ANTELOPE) -/** Return the value of the ZIP_SSIZE field */ -#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \ - ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \ - >> FSP_FLAGS_POS_ZIP_SSIZE) -/** Return the value of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \ - ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \ - >> FSP_FLAGS_POS_ATOMIC_BLOBS) -/** Return the value of the PAGE_SSIZE field */ -#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \ - >> FSP_FLAGS_POS_PAGE_SSIZE) -/** Return the value of the DATA_DIR field */ -#define FSP_FLAGS_HAS_DATA_DIR(flags) \ - ((flags & FSP_FLAGS_MASK_DATA_DIR) \ - >> FSP_FLAGS_POS_DATA_DIR) + #define FSP_FLAGS_HAS_DATA_DIR_ORACLE(flags) \ ((flags & FSP_FLAGS_MASK_DATA_DIR_ORACLE) \ >> FSP_FLAGS_POS_DATA_DIR_ORACLE) -/** Return the contents of the UNUSED bits */ -#define FSP_FLAGS_GET_UNUSED(flags) \ - (flags >> FSP_FLAGS_POS_UNUSED) - -/** Return the value of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ - >> FSP_FLAGS_POS_PAGE_COMPRESSION) -/** Return the value of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ - >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) -/** Return the value of the ATOMIC_WRITES field */ -#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ - ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ - >> FSP_FLAGS_POS_ATOMIC_WRITES) - -/** Set a PAGE_SSIZE into the correct bits in a given -tablespace flags. */ -#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ - (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) - -/** Set a PAGE_COMPRESSION into the correct bits in a given -tablespace flags. 
*/ -#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ - (flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION)) - -/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given -tablespace flags. */ -#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ - (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) - -/** Set a ATOMIC_WRITES into the correct bits in a given -tablespace flags. */ -#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ - (flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES)) - -/* @} */ /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ @@ -237,7 +87,7 @@ descriptor page, but used only in the first. */ < 64 pages, this number is 64, i.e., we have initialized the space about the first extent, but have not - physically allocted those pages to the + physically allocated those pages to the file */ #define FSP_SPACE_FLAGS 16 /* fsp_space_t.flags, similar to dict_table_t::flags */ @@ -270,6 +120,7 @@ descriptor page, but used only in the first. */ FSP_FREE_LIMIT at a time */ /* @} */ +#ifndef UNIV_INNOCHECKSUM /* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */ @@ -318,9 +169,8 @@ typedef byte fseg_inode_t; (16 + 3 * FLST_BASE_NODE_SIZE \ + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) -#define FSP_SEG_INODES_PER_PAGE(zip_size) \ - (((zip_size ? zip_size : UNIV_PAGE_SIZE) \ - - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) +#define FSP_SEG_INODES_PER_PAGE(page_size) \ + ((page_size.physical() - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) /* Number of segment inodes which fit on a single page */ @@ -407,60 +257,123 @@ the extent are free and which contain old tuple version to clean. */ /* @} */ -#ifndef UNIV_INNOCHECKSUM /**********************************************************************//** Initializes the file space system. 
*/ -UNIV_INTERN void fsp_init(void); /*==========*/ + /**********************************************************************//** Gets the size of the system tablespace from the tablespace header. If we do not have an auto-extending data file, this should be equal to the size of the data files. If there is an auto-extending data file, this can be smaller. -@return size in pages */ -UNIV_INTERN +@return size in pages */ ulint fsp_header_get_tablespace_size(void); /*================================*/ -/**********************************************************************//** -Reads the file space size stored in the header page. -@return tablespace size stored in the space header */ -UNIV_INTERN + +/** Calculate the number of pages to extend a datafile. +We extend single-table and general tablespaces first one extent at a time, +but 4 at a time for bigger tablespaces. It is not enough to extend always +by one extent, because we need to add at least one extent to FSP_FREE. +A single extent descriptor page will track many extents. And the extent +that uses its extent descriptor page is put onto the FSP_FREE_FRAG list. +Extents that do not use their extent descriptor page are added to FSP_FREE. +The physical page size is used to determine how many extents are tracked +on one extent descriptor page. See xdes_calc_descriptor_page(). +@param[in] page_size page_size of the datafile +@param[in] size current number of pages in the datafile +@return number of pages to extend the file. */ ulint -fsp_get_size_low( -/*=============*/ - page_t* page); /*!< in: header page (page 0 in the tablespace) */ +fsp_get_pages_to_extend_ibd( + const page_size_t& page_size, + ulint size); + +/** Calculate the number of physical pages in an extent for this file. +@param[in] page_size page_size of the datafile +@return number of pages in an extent for this file. 
*/ +UNIV_INLINE +ulint +fsp_get_extent_size_in_pages(const page_size_t& page_size) +{ + return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical()); +} + /**********************************************************************//** Reads the space id from the first page of a tablespace. -@return space id, ULINT UNDEFINED if error */ -UNIV_INTERN +@return space id, ULINT UNDEFINED if error */ ulint fsp_header_get_space_id( /*====================*/ const page_t* page); /*!< in: first page of a tablespace */ -/**********************************************************************//** -Reads the space flags from the first page of a tablespace. -@return flags */ -UNIV_INTERN + +/** Read a tablespace header field. +@param[in] page first page of a tablespace +@param[in] field the header field +@return the contents of the header field */ +inline ulint -fsp_header_get_flags( -/*=================*/ - const page_t* page); /*!< in: first page of a tablespace */ -/**********************************************************************//** -Reads the compressed page size from the first page of a tablespace. -@return compressed page size in bytes, or 0 if uncompressed */ -UNIV_INTERN +fsp_header_get_field(const page_t* page, ulint field) +{ + return(mach_read_from_4(FSP_HEADER_OFFSET + field + page)); +} + +/** Read the flags from the tablespace header page. +@param[in] page first page of a tablespace +@return the contents of FSP_SPACE_FLAGS */ +inline ulint -fsp_header_get_zip_size( -/*====================*/ - const page_t* page); /*!< in: first page of a tablespace */ +fsp_header_get_flags(const page_t* page) +{ + return(fsp_header_get_field(page, FSP_SPACE_FLAGS)); +} + +/** Reads the page size from the first page of a tablespace. +@param[in] page first page of a tablespace +@return page size */ +page_size_t +fsp_header_get_page_size( + const page_t* page); + +/** Decoding the encryption info +from the first page of a tablespace. 
+@param[in,out] key key
+@param[in,out] iv iv
+@param[in] encryption_info encryption info.
+@return true if success */
+bool
+fsp_header_decode_encryption_info(
+ byte* key,
+ byte* iv,
+ byte* encryption_info);
+
+/** Reads the encryption key from the first page of a tablespace.
+@param[in] fsp_flags tablespace flags
+@param[in,out] key tablespace key
+@param[in,out] iv tablespace iv
+@param[in] page first page of a tablespace
+@return true if success */
+bool
+fsp_header_get_encryption_key(
+ ulint fsp_flags,
+ byte* key,
+ byte* iv,
+ page_t* page);
+
+/** Check the encryption key from the first page of a tablespace.
+@param[in] fsp_flags tablespace flags
+@param[in] page first page of a tablespace
+@return true if success */
+bool
+fsp_header_check_encryption_key(
+ ulint fsp_flags,
+ page_t* page);
+
 /**********************************************************************//**
Writes the space id and flags to a tablespace header. The flags contain
row type, physical/compressed page size, and logical/uncompressed page
size of the tablespace. */
-UNIV_INTERN
void
fsp_header_init_fields(
/*===================*/
@@ -468,34 +381,46 @@ fsp_header_init_fields(
 ulint space_id, /*!< in: space id */
 ulint flags); /*!< in: tablespace flags
 (FSP_SPACE_FLAGS): 0, or
 table->flags if newer than COMPACT */
-/**********************************************************************//**
-Initializes the space header of a new created space and creates also the
-insert buffer tree root if space == 0. */
-UNIV_INTERN
-void
+
+/** Rotate the encryption info in the space header.
+@param[in] space tablespace
+@param[in] encrypt_info buffer for re-encrypt key.
+@param[in,out] mtr mini-transaction
+@return true if success. */
+bool
+fsp_header_rotate_encryption(
+ fil_space_t* space,
+ byte* encrypt_info,
+ mtr_t* mtr);
+
+/** Initializes the space header of a new created space and creates also the
+insert buffer tree root if space == 0.
+@param[in] space_id space id
+@param[in] size current size in blocks
+@param[in,out] mtr mini-transaction
+@return true on success, otherwise false. */
+bool
fsp_header_init(
-/*============*/
- ulint space, /*!< in: space id */
- ulint size, /*!< in: current size in blocks */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ ulint space_id,
+ ulint size,
+ mtr_t* mtr);
+
 /**********************************************************************//**
Increases the space size field of a space. */
-UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id, /*!< in: space id */
 ulint size_inc, /*!< in: size increment in pages */
 mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create(
/*========*/
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
 ulint page, /*!< in: page where the segment header is placed: if this is != 0, the page must belong to another segment, if this is 0, a new page will be allocated and it
@@ -507,11 +432,10 @@ fseg_create(
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
-UNIV_INTERN
buf_block_t*
fseg_create_general(
/*================*/
- ulint space, /*!< in: space id */
+ ulint space_id,/*!< in: space id */
 ulint page, /*!< in: page where the segment header is placed: if this is != 0, the page must belong to another segment, if this is 0, a new page will be allocated and it
@@ -528,8 +452,7 @@ fseg_create_general(
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
-@return number of reserved pages */ -UNIV_INTERN +@return number of reserved pages */ ulint fseg_n_reserved_pages( /*==================*/ @@ -540,15 +463,15 @@ fseg_n_reserved_pages( Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. -@param[in/out] seg_header segment header -@param[in] hint hint of which page would be desirable -@param[in] direction if the new page is needed because +@param[in,out] seg_header segment header +@param[in] hint hint of which page would be desirable +@param[in] direction if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR -@param[in/out] mtr mini-transaction -@return X-latched block, or NULL if no page could be allocated */ +@param[in,out] mtr mini-transaction +@return X-latched block, or NULL if no page could be allocated */ #define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ fseg_alloc_free_page_general(seg_header, hint, direction, \ FALSE, mtr, mtr) @@ -560,7 +483,6 @@ fragmentation. @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) @retval block (not allocated or initialized) otherwise */ -UNIV_INTERN buf_block_t* fseg_alloc_free_page_general( /*=========================*/ @@ -583,8 +505,8 @@ fseg_alloc_free_page_general( If init_mtr!=mtr, but the page is already latched in mtr, do not initialize the page. */ MY_ATTRIBUTE((warn_unused_result, nonnull)); -/**********************************************************************//** -Reserves free pages from a tablespace. All mini-transactions which may + +/** Reserves free pages from a tablespace. 
All mini-transactions which may use several pages from the tablespace should call this function beforehand and reserve enough free extents so that they certainly will be able to do their operation, like a B-tree page split, fully. Reservations @@ -603,53 +525,71 @@ The purpose is to avoid dead end where the database is full but the user cannot free any space because these freeing operations temporarily reserve some space. -Single-table tablespaces whose size is < 32 pages are a special case. In this -function we would liberally reserve several 64 page extents for every page -split or merge in a B-tree. But we do not want to waste disk space if the table -only occupies < 32 pages. That is why we apply different rules in that special -case, just ensuring that there are 3 free pages available. -@return TRUE if we were able to make the reservation */ -UNIV_INTERN -ibool +Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special +case. In this function we would liberally reserve several extents for +every page split or merge in a B-tree. But we do not want to waste disk space +if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply +different rules in that special case, just ensuring that there are n_pages +free pages available. + +@param[out] n_reserved number of extents actually reserved; if we + return true and the tablespace size is < + FSP_EXTENT_SIZE pages, then this can be 0, + otherwise it is n_ext +@param[in] space_id tablespace identifier +@param[in] n_ext number of extents to reserve +@param[in] alloc_type page reservation type (FSP_BLOB, etc) +@param[in,out] mtr the mini transaction +@param[in] n_pages for small tablespaces (tablespace size is + less than FSP_EXTENT_SIZE), number of free + pages to reserve. 
+@return true if we were able to make the reservation */ +bool fsp_reserve_free_extents( -/*=====================*/ - ulint* n_reserved,/*!< out: number of extents actually reserved; if we - return TRUE and the tablespace size is < 64 pages, - then this can be 0, otherwise it is n_ext */ - ulint space, /*!< in: space id */ - ulint n_ext, /*!< in: number of extents to reserve */ - ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr); /*!< in: mini-transaction */ -/**********************************************************************//** -This function should be used to get information on how much we still -will be able to insert new data to the database without running out the -tablespace. Only free extents are taken into account and we also subtract -the safety margin required by the above function fsp_reserve_free_extents. -@return available space in kB */ -UNIV_INTERN -ullint + ulint* n_reserved, + ulint space_id, + ulint n_ext, + fsp_reserve_t alloc_type, + mtr_t* mtr, + ulint n_pages = 2); + +/** Calculate how many KiB of new data we will be able to insert to the +tablespace without running out of space. +@param[in] space_id tablespace ID +@return available space in KiB +@retval UINTMAX_MAX if unknown */ +uintmax_t fsp_get_available_space_in_free_extents( -/*====================================*/ - ulint space); /*!< in: space id */ + ulint space_id); + +/** Calculate how many KiB of new data we will be able to insert to the +tablespace without running out of space. Start with a space object that has +been acquired by the caller who holds it for the calculation, +@param[in] space tablespace object from fil_space_acquire() +@return available space in KiB */ +uintmax_t +fsp_get_available_space_in_free_extents( + const fil_space_t* space); + /**********************************************************************//** Frees a single page of a segment. 
*/ -UNIV_INTERN void fseg_free_page( /*===========*/ fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ + ulint space_id, /*!< in: space id */ ulint page, /*!< in: page offset */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Checks if a single page of a segment is free. -@return true if free */ -UNIV_INTERN +@return true if free */ bool fseg_page_is_free( /*==============*/ fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ + ulint space_id, /*!< in: space id */ ulint page) /*!< in: page offset */ MY_ATTRIBUTE((nonnull, warn_unused_result)); /**********************************************************************//** @@ -657,8 +597,7 @@ Frees part of a segment. This function can be used to free a segment by repeatedly calling this function in different mini-transactions. Doing the freeing in a single mini-transaction might result in too big a mini-transaction. -@return TRUE if freeing completed */ -UNIV_INTERN +@return TRUE if freeing completed */ ibool fseg_free_step( /*===========*/ @@ -666,58 +605,47 @@ fseg_free_step( resides on the first page of the frag list of the segment, this pointer becomes obsolete after the last freeing step */ - mtr_t* mtr); /*!< in/out: mini-transaction */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + MY_ATTRIBUTE((warn_unused_result)); /**********************************************************************//** Frees part of a segment. Differs from fseg_free_step because this function leaves the header page unfreed. 
-@return TRUE if freeing completed, except the header page */ -UNIV_INTERN +@return TRUE if freeing completed, except the header page */ ibool fseg_free_step_not_header( /*======================*/ fseg_header_t* header, /*!< in: segment header which must reside on the first fragment page of the segment */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/***********************************************************************//** -Checks if a page address is an extent descriptor page address. -@return TRUE if a descriptor page */ + bool ahi, /*!< in: whether we may need to drop + the adaptive hash index */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Checks if a page address is an extent descriptor page address. +@param[in] page_id page id +@param[in] page_size page size +@return TRUE if a descriptor page */ UNIV_INLINE ibool fsp_descr_page( -/*===========*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no);/*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size); + /***********************************************************//** Parses a redo log record of a file page init. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* fsp_parse_init_file_page( /*=====================*/ byte* ptr, /*!< in: buffer */ byte* end_ptr, /*!< in: buffer end */ buf_block_t* block); /*!< in: block or NULL */ -/*******************************************************************//** -Validates the file space system and its segments. -@return TRUE if ok */ -UNIV_INTERN -ibool -fsp_validate( -/*=========*/ - ulint space); /*!< in: space id */ -/*******************************************************************//** -Prints info of a file space. 
*/ -UNIV_INTERN -void -fsp_print( -/*======*/ - ulint space); /*!< in: space id */ #ifdef UNIV_DEBUG /*******************************************************************//** Validates a segment. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool fseg_validate( /*==========*/ @@ -727,7 +655,6 @@ fseg_validate( #ifdef UNIV_BTR_PRINT /*******************************************************************//** Writes info of a segment. */ -UNIV_INTERN void fseg_print( /*=======*/ @@ -735,42 +662,95 @@ fseg_print( mtr_t* mtr); /*!< in/out: mini-transaction */ #endif /* UNIV_BTR_PRINT */ -/********************************************************************//** -Validate and return the tablespace flags, which are stored in the -tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for -ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats, -COMPRESSED and DYNAMIC, use a file format > Antelope so they should -have a file format number plus the DICT_TF_COMPACT bit set. -@return true if check ok */ +/** Determine if the tablespace is compressed from tablespace flags. +@param[in] flags Tablespace flags +@return true if compressed, false if not compressed */ UNIV_INLINE bool -fsp_flags_is_valid( -/*===============*/ - ulint flags) /*!< in: tablespace flags */ - MY_ATTRIBUTE((warn_unused_result, const)); -/********************************************************************//** -Determine if the tablespace is compressed from dict_table_t::flags. -@return TRUE if compressed, FALSE if not compressed */ -UNIV_INLINE -ibool fsp_flags_is_compressed( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ + ulint flags); -/********************************************************************//** -Calculates the descriptor index within a descriptor page. -@return descriptor index */ +/** Determine if two tablespaces are equivalent or compatible. 
+@param[in] flags1 First tablespace flags +@param[in] flags2 Second tablespace flags +@return true the flags are compatible, false if not */ +UNIV_INLINE +bool +fsp_flags_are_equal( + ulint flags1, + ulint flags2); + +/** Initialize an FSP flags integer. +@param[in] page_size page sizes in bytes and compression flag. +@param[in] atomic_blobs Used by Dynammic and Compressed. +@param[in] has_data_dir This tablespace is in a remote location. +@param[in] is_shared This tablespace can be shared by many tables. +@param[in] is_temporary This tablespace is temporary. +@param[in] is_encrypted This tablespace is encrypted. +@return tablespace flags after initialization */ +UNIV_INLINE +ulint +fsp_flags_init( + const page_size_t& page_size, + bool atomic_blobs, + bool has_data_dir, + bool is_shared, + bool is_temporary, + bool page_compression, + ulint page_compression_level, + ulint atomic_writes, + bool is_encrypted = false); + +/** Convert a 32 bit integer tablespace flags to the 32 bit table flags. +This can only be done for a tablespace that was built as a file-per-table +tablespace. Note that the fsp_flags cannot show the difference between a +Compact and Redundant table, so an extra Compact boolean must be supplied. + Low order bit + | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC +fil_space_t::flags | 0 | 0 | 1 | 1 +dict_table_t::flags | 0 | 1 | 1 | 1 +@param[in] fsp_flags fil_space_t::flags +@param[in] compact true if not Redundant row format +@return tablespace flags (fil_space_t::flags) */ +ulint +fsp_flags_to_dict_tf( + ulint fsp_flags, + bool compact); + +/** Calculates the descriptor index within a descriptor page. 
+@param[in] page_size page size +@param[in] offset page offset +@return descriptor index */ UNIV_INLINE ulint xdes_calc_descriptor_index( -/*=======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset); /*!< in: page offset */ + const page_size_t& page_size, + ulint offset); + +/** Gets pointer to a the extent descriptor of a page. +The page where the extent descriptor resides is x-locked. If the page offset +is equal to the free limit of the space, adds new extents from above the free +limit to the space free list, if not free limit == space size. This adding +is necessary to make the descriptor defined, as they are uninitialized +above the free limit. +@param[in] space_id space id +@param[in] offset page offset; if equal to the free limit, we +try to add new extents to the space free list +@param[in] page_size page size +@param[in,out] mtr mini-transaction +@return pointer to the extent descriptor, NULL if the page does not +exist in the space or if the offset exceeds the free limit */ +xdes_t* +xdes_get_descriptor( + ulint space_id, + ulint offset, + const page_size_t& page_size, + mtr_t* mtr) +MY_ATTRIBUTE((warn_unused_result)); /**********************************************************************//** Gets a descriptor bit of a page. -@return TRUE if free */ +@return TRUE if free */ UNIV_INLINE ibool xdes_get_bit( @@ -780,51 +760,30 @@ xdes_get_bit( ulint offset);/*!< in: page offset within extent: 0 ... FSP_EXTENT_SIZE - 1 */ -/********************************************************************//** -Calculates the page where the descriptor of a page resides. -@return descriptor page offset */ +/** Calculates the page where the descriptor of a page resides. 
+@param[in] page_size page size +@param[in] offset page offset +@return descriptor page offset */ UNIV_INLINE ulint xdes_calc_descriptor_page( -/*======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset); /*!< in: page offset */ + const page_size_t& page_size, + ulint offset); #endif /* !UNIV_INNOCHECKSUM */ -/********************************************************************//** -Extract the zip size from tablespace flags. A tablespace has only one -physical page size whether that page is compressed or not. -@return compressed page size of the file-per-table tablespace in bytes, -or zero if the table is not compressed. */ -UNIV_INLINE -ulint -fsp_flags_get_zip_size( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ -/********************************************************************//** -Extract the page size from tablespace flags. -@return page size of the tablespace in bytes */ -UNIV_INLINE -ulint -fsp_flags_get_page_size( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ - -/*********************************************************************/ -/* @return offset into fsp header where crypt data is stored */ +/*********************************************************************//** +@return offset into fsp header where crypt data is stored */ UNIV_INTERN ulint fsp_header_get_crypt_offset( /*========================*/ - ulint zip_size, /*!< in: zip_size */ + const page_size_t& page_size,/*!< in: page size */ ulint* max_size); /*!< out: free space after offset */ -#define fsp_page_is_free(space,page,mtr) \ - fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) #ifndef UNIV_INNOCHECKSUM + /**********************************************************************//** Checks if a single page is free. 
@return true if free */ @@ -837,7 +796,11 @@ fsp_page_is_free_func( mtr_t* mtr, /*!< in/out: mini-transaction */ const char *file, ulint line); -#endif + +#define fsp_page_is_free(space,page,mtr) \ + fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) + +#endif /* UNIV_INNOCHECKSUM */ #ifndef UNIV_NONINL #include "fsp0fsp.ic" diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index 9f09a9d53e1..475dd238728 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,260 +26,255 @@ Created 12/18/1995 Heikki Tuuri #ifndef UNIV_INNOCHECKSUM -/***********************************************************************//** -Checks if a page address is an extent descriptor page address. -@return TRUE if a descriptor page */ +/** Checks if a page address is an extent descriptor page address. 
+@param[in] page_id page id +@param[in] page_size page size +@return TRUE if a descriptor page */ UNIV_INLINE ibool fsp_descr_page( -/*===========*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no)/*!< in: page number */ + const page_id_t& page_id, + const page_size_t& page_size) { - ut_ad(ut_is_2pow(zip_size)); - - if (!zip_size) { - return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET); - } - - return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET); + return((page_id.page_no() & (page_size.physical() - 1)) + == FSP_XDES_OFFSET); } -/********************************************************************//** -Validate and return the tablespace flags, which are stored in the -tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for -ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats, -COMPRESSED and DYNAMIC, use a file format > Antelope so they should -have a file format number plus the DICT_TF_COMPACT bit set. -@return true if check ok */ +/** Determine if the tablespace is compressed from tablespace flags. +@param[in] flags Tablespace flags +@return true if compressed, false if not compressed */ UNIV_INLINE bool -fsp_flags_is_valid( -/*===============*/ - ulint flags) /*!< in: tablespace flags */ -{ - ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags); - ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); - ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); - ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); - ulint unused = FSP_FLAGS_GET_UNUSED(flags); - ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); - ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); - ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); - - DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); - - /* fsp_flags is zero unless atomic_blobs is set. */ - /* Make sure there are no bits that we do not know about. 
*/ - if (unused != 0 || flags == 1) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n", - flags, unused); - return(false); - } else if (post_antelope) { - /* The Antelope row formats REDUNDANT and COMPACT did - not use tablespace flags, so this flag and the entire - 4-byte field is zero for Antelope row formats. */ - - if (!atomic_blobs) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n", - flags, atomic_blobs); - return(false); - } - } - - if (!atomic_blobs) { - /* Barracuda row formats COMPRESSED and DYNAMIC build on - the page structure introduced for the COMPACT row format - by allowing long fields to be broken into prefix and - externally stored parts. */ - - if (post_antelope || zip_ssize != 0) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n", - flags, zip_ssize, atomic_blobs); - return(false); - } - - } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n", - flags, zip_ssize, PAGE_ZIP_SSIZE_MAX); - return(false); - } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) { - - /* The page size field can be used for any row type, or it may - be zero for an original 16k page size. - Validate the page shift size is within allowed range. 
*/ - - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n", - flags, page_ssize, UNIV_PAGE_SSIZE_MAX); - return(false); - - } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n", - flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG); - return(false); - } - - /* Page compression level requires page compression and atomic blobs - to be set */ - if (page_compression_level || page_compression) { - if (!page_compression || !atomic_blobs) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n" - "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n", - flags, page_compression, page_compression_level, atomic_blobs); - return(false); - } - } - - if (atomic_writes > ATOMIC_WRITES_OFF) { - fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n", - flags, atomic_writes); - return (false); - } - -#if UNIV_FORMAT_MAX != UNIV_FORMAT_B -# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." -#endif - - /* The DATA_DIR field can be used for any row type so there is - nothing here to validate. */ - - return(true); -} - -/********************************************************************//** -Determine if the tablespace is compressed from dict_table_t::flags. -@return TRUE if compressed, FALSE if not compressed */ -UNIV_INLINE -ibool fsp_flags_is_compressed( -/*====================*/ - ulint flags) /*!< in: tablespace flags */ + ulint flags) { return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0); } -#endif /* !UNIV_INNOCHECKSUM */ +#define ACTUAL_SSIZE(ssize) (0 == ssize ? UNIV_PAGE_SSIZE_ORIG : ssize) -/********************************************************************//** -Extract the zip size from tablespace flags. -@return compressed page size of the file-per-table tablespace in bytes, -or zero if the table is not compressed. 
*/ +/** Determine if two tablespaces are equivalent or compatible. +@param[in] flags1 First tablespace flags +@param[in] flags2 Second tablespace flags +@return true the flags are compatible, false if not */ UNIV_INLINE -ulint -fsp_flags_get_zip_size( -/*===================*/ - ulint flags) /*!< in: tablespace flags */ +bool +fsp_flags_are_equal( + ulint flags1, + ulint flags2) { - ulint zip_size = 0; - ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); - - /* Convert from a 'log2 minus 9' to a page size in bytes. */ - if (ssize) { - zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); - - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); + /* If either one of these flags is ULINT_UNDEFINED, + then they are not equal */ + if (flags1 == ULINT_UNDEFINED || flags2 == ULINT_UNDEFINED) { + return(false); } - return(zip_size); -} - -/********************************************************************//** -Extract the page size from tablespace flags. -@return page size of the tablespace in bytes */ -UNIV_INLINE -ulint -fsp_flags_get_page_size( -/*====================*/ - ulint flags) /*!< in: tablespace flags */ -{ - ulint page_size = 0; - ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); - - /* Convert from a 'log2 minus 9' to a page size in bytes. */ - if (UNIV_UNLIKELY(ssize)) { - page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); - - ut_ad(page_size <= UNIV_PAGE_SIZE_MAX); - } else { - /* If the page size was not stored, then it is the - original 16k. */ - page_size = UNIV_PAGE_SIZE_ORIG; + if (!fsp_is_shared_tablespace(flags1) || !fsp_is_shared_tablespace(flags2)) { + /* At least one of these is a single-table tablespaces so all + flags must match. */ + return(flags1 == flags2); } - return(page_size); + /* Both are shared tablespaces which can contain all formats. + But they must have the same logical and physical page size. + Once InnoDB can support multiple page sizes together, + the logical page size will not matter. 
*/ + ulint zip_ssize1 = ACTUAL_SSIZE(FSP_FLAGS_GET_ZIP_SSIZE(flags1)); + ulint zip_ssize2 = ACTUAL_SSIZE(FSP_FLAGS_GET_ZIP_SSIZE(flags2)); + ulint page_ssize1 = ACTUAL_SSIZE(FSP_FLAGS_GET_PAGE_SSIZE(flags1)); + ulint page_ssize2 = ACTUAL_SSIZE(FSP_FLAGS_GET_PAGE_SSIZE(flags2)); + + return(zip_ssize1 == zip_ssize2 && page_ssize1 == page_ssize2); } -#ifndef UNIV_INNOCHECKSUM - -/********************************************************************//** -Add the page size to the tablespace flags. -@return tablespace flags after page size is added */ +/** Convert a page size, which is a power of 2, to an ssize, which is +the number of bit shifts from 512 to make that page size. +@param[in] page_size compressed page size in bytes +@return an ssize created from the page size provided. */ UNIV_INLINE ulint -fsp_flags_set_page_size( -/*====================*/ - ulint flags, /*!< in: tablespace flags */ - ulint page_size) /*!< in: page size in bytes */ +page_size_to_ssize( + ulint page_size) { - ulint ssize = 0; - ulint shift; + ulint ssize; - /* Page size should be > UNIV_PAGE_SIZE_MIN */ - ut_ad(page_size >= UNIV_PAGE_SIZE_MIN); - ut_ad(page_size <= UNIV_PAGE_SIZE_MAX); + for (ssize = UNIV_ZIP_SIZE_SHIFT_MIN; + ((ulint) 1 << ssize) < page_size; + ssize++) {}; - if (page_size == UNIV_PAGE_SIZE_ORIG) { - ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags)); + return(ssize - UNIV_ZIP_SIZE_SHIFT_MIN + 1); +} + +/** Add the compressed page size to the tablespace flags. +@param[in] flags Tablespace flags +@param[in] page_size page sizes in bytes and compression flag. 
+@return tablespace flags after zip size is added */ +UNIV_INLINE +ulint +fsp_flags_set_zip_size( + ulint flags, + const page_size_t& page_size) +{ + if (!page_size.is_compressed()) { return(flags); } - for (shift = UNIV_PAGE_SIZE_SHIFT_MAX; - shift >= UNIV_PAGE_SIZE_SHIFT_MIN; - shift--) { - ulint mask = (1 << shift); - if (page_size & mask) { - ut_ad(!(page_size & ~mask)); - ssize = shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1; - break; - } - } + /* Zip size should be a power of 2 between UNIV_ZIP_SIZE_MIN + and UNIV_ZIP_SIZE_MAX */ + ut_ad(page_size.physical() >= UNIV_ZIP_SIZE_MIN); + ut_ad(page_size.physical() <= UNIV_ZIP_SIZE_MAX); + ut_ad(ut_is_2pow(page_size.physical())); - ut_ad(ssize); + ulint ssize = page_size_to_ssize(page_size.physical()); + + ut_ad(ssize > 0); ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX); - flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize); + flags |= (ssize << FSP_FLAGS_POS_ZIP_SSIZE); ut_ad(fsp_flags_is_valid(flags)); return(flags); } -/********************************************************************//** -Calculates the descriptor index within a descriptor page. -@return descriptor index */ +/** Add the page size to the tablespace flags. +@param[in] flags Tablespace flags +@param[in] page_size page sizes in bytes and compression flag. +@return tablespace flags after page size is added */ +UNIV_INLINE +ulint +fsp_flags_set_page_size( + ulint flags, + const page_size_t& page_size) +{ + /* Page size should be a power of two between UNIV_PAGE_SIZE_MIN + and UNIV_PAGE_SIZE */ + ut_ad(page_size.logical() >= UNIV_PAGE_SIZE_MIN); + ut_ad(page_size.logical() <= UNIV_PAGE_SIZE_MAX); + ut_ad(ut_is_2pow(page_size.logical())); + + /* Remove this assert once we add support for different + page size per tablespace. 
Currently all tablespaces must + have a page size that is equal to innodb-page-size */ + ut_ad(page_size.logical() == UNIV_PAGE_SIZE); + + if (page_size.logical() == UNIV_PAGE_SIZE_ORIG) { + ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags)); + + } else { + ulint ssize = page_size_to_ssize(page_size.logical()); + + ut_ad(ssize); + ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX); + + flags |= (ssize << FSP_FLAGS_POS_PAGE_SSIZE); + } + + ut_ad(fsp_flags_is_valid(flags)); + + return(flags); +} + +/** Initialize an FSP flags integer. +@param[in] page_size page sizes in bytes and compression flag. +@param[in] atomic_blobs Used by Dynammic and Compressed. +@param[in] has_data_dir This tablespace is in a remote location. +@param[in] is_shared This tablespace can be shared by many tables. +@param[in] is_temporary This tablespace is temporary. +@param[in] is_encrypted This tablespace is encrypted. +@param[in] page_compressed Table uses page compression +@param[in] page_compression_level Page compression level +@param[in] atomic_writes Table uses atomic writes +@@return tablespace flags after initialization */ +UNIV_INLINE +ulint +fsp_flags_init( + const page_size_t& page_size, + bool atomic_blobs, + bool has_data_dir, + bool is_shared, + bool is_temporary, + bool page_compression, + ulint page_compression_level, + ulint atomic_writes, + bool is_encrypted) +{ + ut_ad(page_size.physical() <= page_size.logical()); + ut_ad(!page_size.is_compressed() || atomic_blobs); + + /* Page size should be a power of two between UNIV_PAGE_SIZE_MIN + and UNIV_PAGE_SIZE, but zip_size may be 0 if not compressed. */ + ulint flags = fsp_flags_set_page_size(0, page_size); + + if (atomic_blobs) { + flags |= FSP_FLAGS_MASK_POST_ANTELOPE + | FSP_FLAGS_MASK_ATOMIC_BLOBS; + } + + /* If the zip_size is explicit and different from the default, + compressed row format is implied. 
*/ + flags = fsp_flags_set_zip_size(flags, page_size); + + if (has_data_dir) { + flags |= FSP_FLAGS_MASK_DATA_DIR; + } + + /* Shared tablespaces can hold all row formats, so we only mark the + POST_ANTELOPE and ATOMIC_BLOB bits if it is compressed. */ + if (is_shared) { + ut_ad(!has_data_dir); + flags |= FSP_FLAGS_MASK_SHARED; + } + + if (is_temporary) { + ut_ad(!has_data_dir); + flags |= FSP_FLAGS_MASK_TEMPORARY; + } + + if (is_encrypted) { + flags |= FSP_FLAGS_MASK_ENCRYPTION; + } + + /* In addition, tablespace flags also contain if the page + compression is used for this table. */ + if (page_compression) { + flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(flags, page_compression); + } + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. */ + if (page_compression && page_compression_level) { + flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, page_compression_level); + } + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + if (atomic_writes) { + flags |= FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomic_writes); + } + + ut_ad(fsp_flags_is_valid(flags)); + + return(flags); +} + +/** Calculates the descriptor index within a descriptor page. 
+@param[in] page_size page size +@param[in] offset page offset +@return descriptor index */ UNIV_INLINE ulint xdes_calc_descriptor_index( -/*=======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset) /*!< in: page offset */ + const page_size_t& page_size, + ulint offset) { - ut_ad(ut_is_2pow(zip_size)); - - if (zip_size == 0) { - return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE) - / FSP_EXTENT_SIZE); - } else { - return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE); - } + return(ut_2pow_remainder(offset, page_size.physical()) + / FSP_EXTENT_SIZE); } -#endif /* !UNIV_INNOCHECKSUM */ /**********************************************************************//** Gets a descriptor bit of a page. -@return TRUE if free */ +@return TRUE if free */ UNIV_INLINE ibool xdes_get_bit( @@ -303,17 +298,15 @@ xdes_get_bit( bit_index)); } -#ifndef UNIV_INNOCHECKSUM -/********************************************************************//** -Calculates the page where the descriptor of a page resides. -@return descriptor page offset */ +/** Calculates the page where the descriptor of a page resides. 
+@param[in] page_size page size +@param[in] offset page offset +@return descriptor page offset */ UNIV_INLINE ulint xdes_calc_descriptor_page( -/*======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset) /*!< in: page offset */ + const page_size_t& page_size, + ulint offset) { #ifndef DOXYGEN /* Doxygen gets confused by these */ # if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \ @@ -335,16 +328,24 @@ xdes_calc_descriptor_page( + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE) * XDES_SIZE); - ut_ad(ut_is_2pow(zip_size)); - - if (zip_size == 0) { - return(ut_2pow_round(offset, UNIV_PAGE_SIZE)); - } else { - ut_ad(zip_size > XDES_ARR_OFFSET - + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE); - return(ut_2pow_round(offset, zip_size)); +#ifdef UNIV_DEBUG + if (page_size.is_compressed()) { + ut_a(page_size.physical() > XDES_ARR_OFFSET + + (page_size.physical() / FSP_EXTENT_SIZE) * XDES_SIZE); } +#endif /* UNIV_DEBUG */ + + return(ut_2pow_round(offset, page_size.physical())); } +/** Calculates the descriptor array size. +@param[in] page_size page size +@return size of descriptor array */ +UNIV_INLINE +ulint +xdes_arr_size( + const page_size_t& page_size) +{ + return(page_size.physical()/FSP_EXTENT_SIZE); +} #endif /* !UNIV_INNOCHECKSUM */ - diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h index 5f943ee2b83..44bdddfa3bf 100644 --- a/storage/innobase/include/fsp0pagecompress.h +++ b/storage/innobase/include/fsp0pagecompress.h @@ -51,7 +51,7 @@ fsp_header_get_compression_level( Determine if the tablespace is page compressed from dict_table_t::flags. 
@return TRUE if page compressed, FALSE if not compressed */ UNIV_INLINE -ibool +bool fsp_flags_is_page_compressed( /*=========================*/ ulint flags); /*!< in: tablespace flags */ diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic index e879aa2c16e..a3971da6772 100644 --- a/storage/innobase/include/fsp0pagecompress.ic +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -29,7 +29,7 @@ Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com Determine if the tablespace is page compressed from dict_table_t::flags. @return TRUE if page compressed, FALSE if not page compressed */ UNIV_INLINE -ibool +bool fsp_flags_is_page_compressed( /*=========================*/ ulint flags) /*!< in: tablespace flags */ @@ -65,7 +65,7 @@ fsp_flags_get_atomic_writes( Find out wheather the page is index page or not @return true if page type index page, false if not */ UNIV_INLINE -ibool +bool fil_page_is_index_page( /*===================*/ byte* buf) /*!< in: page */ @@ -77,10 +77,10 @@ fil_page_is_index_page( Find out wheather the page is page compressed @return true if page is page compressed, false if not */ UNIV_INLINE -ibool +bool fil_page_is_compressed( /*===================*/ - byte* buf) /*!< in: page */ + const byte* buf) /*!< in: page */ { return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); } @@ -89,10 +89,10 @@ fil_page_is_compressed( Find out wheather the page is page compressed @return true if page is page compressed, false if not */ UNIV_INLINE -ibool +bool fil_page_is_compressed_encrypted( /*=============================*/ - byte* buf) /*!< in: page */ + const byte* buf) /*!< in: page */ { return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); } @@ -125,7 +125,7 @@ Extract the page compression from space. @return true if space is page compressed, false if space is not found or space is not page compressed. 
*/ UNIV_INLINE -ibool +bool fil_space_is_page_compressed( /*=========================*/ ulint id) /*!< in: space id */ @@ -209,7 +209,7 @@ fil_space_get_atomic_writes( Find out wheather the page is page compressed with lzo method @return true if page is page compressed with lzo method, false if not */ UNIV_INLINE -ibool +bool fil_page_is_lzo_compressed( /*=======================*/ byte* buf) /*!< in: page */ diff --git a/storage/innobase/include/fsp0space.h b/storage/innobase/include/fsp0space.h new file mode 100644 index 00000000000..603c71b4aa6 --- /dev/null +++ b/storage/innobase/include/fsp0space.h @@ -0,0 +1,256 @@ +/***************************************************************************** + +Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/fsp0space.h +General shared tablespace implementation. + +Created 2013-7-26 by Kevin Lewis +*******************************************************/ + +#ifndef fsp0space_h +#define fsp0space_h + +#include "univ.i" +#include "fsp0file.h" +#include "fsp0fsp.h" +#include "fsp0types.h" +#include "ut0new.h" + +#include + +/** Data structure that contains the information about shared tablespaces. 
+Currently this can be the system tablespace or a temporary table tablespace */ +class Tablespace { + +public: + typedef std::vector > files_t; + + /** Data file information - each Datafile can be accessed globally */ + files_t m_files; + + Tablespace() + : + m_files(), + m_name(), + m_space_id(ULINT_UNDEFINED), + m_path(), + m_flags(), + m_ignore_read_only(false) + { + /* No op */ + } + + virtual ~Tablespace() + { + shutdown(); + ut_ad(m_files.empty()); + ut_ad(m_space_id == ULINT_UNDEFINED); + if (m_name != NULL) { + ut_free(m_name); + m_name = NULL; + } + if (m_path != NULL) { + ut_free(m_path); + m_path = NULL; + } + } + + // Disable copying + Tablespace(const Tablespace&); + Tablespace& operator=(const Tablespace&); + + /** Set tablespace name + @param[in] name tablespace name */ + void set_name(const char* name) + { + ut_ad(m_name == NULL); + m_name = mem_strdup(name); + ut_ad(m_name != NULL); + } + + /** Get tablespace name + @return tablespace name */ + const char* name() const + { + return(m_name); + } + + /** Set tablespace path and filename members. + @param[in] path where tablespace file(s) resides + @param[in] len length of the file path */ + void set_path(const char* path, size_t len) + { + ut_ad(m_path == NULL); + m_path = mem_strdupl(path, len); + ut_ad(m_path != NULL); + + os_normalize_path(m_path); + } + + /** Set tablespace path and filename members. 
+ @param[in] path where tablespace file(s) resides */ + void set_path(const char* path) + { + set_path(path, strlen(path)); + } + + /** Get tablespace path + @return tablespace path */ + const char* path() const + { + return(m_path); + } + + /** Set the space id of the tablespace + @param[in] space_id tablespace ID to set */ + void set_space_id(ulint space_id) + { + ut_ad(m_space_id == ULINT_UNDEFINED); + m_space_id = space_id; + } + + /** Get the space id of the tablespace + @return m_space_id space id of the tablespace */ + ulint space_id() const + { + return(m_space_id); + } + + /** Set the tablespace flags + @param[in] fsp_flags tablespace flags */ + void set_flags(ulint fsp_flags) + { + ut_ad(fsp_flags_is_valid(fsp_flags)); + m_flags = fsp_flags; + } + + /** Get the tablespace flags + @return m_flags tablespace flags */ + ulint flags() const + { + return(m_flags); + } + + /** Get the tablespace encryption mode + @return m_mode tablespace encryption mode */ + fil_encryption_t encryption_mode() const + { + return (m_mode); + } + + /** Get the tablespace encryption key_id + @return m_key_id tablespace encryption key_id */ + ulint key_id() const + { + return (m_key_id); + } + + /** Set Ignore Read Only Status for tablespace. + @param[in] read_only_status read only status indicator */ + void set_ignore_read_only(bool read_only_status) + { + m_ignore_read_only = read_only_status; + } + + /** Free the memory allocated by the Tablespace object */ + void shutdown(); + + /** @return the sum of the file sizes of each Datafile */ + ulint get_sum_of_sizes() const + { + ulint sum = 0; + + for (files_t::const_iterator it = m_files.begin(); + it != m_files.end(); ++it) { + sum += it->m_size; + } + + return(sum); + } + + /** Open or Create the data files if they do not exist. 
+ @param[in] is_temp whether this is a temporary tablespace + @return DB_SUCCESS or error code */ + dberr_t open_or_create(bool is_temp) + MY_ATTRIBUTE((warn_unused_result)); + + /** Delete all the data files. */ + void delete_files(); + + /** Check if two tablespaces have common data file names. + @param[in] other_space Tablespace to check against this. + @return true if they have the same data filenames and paths */ + bool intersection(const Tablespace* other_space); + + /** Use the ADD DATAFILE path to create a Datafile object and add + it to the front of m_files. Parse the datafile path into a path + and a basename with extension 'ibd'. This datafile_path provided + may be an absolute or relative path, but it must end with the + extension .ibd and have a basename of at least 1 byte. + + Set tablespace m_path member and add a Datafile with the filename. + @param[in] datafile_path full path of the tablespace file. */ + dberr_t add_datafile( + const char* datafile_path); + + /* Return a pointer to the first Datafile for this Tablespace + @return pointer to the first Datafile for this Tablespace*/ + Datafile* first_datafile() + { + ut_a(!m_files.empty()); + return(&m_files.front()); + } + + /** Check if undo tablespace. + @return true if undo tablespace */ + static bool is_undo_tablespace(ulint id); +private: + /** + @param[in] filename Name to lookup in the data files. + @return true if the filename exists in the data files */ + bool find(const char* filename); + + /** Note that the data file was found. + @param[in] file data file object */ + void file_found(Datafile& file); + + /* DATA MEMBERS */ + + /** Name of the tablespace. 
*/ + char* m_name; + + /** Tablespace ID */ + ulint m_space_id; + + /** Path where tablespace files will reside, not including a filename.*/ + char* m_path; + + /** Tablespace flags */ + ulint m_flags; + + /** Encryption mode and key_id */ + fil_encryption_t m_mode; + ulint m_key_id; + +protected: + /** Ignore server read only configuration for this tablespace. */ + bool m_ignore_read_only; +}; + +#endif /* fsp0space_h */ diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h new file mode 100644 index 00000000000..226d53ebd50 --- /dev/null +++ b/storage/innobase/include/fsp0sysspace.h @@ -0,0 +1,324 @@ +/***************************************************************************** + +Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/fsp0sysspace.h +Multi file, shared, system tablespace implementation. + +Created 2013-7-26 by Kevin Lewis +*******************************************************/ + +#ifndef fsp0sysspace_h +#define fsp0sysspace_h + +#include "univ.i" +#include "fsp0space.h" + +/** If the last data file is auto-extended, we add this many pages to it +at a time. 
We have to make this public because it is a config variable. */ +extern ulong sys_tablespace_auto_extend_increment; + +#ifdef UNIV_DEBUG +/** Control if extra debug checks need to be done for temporary tablespace. +Default = true that is disable such checks. +This variable is not exposed to end-user but still kept as variable for +developer to enable it during debug. */ +extern bool srv_skip_temp_table_checks_debug; +#endif /* UNIV_DEBUG */ + +/** Data structure that contains the information about shared tablespaces. +Currently this can be the system tablespace or a temporary table tablespace */ +class SysTablespace : public Tablespace +{ +public: + + SysTablespace() + : + m_auto_extend_last_file(), + m_last_file_size_max(), + m_created_new_raw(), + m_is_tablespace_full(false), + m_sanity_checks_done(false), + m_crypt_info() + { + /* No op */ + } + + ~SysTablespace() + { + shutdown(); + } + + /** Set tablespace full status + @param[in] is_full true if full */ + void set_tablespace_full_status(bool is_full) + { + m_is_tablespace_full = is_full; + } + + /** Get tablespace full status + @return true if table is full */ + bool get_tablespace_full_status() + { + return(m_is_tablespace_full); + } + + /** Set sanity check status + @param[in] status true if sanity checks are done */ + void set_sanity_check_status(bool status) + { + m_sanity_checks_done = status; + } + + /** Get sanity check status + @return true if sanity checks are done */ + bool get_sanity_check_status() + { + return(m_sanity_checks_done); + } + + /** Parse the input params and populate member variables. + @param filepath path to data files + @param supports_raw true if it supports raw devices + @return true on success parse */ + bool parse_params(const char* filepath, bool supports_raw); + + /** Check the data file specification. 
+ @param[out] create_new_db true if a new database + is to be created + @param[in] min_expected_size expected tablespace + size in bytes + @return DB_SUCCESS if all OK else error code */ + dberr_t check_file_spec( + bool* create_new_db, + ulint min_expected_tablespace_size); + + /** Free the memory allocated by parse() */ + void shutdown(); + + /** Normalize the file size, convert to extents. */ + void normalize(); + + /** + @return true if a new raw device was created. */ + bool created_new_raw() const + { + return(m_created_new_raw); + } + + /** + @return auto_extend value setting */ + ulint can_auto_extend_last_file() const + { + return(m_auto_extend_last_file); + } + + /** Set the last file size. + @param[in] size the size to set */ + void set_last_file_size(ulint size) + { + ut_ad(!m_files.empty()); + m_files.back().m_size = size; + } + + /** Get the size of the last data file in the tablespace + @return the size of the last data file in the array */ + ulint last_file_size() const + { + ut_ad(!m_files.empty()); + return(m_files.back().m_size); + } + + /** + @return the autoextend increment in pages. */ + ulint get_autoextend_increment() const + { + return(sys_tablespace_auto_extend_increment + * ((1024 * 1024) / UNIV_PAGE_SIZE)); + } + + /** Roundoff to MegaBytes is similar as done in + SysTablespace::parse_units() function. + @return the pages when given size of file (bytes). 
*/ + ulint get_pages_from_size(os_offset_t size) + { + return (ulint)((size / (1024 * 1024)) + * ((1024 * 1024) / UNIV_PAGE_SIZE)); + } + + /** + @return next increment size */ + ulint get_increment() const; + + /** Open or create the data files + @param[in] is_temp whether this is a temporary tablespace + @param[in] create_new_db whether we are creating a new database + @param[out] sum_new_sizes sum of sizes of the new files added + @param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file + @return DB_SUCCESS or error code */ + dberr_t open_or_create( + bool is_temp, + bool create_new_db, + ulint* sum_new_sizes, + lsn_t* flush_lsn) + MY_ATTRIBUTE((warn_unused_result)); + +private: + /** Check the tablespace header for this tablespace. + @param[out] flushed_lsn the value of FIL_PAGE_FILE_FLUSH_LSN + @return DB_SUCCESS or error code */ + dberr_t read_lsn_and_check_flags(lsn_t* flushed_lsn); + + /** + @return true if the last file size is valid. */ + bool is_valid_size() const + { + return(m_last_file_size_max >= last_file_size()); + } + + /** + @return true if configured to use raw devices */ + bool has_raw_device(); + + /** Note that the data file was not found. + @param[in] file data file object + @param[out] create_new_db true if a new instance to be created + @return DB_SUCESS or error code */ + dberr_t file_not_found(Datafile& file, bool* create_new_db); + + /** Note that the data file was found. + @param[in,out] file data file object + @return true if a new instance to be created */ + bool file_found(Datafile& file); + + /** Create a data file. + @param[in,out] file data file object + @return DB_SUCCESS or error code */ + dberr_t create(Datafile& file); + + /** Create a data file. + @param[in,out] file data file object + @return DB_SUCCESS or error code */ + dberr_t create_file(Datafile& file); + + /** Open a data file. 
+ @param[in,out] file data file object + @return DB_SUCCESS or error code */ + dberr_t open_file(Datafile& file); + + /** Set the size of the file. + @param[in,out] file data file object + @return DB_SUCCESS or error code */ + dberr_t set_size(Datafile& file); + + /** Convert a numeric string that optionally ends in G or M, to a + number containing megabytes. + @param[in] ptr string with a quantity in bytes + @param[out] megs the number in megabytes + @return next character in string */ + static char* parse_units(char* ptr, ulint* megs); + +private: + enum file_status_t { + FILE_STATUS_VOID = 0, /** status not set */ + FILE_STATUS_RW_PERMISSION_ERROR,/** permission error */ + FILE_STATUS_READ_WRITE_ERROR, /** not readable/writable */ + FILE_STATUS_NOT_REGULAR_FILE_ERROR /** not a regular file */ + }; + + /** Verify the size of the physical file + @param[in] file data file object + @return DB_SUCCESS if OK else error code. */ + dberr_t check_size(Datafile& file); + + /** Check if a file can be opened in the correct mode. + @param[in,out] file data file object + @param[out] reason exact reason if file_status check failed. + @return DB_SUCCESS or error code. */ + dberr_t check_file_status( + const Datafile& file, + file_status_t& reason); + + /* DATA MEMBERS */ + + /** if true, then we auto-extend the last data file */ + bool m_auto_extend_last_file; + + /** if != 0, this tells the max size auto-extending may increase the + last data file size */ + ulint m_last_file_size_max; + + /** If the following is true we do not allow + inserts etc. This protects the user from forgetting + the 'newraw' keyword to my.cnf */ + bool m_created_new_raw; + + /** Tablespace full status */ + bool m_is_tablespace_full; + + /** if false, then sanity checks are still pending */ + bool m_sanity_checks_done; + + /** Encryption information */ + fil_space_crypt_t* m_crypt_info; +}; + +/* GLOBAL OBJECTS */ + +/** The control info of the system tablespace. 
*/ +extern SysTablespace srv_sys_space; + +/** The control info of a temporary table shared tablespace. */ +extern SysTablespace srv_tmp_space; + +/** Check if the space_id is for a system-tablespace (shared + temp). +@param[in] id Space ID to check +@return true if id is a system tablespace, false if not. */ +UNIV_INLINE +bool +is_system_tablespace( + ulint id) +{ + return(id == srv_sys_space.space_id() + || id == srv_tmp_space.space_id()); +} + +/** Check if shared-system or undo tablespace. +@return true if shared-system or undo tablespace */ +UNIV_INLINE +bool +is_system_or_undo_tablespace( + ulint id) +{ + return(id == srv_sys_space.space_id() + || id <= srv_undo_tablespaces_open); +} + +/** Check if predefined shared tablespace. +@return true if predefined shared tablespace */ +UNIV_INLINE +bool +is_predefined_tablespace( + ulint id) +{ + ut_ad(srv_sys_space.space_id() == TRX_SYS_SPACE); + ut_ad(TRX_SYS_SPACE == 0); + return(id <= srv_undo_tablespaces_open + || id == srv_tmp_space.space_id()); +} +#endif /* fsp0sysspace_h */ diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 4f2ca2594cb..73fd6341d94 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -1,6 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2016, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,9 +27,9 @@ Created May 26, 2009 Vasil Dimov #ifndef fsp0types_h #define fsp0types_h -#include "univ.i" +#ifndef UNIV_INNOCHECKSUM -#include "fil0fil.h" /* for FIL_PAGE_DATA */ +#include "univ.i" #include "ut0byte.h" /** @name Flags for inserting records in order @@ -42,6 +43,7 @@ fseg_alloc_free_page) */ #define FSP_NO_DIR ((byte)113) /*!< no order */ /* @} */ +#endif /* !UNIV_INNOCHECKSUM */ /** File space extent size in pages page size | file space extent size ----------+----------------------- @@ -51,23 +53,23 @@ page size | file space extent size 32 KiB | 64 pages = 2 MiB 64 KiB | 64 pages = 4 MiB */ -/** File space extent size (one megabyte if default two or four if not) in pages */ -#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \ - (1048576U / UNIV_PAGE_SIZE) : \ +#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \ + (1048576 / UNIV_PAGE_SIZE) : \ ((UNIV_PAGE_SIZE <= (32768)) ? \ - (2097152U / UNIV_PAGE_SIZE) : \ - (4194304U / UNIV_PAGE_SIZE)))) + (2097152 / UNIV_PAGE_SIZE) : \ + (4194304 / UNIV_PAGE_SIZE)))) -/** File space extent size (four megabytes) in pages for MAX page size */ -#define FSP_EXTENT_SIZE_MAX (4194304U / UNIV_PAGE_SIZE_MAX) +/** File space extent size (four megabyte) in pages for MAX page size */ +#define FSP_EXTENT_SIZE_MAX (4194304 / UNIV_PAGE_SIZE_MAX) /** File space extent size (one megabyte) in pages for MIN page size */ -#define FSP_EXTENT_SIZE_MIN (1048576U / UNIV_PAGE_SIZE_MIN) +#define FSP_EXTENT_SIZE_MIN (1048576 / UNIV_PAGE_SIZE_MIN) /** On a page of any file segment, data may be put starting from this offset */ #define FSEG_PAGE_DATA FIL_PAGE_DATA +#ifndef UNIV_INNOCHECKSUM /** @name File segment header The file segment header points to the inode describing the file segment. 
*/ /* @{ */ @@ -82,11 +84,63 @@ typedef byte fseg_header_t; header, in bytes */ /* @} */ -/** Flags for fsp_reserve_free_extents @{ */ -#define FSP_NORMAL 1000000 -#define FSP_UNDO 2000000 -#define FSP_CLEANING 3000000 -/* @} */ +#ifdef UNIV_DEBUG + +struct mtr_t; + +/** A wrapper class to print the file segment header information. */ +class fseg_header +{ +public: + /** Constructor of fseg_header. + @param[in] header the underlying file segment header object + @param[in] mtr the mini-transaction. No redo logs are + generated, only latches are checked within + mini-transaction */ + fseg_header( + const fseg_header_t* header, + mtr_t* mtr) + : + m_header(header), + m_mtr(mtr) + {} + + /** Print the file segment header to the given output stream. + @param[in,out] out the output stream into which the object + is printed. + @retval the output stream into which the object was printed. */ + std::ostream& + to_stream(std::ostream& out) const; +private: + /** The underlying file segment header */ + const fseg_header_t* m_header; + + /** The mini transaction, which is used mainly to check whether + appropriate latches have been taken by the calling thread. 
*/ + mtr_t* m_mtr; +}; + +/* Overloading the global output operator to print a file segment header +@param[in,out] out the output stream into which object will be printed +@param[in] header the file segment header to be printed +@retval the output stream */ +inline +std::ostream& +operator<<( + std::ostream& out, + const fseg_header& header) +{ + return(header.to_stream(out)); +} +#endif /* UNIV_DEBUG */ + +/** Flags for fsp_reserve_free_extents */ +enum fsp_reserve_t { + FSP_NORMAL, /* reservation during normal B-tree operations */ + FSP_UNDO, /* reservation done for undo logging */ + FSP_CLEANING, /* reservation done during purge operations */ + FSP_BLOB /* reservation being done for BLOB insertion */ +}; /* Number of pages described in a single descriptor page: currently each page description takes less than 1 byte; a descriptor page is repeated every @@ -127,4 +181,247 @@ every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */ /*--------------------------------------*/ /* @} */ +/** Validate the tablespace flags. +These flags are stored in the tablespace header at offset FSP_SPACE_FLAGS. +They should be 0 for ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. +The newer row formats, COMPRESSED and DYNAMIC, use a file format > Antelope +so they should have a file format number plus the DICT_TF_COMPACT bit set. +@param[in] flags Tablespace flags +@return true if valid, false if not */ +bool +fsp_flags_is_valid( + ulint flags) + MY_ATTRIBUTE((warn_unused_result, const)); + +/** Check if tablespace is system temporary. +@param[in] space_id verify is checksum is enabled for given space. +@return true if tablespace is system temporary. */ +bool +fsp_is_system_temporary( + ulint space_id); + +/** Check if checksum is disabled for the given space. +@param[in] space_id verify is checksum is enabled for given space. +@return true if checksum is disabled for given space. */ +bool +fsp_is_checksum_disabled( + ulint space_id); + +/** Check if tablespace is file-per-table. 
+@param[in] space_id Tablespace ID +@param[in] fsp_flags Tablespace Flags +@return true if tablespace is file-per-table. */ +bool +fsp_is_file_per_table( + ulint space_id, + ulint fsp_flags); + +#ifdef UNIV_DEBUG +/** Skip some of the sanity checks that are time consuming even in debug mode +and can affect frequent verification runs that are done to ensure stability of +the product. +@return true if check should be skipped for given space. */ +bool +fsp_skip_sanity_check( + ulint space_id); +#endif /* UNIV_DEBUG */ + +#endif /* !UNIV_INNOCHECKSUM */ + +/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */ + +/** Width of the POST_ANTELOPE flag */ +#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1 +/** Number of flag bits used to indicate the tablespace zip page size */ +#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4 +/** Width of the ATOMIC_BLOBS flag. The ability to break up a long +column into an in-record prefix and an externally stored part is available +to the two Barracuda row formats COMPRESSED and DYNAMIC. */ +#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1 +/** Number of flag bits used to indicate the tablespace page size */ +#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4 +/** Width of the DATA_DIR flag. This flag indicates that the tablespace +is found in a remote location, not the default data directory. */ +#define FSP_FLAGS_WIDTH_DATA_DIR 1 +/** Width of the SHARED flag. This flag indicates that the tablespace +was created with CREATE TABLESPACE and can be shared by multiple tables. */ +#define FSP_FLAGS_WIDTH_SHARED 1 +/** Width of the TEMPORARY flag. This flag indicates that the tablespace +is a temporary tablespace and everything in it is temporary, meaning that +it is for a single client and should be deleted upon startup if it exists. */ +#define FSP_FLAGS_WIDTH_TEMPORARY 1 +/** Width of the encryption flag. This flag indicates that the tablespace +is a tablespace with encryption. 
*/ +#define FSP_FLAGS_WIDTH_ENCRYPTION 1 + +/** Number of flag bits used to indicate the page compression and compression level */ +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 +#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** Number of flag bits used to indicate atomic writes for this tablespace */ +#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 + +/** Width of all the currently known tablespace flags */ +#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ + + FSP_FLAGS_WIDTH_ZIP_SSIZE \ + + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_PAGE_SSIZE \ + + FSP_FLAGS_WIDTH_DATA_DIR \ + + FSP_FLAGS_WIDTH_SHARED \ + + FSP_FLAGS_WIDTH_TEMPORARY \ + + FSP_FLAGS_WIDTH_ENCRYPTION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES ) + +/** A mask of all the known/used bits in tablespace flags */ +#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) + +/** Zero relative shift position of the POST_ANTELOPE field */ +#define FSP_FLAGS_POS_POST_ANTELOPE 0 +/** Zero relative shift position of the ZIP_SSIZE field */ +#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \ + + FSP_FLAGS_WIDTH_POST_ANTELOPE) +/** Zero relative shift position of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ + + FSP_FLAGS_WIDTH_ZIP_SSIZE) +/** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the start of the DATA_DIR bit */ +#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + + FSP_FLAGS_WIDTH_PAGE_SSIZE) +/** Zero relative shift position of the start of the SHARED bit */ +#define FSP_FLAGS_POS_SHARED (FSP_FLAGS_POS_DATA_DIR \ + + FSP_FLAGS_WIDTH_DATA_DIR) +/** Zero relative shift position of the start of the TEMPORARY bit */ +#define FSP_FLAGS_POS_TEMPORARY (FSP_FLAGS_POS_SHARED \ + + FSP_FLAGS_WIDTH_SHARED) +/** Zero relative 
shift position of the start of the ENCRYPTION bit */ +#define FSP_FLAGS_POS_ENCRYPTION (FSP_FLAGS_POS_TEMPORARY \ + + FSP_FLAGS_WIDTH_TEMPORARY) +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_ENCRYPTION \ + + FSP_FLAGS_WIDTH_ENCRYPTION) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL (FSP_FLAGS_POS_PAGE_COMPRESSION \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ + + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) +/** Zero relative shift position of the start of the UNUSED bits */ +#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_ATOMIC_WRITES \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES) + + +/** Bit mask of the POST_ANTELOPE field */ +#define FSP_FLAGS_MASK_POST_ANTELOPE \ + ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \ + << FSP_FLAGS_POS_POST_ANTELOPE) +/** Bit mask of the ZIP_SSIZE field */ +#define FSP_FLAGS_MASK_ZIP_SSIZE \ + ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \ + << FSP_FLAGS_POS_ZIP_SSIZE) +/** Bit mask of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_MASK_ATOMIC_BLOBS \ + ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \ + << FSP_FLAGS_POS_ATOMIC_BLOBS) +/** Bit mask of the PAGE_SSIZE field */ +#define FSP_FLAGS_MASK_PAGE_SSIZE \ + ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \ + << FSP_FLAGS_POS_PAGE_SSIZE) +/** Bit mask of the DATA_DIR field */ +#define FSP_FLAGS_MASK_DATA_DIR \ + ((~(~0U << FSP_FLAGS_WIDTH_DATA_DIR)) \ + << FSP_FLAGS_POS_DATA_DIR) +/** Bit mask of the SHARED field */ +#define FSP_FLAGS_MASK_SHARED \ + ((~(~0U << FSP_FLAGS_WIDTH_SHARED)) \ + << FSP_FLAGS_POS_SHARED) +/** Bit mask of the TEMPORARY field */ +#define FSP_FLAGS_MASK_TEMPORARY \ + ((~(~0U << FSP_FLAGS_WIDTH_TEMPORARY)) \ + << FSP_FLAGS_POS_TEMPORARY) +/** Bit mask of the ENCRYPTION field */ +#define FSP_FLAGS_MASK_ENCRYPTION \ + 
((~(~0U << FSP_FLAGS_WIDTH_ENCRYPTION)) \ + << FSP_FLAGS_POS_ENCRYPTION) +/** Bit mask of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define FSP_FLAGS_MASK_ATOMIC_WRITES \ + ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ + << FSP_FLAGS_POS_ATOMIC_WRITES) + +/** Return the value of the POST_ANTELOPE field */ +#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ + ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ + >> FSP_FLAGS_POS_POST_ANTELOPE) +/** Return the value of the ZIP_SSIZE field */ +#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \ + ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \ + >> FSP_FLAGS_POS_ZIP_SSIZE) +/** Return the value of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \ + >> FSP_FLAGS_POS_ATOMIC_BLOBS) +/** Return the value of the PAGE_SSIZE field */ +#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \ + >> FSP_FLAGS_POS_PAGE_SSIZE) +/** Return the value of the DATA_DIR field */ +#define FSP_FLAGS_HAS_DATA_DIR(flags) \ + ((flags & FSP_FLAGS_MASK_DATA_DIR) \ + >> FSP_FLAGS_POS_DATA_DIR) +/** Return the contents of the SHARED field */ +#define FSP_FLAGS_GET_SHARED(flags) \ + ((flags & FSP_FLAGS_MASK_SHARED) \ + >> FSP_FLAGS_POS_SHARED) +/** Return the contents of the TEMPORARY field */ +#define FSP_FLAGS_GET_TEMPORARY(flags) \ + ((flags & FSP_FLAGS_MASK_TEMPORARY) \ + >> FSP_FLAGS_POS_TEMPORARY) +/** Return the contents of the ENCRYPTION field */ +#define FSP_FLAGS_GET_ENCRYPTION(flags) \ + ((flags & FSP_FLAGS_MASK_ENCRYPTION) \ + >> FSP_FLAGS_POS_ENCRYPTION) +/** Return the contents of the UNUSED bits */ +#define 
FSP_FLAGS_GET_UNUSED(flags) \ + (flags >> FSP_FLAGS_POS_UNUSED) +/** Return the value of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ + >> FSP_FLAGS_POS_ATOMIC_WRITES) +/** Use an alias in the code for FSP_FLAGS_GET_SHARED() */ +#define fsp_is_shared_tablespace FSP_FLAGS_GET_SHARED +/* @} */ + +/** Set a PAGE_COMPRESSION into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ + (flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION)) + +/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ + (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) + +/** Set a ATOMIC_WRITES into the correct bits in a given +tablespace flags. 
*/ +#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ + (flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES)) #endif /* fsp0types_h */ diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h index 50f62063893..87b7cf709c8 100644 --- a/storage/innobase/include/fts0ast.h +++ b/storage/innobase/include/fts0ast.h @@ -26,8 +26,16 @@ Created 2007/03/16/03 Sunny Bains #ifndef INNOBASE_FST0AST_H #define INNOBASE_FST0AST_H -#include "mem0mem.h" #include "ha_prototypes.h" +#include "mem0mem.h" + +#ifdef UNIV_PFS_MEMORY + +#define malloc(A) ut_malloc_nokey(A) +#define free(A) ut_free(A) +#define realloc(P, A) ut_realloc(P, A) + +#endif /* UNIV_PFS_MEMORY */ /* The type of AST Node */ enum fts_ast_type_t { @@ -35,6 +43,10 @@ enum fts_ast_type_t { FTS_AST_NUMB, /*!< Number */ FTS_AST_TERM, /*!< Term (or word) */ FTS_AST_TEXT, /*!< Text string */ + FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser + The difference from text type + is that we tokenize text into + term list */ FTS_AST_LIST, /*!< Expression list */ FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ }; @@ -139,9 +151,8 @@ fts_ast_term_set_wildcard( fts_ast_node_t* node); /*!< in: term to change */ /******************************************************************** Set the proximity attribute of a text node. */ - void -fts_ast_term_set_distance( +fts_ast_text_set_distance( /*======================*/ fts_ast_node_t* node, /*!< in/out: text node */ ulint distance); /*!< in: the text proximity @@ -149,7 +160,6 @@ fts_ast_term_set_distance( /********************************************************************//** Free a fts_ast_node_t instance. 
@return next node to free */ -UNIV_INTERN fts_ast_node_t* fts_ast_free_node( /*==============*/ @@ -185,10 +195,16 @@ fts_ast_state_free( /*===============*/ fts_ast_state_t*state); /*!< in: state instance to free */ +/** Check only union operation involved in the node +@param[in] node ast node to check +@return true if the node contains only union else false. */ +bool +fts_ast_node_check_union( + fts_ast_node_t* node); + /******************************************************************//** Traverse the AST - in-order traversal. @return DB_SUCCESS if all went well */ -UNIV_INTERN dberr_t fts_ast_visit( /*==========*/ @@ -206,7 +222,6 @@ Process (nested) sub-expression, create a new result set to store the sub-expression result by processing nodes under current sub-expression list. Merge the sub-expression result with that of parent expression list. @return DB_SUCCESS if all went well */ -UNIV_INTERN dberr_t fts_ast_visit_sub_exp( /*==================*/ @@ -216,7 +231,6 @@ fts_ast_visit_sub_exp( MY_ATTRIBUTE((nonnull, warn_unused_result)); /******************************************************************** Create a lex instance.*/ -UNIV_INTERN fts_lexer_t* fts_lexer_create( /*=============*/ @@ -226,7 +240,6 @@ fts_lexer_create( MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); /******************************************************************** Free an fts_lexer_t instance.*/ -UNIV_INTERN void fts_lexer_free( /*===========*/ @@ -240,7 +253,6 @@ has one more byte than len @param[in] str pointer to string @param[in] len length of the string @return ast string with NUL-terminator */ -UNIV_INTERN fts_ast_string_t* fts_ast_string_create( const byte* str, @@ -249,7 +261,6 @@ fts_ast_string_create( /** Free an ast string instance @param[in,out] ast_str string to free */ -UNIV_INTERN void fts_ast_string_free( fts_ast_string_t* ast_str); @@ -259,7 +270,6 @@ Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul @param[in] str string to translate 
@param[in] base the base @return translated number */ -UNIV_INTERN ulint fts_ast_string_to_ul( const fts_ast_string_t* ast_str, @@ -268,7 +278,6 @@ fts_ast_string_to_ul( /** Print the ast string @param[in] str string to print */ -UNIV_INTERN void fts_ast_string_print( const fts_ast_string_t* ast_str); @@ -314,6 +323,9 @@ struct fts_ast_node_t { fts_ast_node_t* next_alloc; /*!< For tracking allocations */ bool visited; /*!< whether this node is already processed */ + /* Used by plugin parser */ + fts_ast_node_t* up_node; /*!< Direct up node */ + bool go_up; /*!< Flag if go one level up */ }; /* To track state during parsing */ @@ -327,8 +339,32 @@ struct fts_ast_state_t { fts_lexer_t* lexer; /*!< Lexer callback + arg */ CHARSET_INFO* charset; /*!< charset used for tokenization */ + /* Used by plugin parser */ + fts_ast_node_t* cur_node; /*!< Current node into which + we add new node */ + int depth; /*!< Depth of parsing state */ }; +/******************************************************************//** +Create an AST term node, makes a copy of ptr for plugin parser +@return node */ +extern +fts_ast_node_t* +fts_ast_create_node_term_for_parser( +/*==========i=====================*/ + void* arg, /*!< in: ast state */ + const char* ptr, /*!< in: term string */ + const ulint len); /*!< in: term string length */ + +/******************************************************************//** +Create an AST phrase list node for plugin parser +@return node */ +extern +fts_ast_node_t* +fts_ast_create_node_phrase_list( +/*============================*/ + void* arg); /*!< in: ast state */ + #ifdef UNIV_DEBUG const char* fts_ast_oper_name_get(fts_ast_oper_t oper); diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h index d0e4cae0678..da93ab8617d 100644 --- a/storage/innobase/include/fts0blex.h +++ b/storage/innobase/include/fts0blex.h @@ -341,7 +341,7 @@ extern int fts0blex (yyscan_t yyscanner); #undef YY_DECL #endif -#line 73 "fts0blex.l" +#line 
74 "fts0blex.l" #line 348 "../include/fts0blex.h" diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 87b5787d416..2d256472ef6 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -26,7 +26,7 @@ Created 2011/09/02 Sunny Bains #ifndef fts0fts_h #define fts0fts_h -#include "univ.i" +#include "ha_prototypes.h" #include "data0type.h" #include "data0types.h" @@ -41,6 +41,7 @@ Created 2011/09/02 Sunny Bains #include "ut0wqueue.h" #include "que0types.h" #include "ft_global.h" +#include "mysql/plugin_ftparser.h" /** "NULL" value of a document id. */ #define FTS_NULL_DOC_ID 0 @@ -65,7 +66,7 @@ optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */ #define MAX_DOC_ID_OPT_VAL 1073741824 /** Document id type. */ -typedef ib_uint64_t doc_id_t; +typedef ib_id_t doc_id_t; /** doc_id_t printf format */ #define FTS_DOC_ID_FORMAT IB_ID_FMT @@ -85,12 +86,16 @@ those defined in mysql file ft_global.h */ #define FTS_BOOL 1 #define FTS_SORTED 2 #define FTS_EXPAND 4 -#define FTS_PROXIMITY 8 -#define FTS_PHRASE 16 -#define FTS_OPT_RANKING 32 +#define FTS_NO_RANKING 8 +#define FTS_PROXIMITY 16 +#define FTS_PHRASE 32 +#define FTS_OPT_RANKING 64 #define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND" +/** The number of FTS index partitions for a fulltext idnex */ +#define FTS_NUM_AUX_INDEX 6 + /** Threshold where our optimize thread automatically kicks in */ #define FTS_OPTIMIZE_THRESHOLD 10000000 @@ -98,6 +103,31 @@ those defined in mysql file ft_global.h */ should not exceed FTS_DOC_ID_MAX_STEP */ #define FTS_DOC_ID_MAX_STEP 65535 +/** Maximum possible Fulltext word length */ +#define FTS_MAX_WORD_LEN HA_FT_MAXBYTELEN + +/** Maximum possible Fulltext word length (in characters) */ +#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN + +/** Number of columns in FTS AUX Tables */ +#define FTS_DELETED_TABLE_NUM_COLS 1 +#define FTS_CONFIG_TABLE_NUM_COLS 2 +#define FTS_AUX_INDEX_TABLE_NUM_COLS 5 + +/** 
DELETED_TABLE(doc_id BIGINT UNSIGNED) */ +#define FTS_DELETED_TABLE_COL_LEN 8 +/** CONFIG_TABLE(key CHAR(50), value CHAR(200)) */ +#define FTS_CONFIG_TABLE_KEY_COL_LEN 50 +#define FTS_CONFIG_TABLE_VALUE_COL_LEN 200 + +#define FTS_INDEX_WORD_LEN FTS_MAX_WORD_LEN +#define FTS_INDEX_FIRST_DOC_ID_LEN 8 +#define FTS_INDEX_LAST_DOC_ID_LEN 8 +#define FTS_INDEX_DOC_COUNT_LEN 4 +/* BLOB COLUMN, 0 means VARIABLE SIZE */ +#define FTS_INDEX_ILIST_LEN 0 + + /** Variable specifying the FTS parallel sort degree */ extern ulong fts_sort_pll_degree; @@ -150,7 +180,7 @@ do { \ (fts_table)->suffix = m_suffix; \ (fts_table)->type = m_type; \ (fts_table)->table_id = m_table->id; \ - (fts_table)->parent = m_table->name; \ + (fts_table)->parent = m_table->name.m_name; \ (fts_table)->table = m_table; \ } while (0); @@ -159,7 +189,7 @@ do { \ (fts_table)->suffix = m_suffix; \ (fts_table)->type = m_type; \ (fts_table)->table_id = m_index->table->id; \ - (fts_table)->parent = m_index->table->name; \ + (fts_table)->parent = m_index->table->name.m_name; \ (fts_table)->table = m_index->table; \ (fts_table)->index_id = m_index->id; \ } while (0); @@ -306,35 +336,45 @@ enum fts_status { typedef enum fts_status fts_status_t; /** The state of the FTS sub system. */ -struct fts_t { - /*!< mutex protecting bg_threads* and - fts_add_wq. */ - ib_mutex_t bg_threads_mutex; +class fts_t { +public: + /** fts_t constructor. + @param[in] table table with FTS indexes + @param[in,out] heap memory heap where 'this' is stored */ + fts_t( + const dict_table_t* table, + mem_heap_t* heap); - ulint bg_threads; /*!< number of background threads - accessing this table */ + /** fts_t destructor. */ + ~fts_t(); - /*!< TRUE if background threads running - should stop themselves */ - ulint fts_status; /*!< Status bit regarding fts - running state */ + /** Mutex protecting bg_threads* and fts_add_wq. 
*/ + ib_mutex_t bg_threads_mutex; - ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs - for the FTS 'Add' thread, or NULL - if the thread has not yet been - created. Each work item is a - fts_trx_doc_ids_t*. */ + /** Number of background threads accessing this table. */ + ulint bg_threads; - fts_cache_t* cache; /*!< FTS memory buffer for this table, - or NULL if the table has no FTS - index. */ + /** Status bit regarding fts running state. TRUE if background + threads running should stop themselves. */ + ulint fts_status; - ulint doc_col; /*!< FTS doc id hidden column number - in the CLUSTERED index. */ + /** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL + if the thread has not yet been created. Each work item is a + fts_trx_doc_ids_t*. */ + ib_wqueue_t* add_wq; - ib_vector_t* indexes; /*!< Vector of FTS indexes, this is - mainly for caching purposes. */ - mem_heap_t* fts_heap; /*!< heap for fts_t allocation */ + /** FTS memory buffer for this table, or NULL if the table has no FTS + index. */ + fts_cache_t* cache; + + /** FTS doc id hidden column number in the CLUSTERED index. */ + ulint doc_col; + + /** Vector of FTS indexes, this is mainly for caching purposes. */ + ib_vector_t* indexes; + + /** Heap for fts_t allocation. */ + mem_heap_t* fts_heap; }; struct fts_stopword_t; @@ -366,12 +406,6 @@ extern ulong fts_min_token_size; need a sync to free some memory */ extern bool fts_need_sync; -/** Maximum possible Fulltext word length */ -#define FTS_MAX_WORD_LEN HA_FT_MAXBYTELEN - -/** Maximum possible Fulltext word length (in characters) */ -#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN - /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; @@ -385,7 +419,6 @@ do { \ /******************************************************************//** Create a FTS cache. 
*/ -UNIV_INTERN fts_cache_t* fts_cache_create( /*=============*/ @@ -394,7 +427,6 @@ fts_cache_create( /******************************************************************//** Create a FTS index cache. @return Index Cache */ -UNIV_INTERN fts_index_cache_t* fts_cache_index_cache_create( /*=========================*/ @@ -405,31 +437,26 @@ fts_cache_index_cache_create( Get the next available document id. This function creates a new transaction to generate the document id. @return DB_SUCCESS if OK */ -UNIV_INTERN dberr_t fts_get_next_doc_id( /*================*/ const dict_table_t* table, /*!< in: table */ - doc_id_t* doc_id) /*!< out: new document id */ - MY_ATTRIBUTE((nonnull)); + doc_id_t* doc_id);/*!< out: new document id */ /*********************************************************************//** Update the next and last Doc ID in the CONFIG table to be the input "doc_id" value (+ 1). We would do so after each FTS index build or table truncate */ -UNIV_INTERN void fts_update_next_doc_id( /*===================*/ trx_t* trx, /*!< in/out: transaction */ const dict_table_t* table, /*!< in: table */ const char* table_name, /*!< in: table name, or NULL */ - doc_id_t doc_id) /*!< in: DOC ID to set */ - MY_ATTRIBUTE((nonnull(2))); + doc_id_t doc_id); /*!< in: DOC ID to set */ /******************************************************************//** Create a new document id . @return DB_SUCCESS if all went well else error */ -UNIV_INTERN dberr_t fts_create_doc_id( /*==============*/ @@ -439,19 +466,17 @@ fts_create_doc_id( value to this row. This is the current row that is being inserted. */ - mem_heap_t* heap) /*!< in: heap */ - MY_ATTRIBUTE((nonnull)); + mem_heap_t* heap); /*!< in: heap */ + /******************************************************************//** Create a new fts_doc_ids_t. @return new fts_doc_ids_t. 
*/ -UNIV_INTERN fts_doc_ids_t* fts_doc_ids_create(void); /*=====================*/ /******************************************************************//** Free a fts_doc_ids_t. */ -UNIV_INTERN void fts_doc_ids_free( /*=============*/ @@ -459,7 +484,6 @@ fts_doc_ids_free( /******************************************************************//** Notify the FTS system about an operation on an FTS-indexed table. */ -UNIV_INTERN void fts_trx_add_op( /*===========*/ @@ -467,13 +491,11 @@ fts_trx_add_op( dict_table_t* table, /*!< in: table */ doc_id_t doc_id, /*!< in: doc id */ fts_row_state state, /*!< in: state of the row */ - ib_vector_t* fts_indexes) /*!< in: FTS indexes affected + ib_vector_t* fts_indexes); /*!< in: FTS indexes affected (NULL=all) */ - MY_ATTRIBUTE((nonnull(1,2))); /******************************************************************//** Free an FTS trx. */ -UNIV_INTERN void fts_trx_free( /*=========*/ @@ -484,7 +506,6 @@ Creates the common ancillary tables needed for supporting an FTS index on the given table. row_mysql_lock_data_dictionary must have been called before this. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_create_common_tables( /*=====================*/ @@ -494,25 +515,23 @@ fts_create_common_tables( index */ const char* name, /*!< in: table name */ bool skip_doc_id_index) /*!< in: Skip index on doc id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Wrapper function of fts_create_index_tables_low(), create auxiliary tables for an FTS index @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_create_index_tables( /*====================*/ trx_t* trx, /*!< in: transaction handle */ const dict_index_t* index) /*!< in: the FTS index instance */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Creates the column specific ancillary tables needed for supporting an FTS index on the given table. row_mysql_lock_data_dictionary must have been called before this. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_create_index_tables_low( /*========================*/ @@ -522,62 +541,59 @@ fts_create_index_tables_low( instance */ const char* table_name, /*!< in: the table name */ table_id_t table_id) /*!< in: the table id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Add the FTS document id hidden column. */ -UNIV_INTERN void fts_add_doc_id_column( /*==================*/ dict_table_t* table, /*!< in/out: Table with FTS index */ - mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */ - MY_ATTRIBUTE((nonnull(1))); + mem_heap_t* heap); /*!< in: temporary memory heap, or NULL */ /*********************************************************************//** Drops the ancillary tables needed for supporting an FTS index on the given table. 
row_mysql_lock_data_dictionary must have been called before this. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_tables( /*============*/ trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table has the FTS + dict_table_t* table); /*!< in: table has the FTS index */ - MY_ATTRIBUTE((nonnull)); /******************************************************************//** The given transaction is about to be committed; do whatever is necessary from the FTS system's POV. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_commit( /*=======*/ trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************************//** -FTS Query entry point. +/** FTS Query entry point. +@param[in] trx transaction +@param[in] index fts index to search +@param[in] flags FTS search mode +@param[in] query_str FTS query +@param[in] query_len FTS query string len in bytes +@param[in,out] result result doc ids +@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ -UNIV_INTERN dberr_t fts_query( -/*======*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index to search */ - uint flags, /*!< in: FTS search mode */ - const byte* query, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len - in bytes */ - fts_result_t** result) /*!< out: query result, to be - freed by the caller.*/ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + trx_t* trx, + dict_index_t* index, + uint flags, + const byte* query_str, + ulint query_len, + fts_result_t** result, + ulonglong limit) + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Retrieve the FTS Relevance Ranking result for doc with doc_id @return the relevance ranking value. 
*/ -UNIV_INTERN float fts_retrieve_ranking( /*=================*/ @@ -587,7 +603,6 @@ fts_retrieve_ranking( /******************************************************************//** FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ -UNIV_INTERN void fts_query_sort_result_on_rank( /*==========================*/ @@ -596,7 +611,6 @@ fts_query_sort_result_on_rank( /******************************************************************//** FTS Query free result, returned by fts_query(). */ -UNIV_INTERN void fts_query_free_result( /*==================*/ @@ -605,7 +619,6 @@ fts_query_free_result( /******************************************************************//** Extract the doc id from the FTS hidden column. */ -UNIV_INTERN doc_id_t fts_get_doc_id_from_row( /*====================*/ @@ -613,37 +626,45 @@ fts_get_doc_id_from_row( dtuple_t* row); /*!< in: row whose FTS doc id we want to extract.*/ -/******************************************************************//** -Extract the doc id from the FTS hidden column. */ -UNIV_INTERN +/** Extract the doc id from the record that belongs to index. +@param[in] table table +@param[in] rec record contains FTS_DOC_ID +@param[in] index index of rec +@param[in] heap heap memory +@return doc id that was extracted from rec */ doc_id_t fts_get_doc_id_from_rec( -/*====================*/ - dict_table_t* table, /*!< in: table */ - const rec_t* rec, /*!< in: rec */ - mem_heap_t* heap); /*!< in: heap */ + dict_table_t* table, + const rec_t* rec, + const dict_index_t* index, + mem_heap_t* heap); -/******************************************************************//** -Update the query graph with a new document id. -@return Doc ID used */ -UNIV_INTERN +/** Add new fts doc id to the update vector. +@param[in] table the table that contains the FTS index. +@param[in,out] ufield the fts doc id field in the update vector. + No new memory is allocated for this in this + function. 
+@param[in,out] next_doc_id the fts doc id that has been added to the + update vector. If 0, a new fts doc id is + automatically generated. The memory provided + for this argument will be used by the update + vector. Ensure that the life time of this + memory matches that of the update vector. +@return the fts doc id used in the update vector */ doc_id_t fts_update_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* ufield, /*!< out: update node */ - doc_id_t* next_doc_id); /*!< out: buffer for writing */ + dict_table_t* table, + upd_field_t* ufield, + doc_id_t* next_doc_id); /******************************************************************//** FTS initialize. */ -UNIV_INTERN void fts_startup(void); /*==============*/ /******************************************************************//** Signal FTS threads to initiate shutdown. */ -UNIV_INTERN void fts_start_shutdown( /*===============*/ @@ -654,7 +675,6 @@ fts_start_shutdown( /******************************************************************//** Wait for FTS threads to shutdown. */ -UNIV_INTERN void fts_shutdown( /*=========*/ @@ -666,7 +686,6 @@ fts_shutdown( /******************************************************************//** Create an instance of fts_t. @return instance of fts_t */ -UNIV_INTERN fts_t* fts_create( /*=======*/ @@ -675,7 +694,6 @@ fts_create( /**********************************************************************//** Free the FTS resources. */ -UNIV_INTERN void fts_free( /*=====*/ @@ -685,16 +703,13 @@ fts_free( /*********************************************************************//** Run OPTIMIZE on the given table. 
@return DB_SUCCESS if all OK */ -UNIV_INTERN dberr_t fts_optimize_table( /*===============*/ - dict_table_t* table) /*!< in: table to optimiza */ - MY_ATTRIBUTE((nonnull)); + dict_table_t* table); /*!< in: table to optimiza */ /**********************************************************************//** Startup the optimize thread and create the work queue. */ -UNIV_INTERN void fts_optimize_init(void); /*====================*/ @@ -702,7 +717,6 @@ fts_optimize_init(void); /**********************************************************************//** Check whether the work queue is initialized. @return TRUE if optimze queue is initialized. */ -UNIV_INTERN ibool fts_optimize_is_init(void); /*======================*/ @@ -710,65 +724,49 @@ fts_optimize_is_init(void); /****************************************************************//** Drops index ancillary tables for a FTS index @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_index_tables( /*==================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index) /*!< in: Index to drop */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Remove the table from the OPTIMIZER's list. We do wait for acknowledgement from the consumer of the message. */ -UNIV_INTERN void fts_optimize_remove_table( /*======================*/ dict_table_t* table); /*!< in: table to remove */ +/** Shutdown fts optimize thread. */ +void +fts_optimize_shutdown(); + /** Send sync fts cache for the table. @param[in] table table to sync */ -UNIV_INTERN void fts_optimize_request_sync_table( dict_table_t* table); -/**********************************************************************//** -Signal the optimize thread to prepare for shutdown. 
*/ -UNIV_INTERN -void -fts_optimize_start_shutdown(void); -/*==============================*/ - -/**********************************************************************//** -Inform optimize to clean up. */ -UNIV_INTERN -void -fts_optimize_end(void); -/*===================*/ - /**********************************************************************//** Take a FTS savepoint. */ -UNIV_INTERN void fts_savepoint_take( /*===============*/ trx_t* trx, /*!< in: transaction */ fts_trx_t* fts_trx, /*!< in: fts transaction */ - const char* name) /*!< in: savepoint name */ - MY_ATTRIBUTE((nonnull)); + const char* name); /*!< in: savepoint name */ + /**********************************************************************//** Refresh last statement savepoint. */ -UNIV_INTERN void fts_savepoint_laststmt_refresh( /*===========================*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); + trx_t* trx); /*!< in: transaction */ + /**********************************************************************//** Release the savepoint data identified by name. */ -UNIV_INTERN void fts_savepoint_release( /*==================*/ @@ -777,7 +775,6 @@ fts_savepoint_release( /**********************************************************************//** Free the FTS cache. */ -UNIV_INTERN void fts_cache_destroy( /*==============*/ @@ -785,14 +782,12 @@ fts_cache_destroy( /** Clear cache. @param[in,out] cache fts cache */ -UNIV_INTERN void fts_cache_clear( fts_cache_t* cache); /*********************************************************************//** Initialize things in cache. */ -UNIV_INTERN void fts_cache_init( /*===========*/ @@ -800,7 +795,6 @@ fts_cache_init( /*********************************************************************//** Rollback to and including savepoint indentified by name. 
*/ -UNIV_INTERN void fts_savepoint_rollback( /*===================*/ @@ -809,7 +803,6 @@ fts_savepoint_rollback( /*********************************************************************//** Rollback to and including savepoint indentified by name. */ -UNIV_INTERN void fts_savepoint_rollback_last_stmt( /*=============================*/ @@ -818,7 +811,6 @@ fts_savepoint_rollback_last_stmt( /***********************************************************************//** Drop all orphaned FTS auxiliary tables, those that don't have a parent table or FTS index defined on them. */ -UNIV_INTERN void fts_drop_orphaned_tables(void); /*==========================*/ @@ -827,13 +819,12 @@ fts_drop_orphaned_tables(void); Since we do a horizontal split on the index table, we need to drop all the split tables. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_drop_index_split_tables( /*========================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index) /*!< in: fts instance */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @@ -842,7 +833,6 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in] wait whether wait for existing sync to finish @param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ -UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, @@ -853,7 +843,6 @@ fts_sync_table( /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already held */ -UNIV_INTERN void fts_que_graph_free_check_lock( /*==========================*/ @@ -863,7 +852,6 @@ fts_que_graph_free_check_lock( /****************************************************************//** Create an FTS index cache. 
*/ -UNIV_INTERN CHARSET_INFO* fts_index_get_charset( /*==================*/ @@ -872,7 +860,6 @@ fts_index_get_charset( /*********************************************************************//** Get the initial Doc ID by consulting the CONFIG table @return initial Doc ID */ -UNIV_INTERN doc_id_t fts_init_doc_id( /*============*/ @@ -923,15 +910,31 @@ innobase_mysql_fts_get_token( const byte* start, /*!< in: start of text */ const byte* end, /*!< in: one character past end of text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset); /*!< out: offset to token, - measured as characters from - 'start' */ + fts_string_t* token); /*!< out: token's text */ + +/*************************************************************//** +Get token char size by charset +@return the number of token char size */ +ulint +fts_get_token_size( +/*===============*/ + const CHARSET_INFO* cs, /*!< in: Character set */ + const char* token, /*!< in: token */ + ulint len); /*!< in: token length */ + +/*************************************************************//** +FULLTEXT tokenizer internal in MYSQL_FTPARSER_SIMPLE_MODE +@return 0 if tokenize sucessfully */ +int +fts_tokenize_document_internal( +/*===========================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */ + const char* doc, /*!< in: document to tokenize */ + int len); /*!< in: document length */ /*********************************************************************//** Fetch COUNT(*) from specified table. 
@return the number of rows in the table */ -UNIV_INTERN ulint fts_get_rows_count( /*===============*/ @@ -940,7 +943,6 @@ fts_get_rows_count( /*************************************************************//** Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ -UNIV_INTERN doc_id_t fts_get_max_doc_id( /*===============*/ @@ -950,7 +952,6 @@ fts_get_max_doc_id( Check whether user supplied stopword table exists and is of the right format. @return the stopword column charset if qualifies */ -UNIV_INTERN CHARSET_INFO* fts_valid_stopword_table( /*=====================*/ @@ -959,7 +960,6 @@ fts_valid_stopword_table( /****************************************************************//** This function loads specified stopword into FTS cache @return TRUE if success */ -UNIV_INTERN ibool fts_load_stopword( /*==============*/ @@ -978,7 +978,6 @@ fts_load_stopword( /****************************************************************//** Create the vector of fts_get_doc_t instances. @return vector of fts_get_doc_t instances */ -UNIV_INTERN ib_vector_t* fts_get_docs_create( /*================*/ @@ -987,7 +986,6 @@ fts_get_docs_create( /****************************************************************//** Read the rows from the FTS index @return DB_SUCCESS if OK */ -UNIV_INTERN dberr_t fts_table_fetch_doc_ids( /*====================*/ @@ -1001,7 +999,6 @@ used. 
There are documents that have not yet sync-ed to auxiliary tables from last server abnormally shutdown, we will need to bring such document into FTS cache before any further operations @return TRUE if all OK */ -UNIV_INTERN ibool fts_init_index( /*===========*/ @@ -1010,7 +1007,6 @@ fts_init_index( have cache lock */ /*******************************************************************//** Add a newly create index in FTS cache */ -UNIV_INTERN void fts_add_index( /*==========*/ @@ -1020,19 +1016,16 @@ fts_add_index( /*******************************************************************//** Drop auxiliary tables related to an FTS index @return DB_SUCCESS or error number */ -UNIV_INTERN dberr_t fts_drop_index( /*===========*/ dict_table_t* table, /*!< in: Table where indexes are dropped */ dict_index_t* index, /*!< in: Index to be dropped */ - trx_t* trx) /*!< in: Transaction for the drop */ - MY_ATTRIBUTE((nonnull)); + trx_t* trx); /*!< in: Transaction for the drop */ /****************************************************************//** Rename auxiliary tables for all fts index for a table @return DB_SUCCESS or error code */ - dberr_t fts_rename_aux_tables( /*==================*/ @@ -1044,10 +1037,21 @@ fts_rename_aux_tables( Check indexes in the fts->indexes is also present in index cache and table->indexes list @return TRUE if all indexes match */ -UNIV_INTERN ibool fts_check_cached_index( /*===================*/ dict_table_t* table); /*!< in: Table where indexes are dropped */ + +/** Check if the all the auxillary tables associated with FTS index are in +consistent state. 
For now consistency is check only by ensuring +index->page_no != FIL_NULL +@param[out] base_table table has host fts index +@param[in,out] trx trx handler */ +void +fts_check_corrupt( + dict_table_t* base_table, + trx_t* trx); + + #endif /*!< fts0fts.h */ diff --git a/storage/innobase/include/fts0opt.h b/storage/innobase/include/fts0opt.h index 92eaf8270d2..a9185ad8df1 100644 --- a/storage/innobase/include/fts0opt.h +++ b/storage/innobase/include/fts0opt.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2001, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,7 +27,6 @@ Created 2011-02-15 Jimmy Yang /******************************************************************** Callback function to fetch the rows in an FTS INDEX record. */ -UNIV_INTERN ibool fts_optimize_index_fetch_node( /*==========================*/ diff --git a/storage/innobase/include/fts0plugin.h b/storage/innobase/include/fts0plugin.h new file mode 100644 index 00000000000..9bc9b6b9dd7 --- /dev/null +++ b/storage/innobase/include/fts0plugin.h @@ -0,0 +1,50 @@ +/***************************************************************************** + +Copyright (c) 2013, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0plugin.h +Full text search plugin header file + +Created 2013/06/04 Shaohua Wang +***********************************************************************/ + +#ifndef INNOBASE_FTS0PLUGIN_H +#define INNOBASE_FTS0PLUGIN_H + +#include "ha_prototypes.h" + +extern struct st_mysql_ftparser fts_default_parser; + +struct fts_ast_state_t; + +#define PARSER_INIT(parser, arg) if (parser->init) { parser->init(arg); } +#define PARSER_DEINIT(parser, arg) if (parser->deinit) { parser->deinit(arg); } + +/******************************************************************//** +fts parse query by plugin parser. +@return 0 if parse successfully, or return non-zero. */ +int +fts_parse_by_parser( +/*================*/ + ibool mode, /*!< in: query boolean mode */ + uchar* query, /*!< in: query string */ + ulint len, /*!< in: query string length */ + st_mysql_ftparser* parse, /*!< in: fts plugin parser */ + fts_ast_state_t* state); /*!< in: query parser state */ + +#endif /* INNOBASE_FTS0PLUGIN_H */ diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h index 2d4e9d88fd1..1fd33c2b103 100644 --- a/storage/innobase/include/fts0priv.h +++ b/storage/innobase/include/fts0priv.h @@ -26,6 +26,7 @@ Created 2011/09/02 Sunny Bains #ifndef INNOBASE_FTS0PRIV_H #define INNOBASE_FTS0PRIV_H +#include "univ.i" #include "dict0dict.h" #include "pars0pars.h" #include "que0que.h" @@ -114,34 +115,35 @@ component. /******************************************************************//** Parse an SQL string. %s is replaced with the table's id. 
@return query graph */ -UNIV_INTERN que_t* fts_parse_sql( /*==========*/ fts_table_t* fts_table, /*!< in: FTS aux table */ pars_info_t* info, /*!< in: info struct, or NULL */ const char* sql) /*!< in: SQL string to evaluate */ - MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Evaluate a parsed SQL statement @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_eval_sql( /*=========*/ trx_t* trx, /*!< in: transaction */ que_t* graph) /*!< in: Parsed statement */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Construct the name of an ancillary FTS table for the given table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* +Caller must allocate enough memory(usually size of MAX_FULL_NAME_LEN) +for param 'table_name'. */ +void fts_get_table_name( /*===============*/ const fts_table_t* - fts_table) /*!< in: FTS aux table info */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); + fts_table, /*!< in: FTS aux table info */ + char* table_name); /*!< in/out: aux table name */ + /******************************************************************//** Construct the column specification part of the SQL string for selecting the indexed FTS columns for the given table. 
Adds the necessary bound @@ -157,14 +159,13 @@ Two indexed columns named "subject" and "content": "$sel0, $sel1", info/ids: sel0 -> "subject", sel1 -> "content", @return heap-allocated WHERE string */ -UNIV_INTERN const char* fts_get_select_columns_str( /*=======================*/ dict_index_t* index, /*!< in: FTS index */ pars_info_t* info, /*!< in/out: parser info */ mem_heap_t* heap) /*!< in: memory heap */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /** define for fts_doc_fetch_by_doc_id() "option" value, defines whether we want to get Doc whose ID is equal to or greater or smaller than supplied @@ -177,7 +178,6 @@ ID */ Fetch document (= a single row's indexed text) with the given document id. @return: DB_SUCCESS if fetch is successful, else error */ -UNIV_INTERN dberr_t fts_doc_fetch_by_doc_id( /*====================*/ @@ -190,24 +190,21 @@ fts_doc_fetch_by_doc_id( fts_sql_callback callback, /*!< in: callback to read records */ - void* arg) /*!< in: callback arg */ - MY_ATTRIBUTE((nonnull(6))); + void* arg); /*!< in: callback arg */ /*******************************************************************//** Callback function for fetch that stores the text of an FTS document, converting each column to UTF-16. @return always FALSE */ -UNIV_INTERN ibool fts_query_expansion_fetch_doc( /*==========================*/ void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_doc_t* */ - MY_ATTRIBUTE((nonnull)); + void* user_arg); /*!< in: fts_doc_t* */ + /******************************************************************** Write out a single word's data as new entry/entries in the INDEX table. @return DB_SUCCESS if all OK. 
*/ -UNIV_INTERN dberr_t fts_write_node( /*===========*/ @@ -216,22 +213,38 @@ fts_write_node( fts_table_t* fts_table, /*!< in: the FTS aux index */ fts_string_t* word, /*!< in: word in UTF-8 */ fts_node_t* node) /*!< in: node columns */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + +/** Check fts token +1. for ngram token, check whether the token contains any words in stopwords +2. for non-ngram token, check if it's stopword or less than fts_min_token_size +or greater than fts_max_token_size. +@param[in] token token string +@param[in] stopwords stopwords rb tree +@param[in] is_ngram is ngram parser +@param[in] cs token charset +@retval true if it is not stopword and length in range +@retval false if it is stopword or length not in range */ +bool +fts_check_token( + const fts_string_t* token, + const ib_rbt_t* stopwords, + bool is_ngram, + const CHARSET_INFO* cs); + /*******************************************************************//** Tokenize a document. */ -UNIV_INTERN void fts_tokenize_document( /*==================*/ fts_doc_t* doc, /*!< in/out: document to tokenize */ - fts_doc_t* result) /*!< out: if provided, save + fts_doc_t* result, /*!< out: if provided, save result tokens here */ - MY_ATTRIBUTE((nonnull(1))); + st_mysql_ftparser* parser);/* in: plugin fts parser */ /*******************************************************************//** Continue to tokenize a document. */ -UNIV_INTERN void fts_tokenize_document_next( /*=======================*/ @@ -239,23 +252,21 @@ fts_tokenize_document_next( tokenize */ ulint add_pos, /*!< in: add this position to all tokens from this tokenization */ - fts_doc_t* result) /*!< out: if provided, save + fts_doc_t* result, /*!< out: if provided, save result tokens here */ - MY_ATTRIBUTE((nonnull(1))); + st_mysql_ftparser* parser);/* in: plugin fts parser */ + /******************************************************************//** Initialize a document. 
*/ -UNIV_INTERN void fts_doc_init( /*=========*/ - fts_doc_t* doc) /*!< in: doc to initialize */ - MY_ATTRIBUTE((nonnull)); + fts_doc_t* doc); /*!< in: doc to initialize */ /******************************************************************//** Do a binary search for a doc id in the array @return +ve index if found -ve index where it should be inserted if not found */ -UNIV_INTERN int fts_bsearch( /*========*/ @@ -263,27 +274,24 @@ fts_bsearch( int lower, /*!< in: lower bound of array*/ int upper, /*!< in: upper bound of array*/ doc_id_t doc_id) /*!< in: doc id to lookup */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Free document. */ -UNIV_INTERN void fts_doc_free( /*=========*/ - fts_doc_t* doc) /*!< in: document */ - MY_ATTRIBUTE((nonnull)); + fts_doc_t* doc); /*!< in: document */ + /******************************************************************//** Free fts_optimizer_word_t instanace.*/ -UNIV_INTERN void fts_word_free( /*==========*/ - fts_word_t* word) /*!< in: instance to free.*/ - MY_ATTRIBUTE((nonnull)); + fts_word_t* word); /*!< in: instance to free.*/ + /******************************************************************//** Read the rows from the FTS inde @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_index_fetch_nodes( /*==================*/ @@ -292,19 +300,18 @@ fts_index_fetch_nodes( fts_table_t* fts_table, /*!< in: FTS aux table */ const fts_string_t* word, /*!< in: the word to fetch */ - fts_fetch_t* fetch) /*!< in: fetch callback.*/ - MY_ATTRIBUTE((nonnull)); + fts_fetch_t* fetch); /*!< in: fetch callback.*/ + /******************************************************************//** Create a fts_optimizer_word_t instance. 
@return new instance */ -UNIV_INTERN fts_word_t* fts_word_init( /*==========*/ fts_word_t* word, /*!< in: word to initialize */ byte* utf8, /*!< in: UTF-8 string */ - ulint len) /*!< in: length of string in bytes */ - MY_ATTRIBUTE((nonnull)); + ulint len); /*!< in: length of string in bytes */ + /******************************************************************//** Compare two fts_trx_table_t instances, we actually compare the table id's here. @@ -314,8 +321,8 @@ int fts_trx_table_cmp( /*==============*/ const void* v1, /*!< in: id1 */ - const void* v2) /*!< in: id2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + const void* v2); /*!< in: id2 */ + /******************************************************************//** Compare a table id with a trx_table_t table id. @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ @@ -324,43 +331,40 @@ int fts_trx_table_id_cmp( /*=================*/ const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + const void* p2); /*!< in: id2 */ + /******************************************************************//** Commit a transaction. @return DB_SUCCESS if all OK */ -UNIV_INTERN dberr_t fts_sql_commit( /*===========*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); + trx_t* trx); /*!< in: transaction */ + /******************************************************************//** Rollback a transaction. @return DB_SUCCESS if all OK */ -UNIV_INTERN dberr_t fts_sql_rollback( /*=============*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); + trx_t* trx); /*!< in: transaction */ + /******************************************************************//** Parse an SQL string. %s is replaced with the table's id. 
Don't acquire the dict mutex @return query graph */ -UNIV_INTERN que_t* fts_parse_sql_no_dict_lock( /*=======================*/ fts_table_t* fts_table, /*!< in: table with FTS index */ pars_info_t* info, /*!< in: parser info */ const char* sql) /*!< in: SQL string to evaluate */ - MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Get value from config table. The caller must ensure that enough space is allocated for value to hold the column contents @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_value( /*=================*/ @@ -368,15 +372,13 @@ fts_config_get_value( fts_table_t* fts_table, /*!< in: the indexed FTS table */ const char* name, /*!< in: get config value for this parameter name */ - fts_string_t* value) /*!< out: value read from + fts_string_t* value); /*!< out: value read from config table */ - MY_ATTRIBUTE((nonnull)); /******************************************************************//** Get value specific to an FTS index from the config table. The caller must ensure that enough space is allocated for value to hold the column contents. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_index_value( /*=======================*/ @@ -386,11 +388,11 @@ fts_config_get_index_value( this parameter name */ fts_string_t* value) /*!< out: value read from config table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Set the value in the config table for name. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_set_value( /*=================*/ @@ -399,12 +401,11 @@ fts_config_set_value( const char* name, /*!< in: get config value for this parameter name */ const fts_string_t* - value) /*!< in: value to update */ - MY_ATTRIBUTE((nonnull)); + value); /*!< in: value to update */ + /****************************************************************//** Set an ulint value in the config table. @return DB_SUCCESS if all OK else error code */ -UNIV_INTERN dberr_t fts_config_set_ulint( /*=================*/ @@ -412,11 +413,11 @@ fts_config_set_ulint( fts_table_t* fts_table, /*!< in: the indexed FTS table */ const char* name, /*!< in: param name */ ulint int_value) /*!< in: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Set the value specific to an FTS index in the config table. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_set_index_value( /*=======================*/ @@ -426,11 +427,11 @@ fts_config_set_index_value( this parameter name */ fts_string_t* value) /*!< out: value read from config table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Increment the value in the config table for column name. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_increment_value( /*=======================*/ @@ -439,11 +440,11 @@ fts_config_increment_value( const char* name, /*!< in: increment config value for this parameter name */ ulint delta) /*!< in: increment by this much */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Increment the per index value in the config table for column name. 
@return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_increment_index_value( /*=============================*/ @@ -451,12 +452,11 @@ fts_config_increment_index_value( dict_index_t* index, /*!< in: FTS index */ const char* name, /*!< in: increment config value for this parameter name */ - ulint delta) /*!< in: increment by this much */ - MY_ATTRIBUTE((nonnull)); + ulint delta); /*!< in: increment by this much */ + /******************************************************************//** Get an ulint value from the config table. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_index_ulint( /*=======================*/ @@ -464,11 +464,11 @@ fts_config_get_index_ulint( dict_index_t* index, /*!< in: FTS index */ const char* name, /*!< in: param name */ ulint* int_value) /*!< out: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Set an ulint value int the config table. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_set_index_ulint( /*=======================*/ @@ -476,23 +476,22 @@ fts_config_set_index_ulint( dict_index_t* index, /*!< in: FTS index */ const char* name, /*!< in: param name */ ulint int_value) /*!< in: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Get an ulint value from the config table. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_config_get_ulint( /*=================*/ trx_t* trx, /*!< in: transaction */ fts_table_t* fts_table, /*!< in: the indexed FTS table */ const char* name, /*!< in: param name */ - ulint* int_value) /*!< out: value */ - MY_ATTRIBUTE((nonnull)); + ulint* int_value); /*!< out: value */ + /******************************************************************//** Search cache for word. 
@return the word node vector if found else NULL */ -UNIV_INTERN const ib_vector_t* fts_cache_find_word( /*================*/ @@ -500,21 +499,21 @@ fts_cache_find_word( index_cache, /*!< in: cache to search */ const fts_string_t* text) /*!< in: word to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Check cache for deleted doc id. @return TRUE if deleted */ -UNIV_INTERN ibool fts_cache_is_deleted_doc_id( /*========================*/ const fts_cache_t* cache, /*!< in: cache ito search */ doc_id_t doc_id) /*!< in: doc id to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Append deleted doc ids to vector and sort the vector. */ -UNIV_INTERN void fts_cache_append_deleted_doc_ids( /*=============================*/ @@ -526,7 +525,6 @@ Wait for the background thread to start. We poll to detect change of state, which is acceptable, since the wait should happen only once during startup. @return true if the thread started else FALSE (i.e timed out) */ -UNIV_INTERN ibool fts_wait_for_background_thread_to_start( /*====================================*/ @@ -539,19 +537,17 @@ fts_wait_for_background_thread_to_start( /******************************************************************//** Get the total number of words in the FTS for a particular FTS index. @return DB_SUCCESS or error code */ -UNIV_INTERN dberr_t fts_get_total_word_count( /*=====================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: for this index */ ulint* total) /*!< out: total words */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); #endif /******************************************************************//** Search the index specific cache for a particular FTS index. 
@return the index specific cache else NULL */ -UNIV_INTERN fts_index_cache_t* fts_find_index_cache( /*================*/ @@ -559,21 +555,22 @@ fts_find_index_cache( cache, /*!< in: cache to search */ const dict_index_t* index) /*!< in: index to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Write the table id to the given buffer (including final NUL). Buffer must be at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. -@return number of bytes written */ +@return number of bytes written */ UNIV_INLINE int fts_write_object_id( /*================*/ ib_id_t id, /*!< in: a table/index id */ char* str, /*!< in: buffer to write the id to */ - bool hex_format MY_ATTRIBUTE((unused))) + bool hex_format MY_ATTRIBUTE((unused))); /*!< in: true for fixed hex format, false for old ambiguous format */ - MY_ATTRIBUTE((nonnull)); + /******************************************************************//** Read the table id from the string generated by fts_write_object_id(). @return TRUE if parse successful */ @@ -583,11 +580,11 @@ fts_read_object_id( /*===============*/ ib_id_t* id, /*!< out: a table id */ const char* str) /*!< in: buffer to read from */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Get the table id. @return number of bytes written */ -UNIV_INTERN int fts_get_table_id( /*=============*/ @@ -596,55 +593,51 @@ fts_get_table_id( char* table_id) /*!< out: table id, must be at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Add the table to add to the OPTIMIZER's list. 
*/ -UNIV_INTERN void fts_optimize_add_table( /*===================*/ - dict_table_t* table) /*!< in: table to add */ - MY_ATTRIBUTE((nonnull)); + dict_table_t* table); /*!< in: table to add */ + /******************************************************************//** Optimize a table. */ -UNIV_INTERN void fts_optimize_do_table( /*==================*/ - dict_table_t* table) /*!< in: table to optimize */ - MY_ATTRIBUTE((nonnull)); + dict_table_t* table); /*!< in: table to optimize */ + /******************************************************************//** Construct the prefix name of an FTS table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN +@return own: table name, must be freed with ut_free() */ char* fts_get_table_name_prefix( /*======================*/ const fts_table_t* fts_table) /*!< in: Auxiliary table type */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); + /******************************************************************//** Add node positions. */ -UNIV_INTERN void fts_cache_node_add_positions( /*=========================*/ fts_cache_t* cache, /*!< in: cache */ fts_node_t* node, /*!< in: word node */ doc_id_t doc_id, /*!< in: doc id */ - ib_vector_t* positions) /*!< in: fts_token_t::positions */ - MY_ATTRIBUTE((nonnull(2,4))); + ib_vector_t* positions); /*!< in: fts_token_t::positions */ /******************************************************************//** Create the config table name for retrieving index specific value. 
@return index config parameter name */ -UNIV_INTERN char* fts_config_create_index_param_name( /*===============================*/ - const char* param, /*!< in: base name of param */ - const dict_index_t* index) /*!< in: index for config */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); + const char* param, /*!< in: base name of param */ + const dict_index_t* index) /*!< in: index for config */ + MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_NONINL #include "fts0priv.ic" diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic index 88f2d67c7b8..fa2cdd44a36 100644 --- a/storage/innobase/include/fts0priv.ic +++ b/storage/innobase/include/fts0priv.ic @@ -26,7 +26,7 @@ Created 2011/11/12 Sunny Bains /******************************************************************//** Write the table id to the given buffer (including final NUL). Buffer must be at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. -@return number of bytes written */ +@return number of bytes written */ UNIV_INLINE int fts_write_object_id( @@ -46,36 +46,31 @@ fts_write_object_id( /* Use this to construct old(5.6.14 and 5.7.3) windows ambiguous aux table names */ DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - return(sprintf(str, "%016llu", id));); + return(sprintf(str, "%016llu", (ulonglong) id));); #else /* _WIN32 */ /* Use this to construct old(5.6.14 and 5.7.3) windows ambiguous aux table names */ DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name", - return(sprintf(str, "%016" PRIu64, id));); + return(sprintf(str, "%016llu", (ulonglong) id));); DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - return(sprintf(str, UINT64PFx, id));); + return(sprintf(str, "%016llx", (ulonglong) id));); #endif /* _WIN32 */ /* As above, but this is only for those tables failing to rename. */ if (!hex_format) { -#ifdef _WIN32 - // FIXME: Use ut_snprintf(), so does following one. 
- return(sprintf(str, "%016llu", id)); -#else /* _WIN32 */ - return(sprintf(str, "%016" PRIu64, id)); -#endif /* _WIN32 */ + return(sprintf(str, "%016llu", (ulonglong) id)); } - return(sprintf(str, UINT64PFx, id)); + return(sprintf(str, "%016llx", (ulonglong) id)); } /******************************************************************//** Read the table id from the string generated by fts_write_object_id(). -@return TRUE if parse successful */ +@return TRUE if parse successful */ UNIV_INLINE ibool fts_read_object_id( @@ -91,7 +86,7 @@ fts_read_object_id( /******************************************************************//** Compare two fts_trx_table_t instances. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ UNIV_INLINE int fts_trx_table_cmp( @@ -99,8 +94,11 @@ fts_trx_table_cmp( const void* p1, /*!< in: id1 */ const void* p2) /*!< in: id2 */ { - const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table; - const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; + const dict_table_t* table1 + = (*static_cast(p1))->table; + + const dict_table_t* table2 + = (*static_cast(p2))->table; return((table1->id > table2->id) ? 1 @@ -119,8 +117,9 @@ fts_trx_table_id_cmp( const void* p1, /*!< in: id1 */ const void* p2) /*!< in: id2 */ { - const ullint* table_id = (const ullint*) p1; - const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; + const uintmax_t* table_id = static_cast(p1); + const dict_table_t* table2 + = (*static_cast(p2))->table; return((*table_id > table2->id) ? 
1 diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h index f91533803e8..49bea8b08d4 100644 --- a/storage/innobase/include/fts0tlex.h +++ b/storage/innobase/include/fts0tlex.h @@ -341,7 +341,7 @@ extern int fts0tlex (yyscan_t yyscanner); #undef YY_DECL #endif -#line 68 "fts0tlex.l" +#line 69 "fts0tlex.l" #line 348 "../include/fts0tlex.h" diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h new file mode 100644 index 00000000000..15726aea1de --- /dev/null +++ b/storage/innobase/include/fts0tokenize.h @@ -0,0 +1,188 @@ +/***************************************************************************** + +Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fts/fts0tokenize.cc +Full Text Search plugin tokenizer refer to MyISAM + +Created 2014/11/17 Shaohua Wang +***********************************************************************/ + +#include "ft_global.h" +#include "mysql/plugin_ftparser.h" +#include "m_ctype.h" + +/* Macros and structs below are from ftdefs.h in MyISAM */ +/** Check a char is true word */ +#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_') + +/** Check if a char is misc word */ +#define misc_word_char(X) 0 + +/** Boolean search syntax */ +static const char* fts_boolean_syntax = DEFAULT_FTB_SYNTAX; + +#define FTB_YES (fts_boolean_syntax[0]) +#define FTB_EGAL (fts_boolean_syntax[1]) +#define FTB_NO (fts_boolean_syntax[2]) +#define FTB_INC (fts_boolean_syntax[3]) +#define FTB_DEC (fts_boolean_syntax[4]) +#define FTB_LBR (fts_boolean_syntax[5]) +#define FTB_RBR (fts_boolean_syntax[6]) +#define FTB_NEG (fts_boolean_syntax[7]) +#define FTB_TRUNC (fts_boolean_syntax[8]) +#define FTB_LQUOT (fts_boolean_syntax[10]) +#define FTB_RQUOT (fts_boolean_syntax[11]) + +/** FTS query token */ +typedef struct st_ft_word { + uchar* pos; /*!< word start pointer */ + uint len; /*!< word len */ + double weight; /*!< word weight, unused in innodb */ +} FT_WORD; + +/** Tokenizer for ngram referring to ft_get_word(ft_parser.c) in MyISAM. +Differences: a. code format changed; b. stopword processing removed. 
+@param[in] cs charset +@param[in,out] start doc start pointer +@param[in,out] end doc end pointer +@param[in,out] word token +@param[in,out] info token info +@retval 0 eof +@retval 1 word found +@retval 2 left bracket +@retval 3 right bracket +@retval 4 stopword found */ +inline +uchar +fts_get_word( + const CHARSET_INFO* cs, + uchar** start, + uchar* end, + FT_WORD* word, + MYSQL_FTPARSER_BOOLEAN_INFO* + info) +{ + uchar* doc = *start; + int ctype; + uint mwc; + uint length; + int mbl; + + info->yesno = (FTB_YES ==' ') ? 1 : (info->quot != 0); + info->weight_adjust = info->wasign = 0; + info->type = FT_TOKEN_EOF; + + while (doc < end) { + for (; doc < end; + doc += (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1))) { + mbl = cs->cset->ctype(cs, &ctype, doc, end); + + if (true_word_char(ctype, *doc)) { + break; + } + + if (*doc == FTB_RQUOT && info->quot) { + *start = doc + 1; + info->type = FT_TOKEN_RIGHT_PAREN; + + return(info->type); + } + + if (!info->quot) { + if (*doc == FTB_LBR + || *doc == FTB_RBR + || *doc == FTB_LQUOT) { + /* param->prev=' '; */ + *start = doc + 1; + if (*doc == FTB_LQUOT) { + info->quot = (char*)1; + } + + info->type = (*doc == FTB_RBR ? + FT_TOKEN_RIGHT_PAREN : + FT_TOKEN_LEFT_PAREN); + + return(info->type); + } + + if (info->prev == ' ') { + if (*doc == FTB_YES) { + info->yesno = +1; + continue; + } else if (*doc == FTB_EGAL) { + info->yesno = 0; + continue; + } else if (*doc == FTB_NO) { + info->yesno = -1; + continue; + } else if (*doc == FTB_INC) { + info->weight_adjust++; + continue; + } else if (*doc == FTB_DEC) { + info->weight_adjust--; + continue; + } else if (*doc == FTB_NEG) { + info->wasign = !info->wasign; + continue; + } + } + } + + info->prev = *doc; + info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0); + info->weight_adjust = info->wasign = 0; + } + + mwc = length = 0; + for (word->pos = doc; + doc < end; + length++, doc += (mbl > 0 ? mbl : (mbl < 0 ? 
-mbl : 1))) { + mbl = cs->cset->ctype(cs, &ctype, doc, end); + + if (true_word_char(ctype, *doc)) { + mwc = 0; + } else if (!misc_word_char(*doc) || mwc) { + break; + } else { + mwc++; + } + } + + /* Be sure *prev is true_word_char. */ + info->prev = 'A'; + word->len = (uint)(doc-word->pos) - mwc; + + if ((info->trunc = (doc < end && *doc == FTB_TRUNC))) { + doc++; + } + + /* We don't check stopword here. */ + *start = doc; + info->type = FT_TOKEN_WORD; + + return(info->type); + } + + if (info->quot) { + *start = doc; + info->type = FT_TOKEN_RIGHT_PAREN; + } + + return(info->type); +} diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index e495fe72a60..039006265f6 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -26,16 +26,17 @@ Created 2007-03-27 Sunny Bains #ifndef INNOBASE_FTS0TYPES_H #define INNOBASE_FTS0TYPES_H +#include "univ.i" +#include "fts0fts.h" +#include "fut0fut.h" +#include "pars0pars.h" #include "que0types.h" #include "ut0byte.h" -#include "fut0fut.h" #include "ut0rbt.h" -#include "fts0fts.h" /** Types used within FTS. */ struct fts_que_t; struct fts_node_t; -struct fts_utf8_str_t; /** Callbacks used within FTS. */ typedef pars_user_func_cb_t fts_sql_callback; @@ -270,6 +271,12 @@ struct fts_doc_t { same lifespan, most notably the vector of token positions */ CHARSET_INFO* charset; /*!< Document's charset info */ + + st_mysql_ftparser* parser; /*!< fts plugin parser */ + + bool is_ngram; /*!< Whether it is a ngram parser */ + + ib_rbt_t* stopwords; /*!< Stopwords */ }; /** A token and its positions within a document. */ @@ -284,33 +291,6 @@ struct fts_token_t { /** It's defined in fts/fts0fts.c */ extern const fts_index_selector_t fts_index_selector[]; -/******************************************************************//** -Compare two UTF-8 strings. 
*/ -UNIV_INLINE -int -fts_utf8_string_cmp( -/*================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/******************************************************************//** -Compare two UTF-8 strings, and return match (0) if -passed in "key" value equals or is the prefix of the "node" value. */ -UNIV_INLINE -int -fts_utf8_string_cmp_prefix( -/*=======================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - /******************************************************************//** Compare two fts_trx_row_t instances doc_ids. */ UNIV_INLINE @@ -361,11 +341,11 @@ fts_decode_vlc( incremented by the number of bytes decoded */ /******************************************************************//** -Duplicate an UTF-8 string. */ +Duplicate a string. */ UNIV_INLINE void -fts_utf8_string_dup( -/*================*/ +fts_string_dup( +/*===========*/ /*!< out: < 0 if n1 < n2, 0 if n1 == n2, @@ -396,43 +376,6 @@ fts_encode_int( byte* buf); /*!< in: buffer, must have enough space */ -/******************************************************************//** -Decode a UTF-8 character. - -http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf: - - Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte -00000000 0xxxxxxx 0xxxxxxx -00000yyy yyxxxxxx 110yyyyy 10xxxxxx -zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx -000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx - -This function decodes UTF-8 sequences up to 6 bytes (31 bits). - -On error *ptr will point to the first byte that was not correctly -decoded. This will hopefully help in resyncing the input. */ -UNIV_INLINE -ulint -fts_utf8_decode( -/*============*/ - /*!< out: UTF8_ERROR if *ptr - did not point to a valid - UTF-8 sequence, or the - Unicode code point. */ - const byte** ptr); /*!< in/out: pointer to - UTF-8 string. 
The - pointer is advanced to - the start of the next - character. */ - -/******************************************************************//** -Lowercase an UTF-8 string. */ -UNIV_INLINE -void -fts_utf8_tolower( -/*=============*/ - fts_string_t* str); /*!< in: string */ - /******************************************************************//** Get the selected FTS aux INDEX suffix. */ UNIV_INLINE @@ -441,34 +384,17 @@ fts_get_suffix( /*===========*/ ulint selected); /*!< in: selected index */ -/******************************************************************** -Get the number of index selectors. */ -UNIV_INLINE -ulint -fts_get_n_selectors(void); -/*=====================*/ - -/******************************************************************//** -Select the FTS auxiliary index for the given string. +/** Select the FTS auxiliary index for the given character. +@param[in] cs charset +@param[in] str string +@param[in] len string length in bytes @return the index to use for the string */ UNIV_INLINE ulint fts_select_index( -/*=============*/ - const CHARSET_INFO* cs, /*!< Charset */ - const byte* str, /*!< in: word string */ - ulint len); /*!< in: string length */ - -/******************************************************************** -Select the next FTS auxiliary index for the given character. 
-@return the next index to use for character */ -UNIV_INLINE -ulint -fts_select_next_index( -/*==================*/ - const CHARSET_INFO* cs, /*!< Charset */ - const byte* str, /*!< in: string */ - ulint len); /*!< in: string length */ + const CHARSET_INFO* cs, + const byte* str, + ulint len); #ifndef UNIV_NONINL #include "fts0types.ic" diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic index f0dfd023a70..417a1010919 100644 --- a/storage/innobase/include/fts0types.ic +++ b/storage/innobase/include/fts0types.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,23 +26,16 @@ Created 2007-03-27 Sunny Bains #ifndef INNOBASE_FTS0TYPES_IC #define INNOBASE_FTS0TYPES_IC -#include - #include "rem0cmp.h" #include "ha_prototypes.h" -extern const ulint UTF8_ERROR; - -/* Determine if a UTF-8 continuation byte is valid. */ -#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80) - /******************************************************************//** -Duplicate an UTF-8 string. +Duplicate a string. @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ UNIV_INLINE void -fts_utf8_string_dup( -/*================*/ +fts_string_dup( +/*===========*/ fts_string_t* dst, /*!< in: dup to here */ const fts_string_t* src, /*!< in: src string */ mem_heap_t* heap) /*!< in: heap to use */ @@ -103,183 +96,6 @@ fts_update_doc_id_cmp( return((int)(up1->doc_id - up2->doc_id)); } - -/******************************************************************//** -Lowercase an UTF-8 string. 
*/ -UNIV_INLINE -void -fts_utf8_tolower( -/*=============*/ - fts_string_t* str) /*!< in: string */ -{ - innobase_casedn_str((char*) str->f_str); -} - -/******************************************************************//** -Compare two UTF-8 strings. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_utf8_string_cmp( -/*================*/ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - const fts_string_t* s1 = (const fts_string_t*) p1; - const fts_string_t* s2 = (const fts_string_t*) p2; - - return(cmp_data_data_slow_varchar( - s1->f_str, s1->f_len, s2->f_str, s2->f_len)); -} - -/******************************************************************//** -Compare two UTF-8 strings, and return match (0) if -passed in "key" value equals or is the prefix of the "node" value. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_utf8_string_cmp_prefix( -/*=======================*/ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - int result; - ulint len; - - const fts_string_t* s1 = (const fts_string_t*) p1; - const fts_string_t* s2 = (const fts_string_t*) p2; - - len = ut_min(s1->f_len, s2->f_len); - - result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len); - - if (result) { - return(result); - } - - if (s1->f_len > s2->f_len) { - return(1); - } - - return(0); -} - -/******************************************************************//** -Decode a UTF-8 character. - -http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf: - - Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte -00000000 0xxxxxxx 0xxxxxxx -00000yyy yyxxxxxx 110yyyyy 10xxxxxx -zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx -000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx - -This function decodes UTF-8 sequences up to 6 bytes (31 bits). - -On error *ptr will point to the first byte that was not correctly -decoded. This will hopefully help in resyncing the input. 
-@return UTF8_ERROR if *ptr did not point to a valid -UTF-8 sequence, or the Unicode code point. */ -UNIV_INLINE -ulint -fts_utf8_decode( -/*============*/ - const byte** ptr) /*!< in/out: pointer to - UTF-8 string. The - pointer is advanced to - the start of the next - character. */ -{ - const byte* p = *ptr; - ulint ch = *p++; -#ifdef UNIV_DEBUG - ulint min_ch; -#endif /* UNIV_DEBUG */ - - if (UNIV_LIKELY(ch < 0x80)) { - /* 0xxxxxxx */ - } else if (UNIV_UNLIKELY(ch < 0xC0)) { - /* A continuation byte cannot start a code. */ - goto err_exit; - } else if (ch < 0xE0) { - /* 110yyyyy 10xxxxxx */ - ch &= 0x1F; - ut_d(min_ch = 0x80); - goto get1; - } else if (ch < 0xF0) { - /* 1110zzzz 10yyyyyy 10xxxxxx */ - ch &= 0x0F; - ut_d(min_ch = 0x800); - goto get2; - } else if (ch < 0xF8) { - /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ - ch &= 0x07; - ut_d(min_ch = 0x10000); - goto get3; - } else if (ch < 0xFC) { - /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */ - ch &= 0x03; - ut_d(min_ch = 0x200000); - goto get4; - } else if (ch < 0xFE) { - /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */ - ut_d(min_ch = 0x4000000); - if (!fts_utf8_is_valid(*p)) { - goto err_exit; - } - ch <<= 6; - ch |= (*p++) & 0x3F; -get4: - if (!fts_utf8_is_valid(*p)) { - goto err_exit; - } - ch <<= 6; - ch |= (*p++) & 0x3F; -get3: - if (!fts_utf8_is_valid(*p)) { - goto err_exit; - } - ch <<= 6; - ch |= (*p++) & 0x3F; -get2: - if (!fts_utf8_is_valid(*p)) { - goto err_exit; - } - ch <<= 6; - ch |= (*p++) & 0x3F; -get1: - if (!fts_utf8_is_valid(*p)) { - goto err_exit; - } - ch <<= 6; - ch |= (*p++) & 0x3F; - - /* The following is needed in the 6-byte case - when ulint is wider than 32 bits. */ - ch &= 0xFFFFFFFF; - - /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs) - and U+FFFE and U+FFFF cannot occur in valid UTF-8. 
*/ - - if ( (ch >= 0xD800 && ch <= 0xDFFF) -#ifdef UNIV_DEBUG - || ch < min_ch -#endif /* UNIV_DEBUG */ - || ch == 0xFFFE || ch == 0xFFFF) { - - ch = UTF8_ERROR; - } - } else { -err_exit: - ch = UTF8_ERROR; - } - - *ptr = p; - - return(ch); -} - /******************************************************************//** Get the first character's code position for FTS index partition */ extern @@ -290,16 +106,41 @@ innobase_strnxfrm( const uchar* p2, /*!< in: string */ const ulint len2); /*!< in: string length */ -/******************************************************************//** -Select the FTS auxiliary index for the given character. -@return the index to use for the string */ +/** Check if fts index charset is cjk +@param[in] cs charset +@retval true if the charset is cjk +@retval false if not. */ +UNIV_INLINE +bool +fts_is_charset_cjk( + const CHARSET_INFO* cs) +{ + if (strcmp(cs->name, "gb2312_chinese_ci") == 0 + || strcmp(cs->name, "gbk_chinese_ci") == 0 + || strcmp(cs->name, "big5_chinese_ci") == 0 + || strcmp(cs->name, "gb18030_chinese_ci") == 0 + || strcmp(cs->name, "ujis_japanese_ci") == 0 + || strcmp(cs->name, "sjis_japanese_ci") == 0 + || strcmp(cs->name, "cp932_japanese_ci") == 0 + || strcmp(cs->name, "eucjpms_japanese_ci") == 0 + || strcmp(cs->name, "euckr_korean_ci") == 0) { + return(true); + } else { + return(false); + } +} + +/** Select the FTS auxiliary index for the given character by range. 
+@param[in] cs charset +@param[in] str string +@param[in] len string length +@retval the index to use for the string */ UNIV_INLINE ulint -fts_select_index( -/*=============*/ - const CHARSET_INFO* cs, /*!< in: Charset */ - const byte* str, /*!< in: string */ - ulint len) /*!< in: string length */ +fts_select_index_by_range( + const CHARSET_INFO* cs, + const byte* str, + ulint len) { ulint selected = 0; ulint value = innobase_strnxfrm(cs, str, len); @@ -323,37 +164,64 @@ fts_select_index( return(selected - 1); } -/******************************************************************//** -Select the next FTS auxiliary index for the given character. -@return the next index to use for character */ +/** Select the FTS auxiliary index for the given character by hash. +@param[in] cs charset +@param[in] str string +@param[in] len string length +@retval the index to use for the string */ UNIV_INLINE ulint -fts_select_next_index( -/*==================*/ - const CHARSET_INFO* cs, /*!< in: Charset */ - const byte* str, /*!< in: string */ - ulint len) /*!< in: string length */ +fts_select_index_by_hash( + const CHARSET_INFO* cs, + const byte* str, + ulint len) { - ulint selected = 0; - ulint value = innobase_strnxfrm(cs, str, len); + int char_len; + ulong nr1 = 1; + ulong nr2 = 4; - while (fts_index_selector[selected].value != 0) { + ut_ad(!(str == NULL && len > 0)); - if (fts_index_selector[selected].value == value) { - - return(selected + 1); - - } else if (fts_index_selector[selected].value > value) { - - return(selected); - } - - ++selected; + if (str == NULL || len == 0) { + return 0; } - ut_ad(selected > 0); + /* Get the first char */ + /* JAN: TODO: MySQL 5.7 had + char_len = my_mbcharlen_ptr(cs, reinterpret_cast(str), + reinterpret_cast(str + len)); + */ + char_len = cs->cset->charlen(cs, str, str+len); - return((ulint) selected); + ut_ad(static_cast(char_len) <= len); + + /* Get collation hash code */ + cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2); + + return(nr1 % 
FTS_NUM_AUX_INDEX); +} + +/** Select the FTS auxiliary index for the given character. +@param[in] cs charset +@param[in] str string +@param[in] len string length in bytes +@retval the index to use for the string */ +UNIV_INLINE +ulint +fts_select_index( + const CHARSET_INFO* cs, + const byte* str, + ulint len) +{ + ulint selected; + + if (fts_is_charset_cjk(cs)) { + selected = fts_select_index_by_hash(cs, str, len); + } else { + selected = fts_select_index_by_range(cs, str, len); + } + + return(selected); } /******************************************************************//** @@ -367,22 +235,4 @@ fts_get_suffix( return(fts_index_selector[selected].suffix); } -/******************************************************************//** -Get the number of index selectors. -@return The number of selectors */ -UNIV_INLINE -ulint -fts_get_n_selectors(void) -/*=====================*/ -{ - ulint i = 0; - - // FIXME: This is a hack - while (fts_index_selector[i].value != 0) { - ++i; - } - - return(i); -} - #endif /* INNOBASE_FTS0TYPES_IC */ diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h index 851cdb44cdf..0b8b8b0e43b 100644 --- a/storage/innobase/include/fut0fut.h +++ b/storage/innobase/include/fut0fut.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -32,24 +32,28 @@ Created 12/13/1995 Heikki Tuuri #include "fil0fil.h" #include "mtr0mtr.h" -/********************************************************************//** -Gets a pointer to a file address and latches the page. 
-@return pointer to a byte in a frame; the file page in the frame is +/** Gets a pointer to a file address and latches the page. +@param[in] space space id +@param[in] page_size page size +@param[in] addr file address +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH +@param[out] ptr_block file page +@param[in,out] mtr mini-transaction +@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is bufferfixed and latched */ UNIV_INLINE byte* fut_get_ptr( -/*========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fil_addr_t addr, /*!< in: file address */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ - mtr_t* mtr); /*!< in: mtr handle */ + ulint space, + const page_size_t& page_size, + fil_addr_t addr, + rw_lock_type_t rw_latch, + mtr_t* mtr, + buf_block_t** ptr_block = NULL) + MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_NONINL #include "fut0fut.ic" #endif -#endif - +#endif /* fut0fut_h */ diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic index b065b10b9ca..6fe031876e6 100644 --- a/storage/innobase/include/fut0fut.ic +++ b/storage/innobase/include/fut0fut.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,31 +26,43 @@ Created 12/13/1995 Heikki Tuuri #include "sync0rw.h" #include "buf0buf.h" -/********************************************************************//** -Gets a pointer to a file address and latches the page. -@return pointer to a byte in a frame; the file page in the frame is +/** Gets a pointer to a file address and latches the page. 
+@param[in] space space id +@param[in] page_size page size +@param[in] addr file address +@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH +@param[in,out] mtr mini-transaction +@param[out] ptr_block file page +@return pointer to a byte in (*ptr_block)->frame; the *ptr_block is bufferfixed and latched */ UNIV_INLINE byte* fut_get_ptr( -/*========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fil_addr_t addr, /*!< in: file address */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ - mtr_t* mtr) /*!< in: mtr handle */ + ulint space, + const page_size_t& page_size, + fil_addr_t addr, + rw_lock_type_t rw_latch, + mtr_t* mtr, + buf_block_t** ptr_block) { buf_block_t* block; - byte* ptr; + byte* ptr = NULL; ut_ad(addr.boffset < UNIV_PAGE_SIZE); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + ut_ad((rw_latch == RW_S_LATCH) + || (rw_latch == RW_X_LATCH) + || (rw_latch == RW_SX_LATCH)); + + block = buf_page_get(page_id_t(space, addr.page), page_size, + rw_latch, mtr); - block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr); ptr = buf_block_get_frame(block) + addr.boffset; buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + if (ptr_block != NULL) { + *ptr_block = block; + } + return(ptr); } diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index 90f9a65d4fa..9c980d1358d 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,6 +26,8 @@ Created 11/28/1995 Heikki Tuuri #ifndef fut0lst_h #define fut0lst_h +#ifndef UNIV_INNOCHECKSUM + #include "univ.i" #include "fil0fil.h" @@ -41,11 +43,12 @@ typedef byte flst_node_t; /* The physical size of a list base node in bytes */ #define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) +#endif /* !UNIV_INNOCHECKSUM */ /* The physical size of a list node in bytes */ #define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) -#ifndef UNIV_HOTBACKUP +#if !defined UNIV_HOTBACKUP && !defined UNIV_INNOCHECKSUM /********************************************************************//** Initializes a list base node. */ UNIV_INLINE @@ -56,7 +59,6 @@ flst_init( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Adds a node as the last node in a list. */ -UNIV_INTERN void flst_add_last( /*==========*/ @@ -65,7 +67,6 @@ flst_add_last( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Adds a node as the first node in a list. */ -UNIV_INTERN void flst_add_first( /*===========*/ @@ -74,7 +75,6 @@ flst_add_first( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Inserts a node after another in a list. */ -UNIV_INTERN void flst_insert_after( /*==============*/ @@ -84,7 +84,6 @@ flst_insert_after( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Inserts a node before another in a list. */ -UNIV_INTERN void flst_insert_before( /*===============*/ @@ -94,7 +93,6 @@ flst_insert_before( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Removes a node. 
*/ -UNIV_INTERN void flst_remove( /*========*/ @@ -105,7 +103,6 @@ flst_remove( Cuts off the tail of the list, including the node given. The number of nodes which will be removed must be provided by the caller, as this function does not measure the length of the tail. */ -UNIV_INTERN void flst_cut_end( /*=========*/ @@ -118,7 +115,6 @@ flst_cut_end( Cuts off the tail of the list, not including the given node. The number of nodes which will be removed must be provided by the caller, as this function does not measure the length of the tail. */ -UNIV_INTERN void flst_truncate_end( /*==============*/ @@ -126,18 +122,16 @@ flst_truncate_end( flst_node_t* node2, /*!< in: first node not to remove */ ulint n_nodes,/*!< in: number of nodes to remove */ mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list length. -@return length */ +/** Get the length of a list. +@param[in] base base node +@return length */ UNIV_INLINE ulint flst_get_len( -/*=========*/ - const flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ + const flst_base_node_t* base); /********************************************************************//** Gets list first node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_first( @@ -146,7 +140,7 @@ flst_get_first( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Gets list last node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_last( @@ -155,7 +149,7 @@ flst_get_last( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Gets list next node address. 
-@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_next_addr( @@ -164,7 +158,7 @@ flst_get_next_addr( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Gets list prev node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_prev_addr( @@ -182,7 +176,7 @@ flst_write_addr( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Reads a file address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_read_addr( @@ -191,8 +185,7 @@ flst_read_addr( mtr_t* mtr); /*!< in: mini-transaction handle */ /********************************************************************//** Validates a file-based list. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool flst_validate( /*==========*/ @@ -200,7 +193,6 @@ flst_validate( mtr_t* mtr1); /*!< in: mtr */ /********************************************************************//** Prints info of a file-based list. */ -UNIV_INTERN void flst_print( /*=======*/ @@ -212,6 +204,6 @@ flst_print( #include "fut0lst.ic" #endif -#endif /* !UNIV_HOTBACKUP */ +#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM*/ #endif diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic index d18cf21378f..128dc77ed92 100644 --- a/storage/innobase/include/fut0lst.ic +++ b/storage/innobase/include/fut0lst.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -54,7 +54,9 @@ flst_write_addr( mtr_t* mtr) /*!< in: mini-transaction handle */ { ut_ad(faddr && mtr); - ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, faddr, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA); ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA); @@ -65,7 +67,7 @@ flst_write_addr( /********************************************************************//** Reads a file address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_read_addr( @@ -94,29 +96,29 @@ flst_init( flst_base_node_t* base, /*!< in: pointer to base node */ mtr_t* mtr) /*!< in: mini-transaction handle */ { - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page_flagged(mtr, base, + MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr); flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); flst_write_addr(base + FLST_LAST, fil_addr_null, mtr); } -/********************************************************************//** -Gets list length. -@return length */ +/** Get the length of a list. +@param[in] base base node +@return length */ UNIV_INLINE ulint flst_get_len( -/*=========*/ - const flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr) /*!< in: mini-transaction handle */ + const flst_base_node_t* base) { - return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr)); + return(mach_read_from_4(base + FLST_LEN)); } /********************************************************************//** Gets list first node address. 
-@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_first( @@ -129,7 +131,7 @@ flst_get_first( /********************************************************************//** Gets list last node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_last( @@ -142,7 +144,7 @@ flst_get_last( /********************************************************************//** Gets list next node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_next_addr( @@ -155,7 +157,7 @@ flst_get_next_addr( /********************************************************************//** Gets list prev node address. -@return file address */ +@return file address */ UNIV_INLINE fil_addr_t flst_get_prev_addr( diff --git a/storage/innobase/include/gis0geo.h b/storage/innobase/include/gis0geo.h new file mode 100644 index 00000000000..08895af545e --- /dev/null +++ b/storage/innobase/include/gis0geo.h @@ -0,0 +1,162 @@ +/***************************************************************************** +Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software Foundation, +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA +*****************************************************************************/ + +/**************************************************//** +@file gis0geo.h +The r-tree define from MyISAM +*******************************************************/ + +#ifndef _gis0geo_h +#define _gis0geo_h + +#include "my_global.h" +#include "string.h" + +#define SPTYPE HA_KEYTYPE_DOUBLE +#define SPLEN 8 + +/* Since the mbr could be a point or a linestring, in this case, area of +mbr is 0. So, we define this macro for calculating the area increasing +when we need to enlarge the mbr. */ +#define LINE_MBR_WEIGHTS 0.001 + +/* Types of "well-known binary representation" (wkb) format. */ +enum wkbType +{ + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7 +}; + +/* Byte order of "well-known binary representation" (wkb) format. */ +enum wkbByteOrder +{ + wkbXDR = 0, /* Big Endian */ + wkbNDR = 1 /* Little Endian */ +}; + +/** Get the wkb of default POINT value, which represents POINT(0 0) +if it's of dimension 2, etc. +@param[in] n_dims dimensions +@param[out] wkb wkb buffer for default POINT +@param[in] len length of wkb buffer +@return non-0 indicate the length of wkb of the default POINT, +0 if the buffer is too small */ +uint +get_wkb_of_default_point( + uint n_dims, + uchar* wkb, + uint len); + +/*************************************************************//** +Calculate minimal bounding rectangle (mbr) of the spatial object +stored in "well-known binary representation" (wkb) format. +@return 0 if ok */ +int +rtree_mbr_from_wkb( +/*===============*/ + uchar* wkb, /*!< in: pointer to wkb. */ + uint size, /*!< in: size of wkb. */ + uint n_dims, /*!< in: dimensions. 
*/ + double* mbr); /*!< in/out: mbr. */ + +/* Rtree split node structure. */ +struct rtr_split_node_t +{ + double square; /* square of the mbr.*/ + int n_node; /* which group in.*/ + uchar* key; /* key. */ + double* coords; /* mbr. */ +}; + +/*************************************************************//** +Inline function for reserving coords */ +inline +static +double* +reserve_coords(double **d_buffer, /*!< in/out: buffer. */ + int n_dim) /*!< in: dimensions. */ +/*===========*/ +{ + double *coords = *d_buffer; + (*d_buffer) += n_dim * 2; + return coords; +} + +/*************************************************************//** +Split rtree nodes. +Return which group the first rec is in. */ +int +split_rtree_node( +/*=============*/ + rtr_split_node_t* node, /*!< in: split nodes.*/ + int n_entries, /*!< in: entries number.*/ + int all_size, /*!< in: total key's size.*/ + int key_size, /*!< in: key's size.*/ + int min_size, /*!< in: minimal group size.*/ + int size1, /*!< in: size of group.*/ + int size2, /*!< in: initial group sizes */ + double** d_buffer, /*!< in/out: buffer.*/ + int n_dim, /*!< in: dimensions. */ + uchar* first_rec); /*!< in: the first rec. */ + +/*************************************************************//** +Compares two keys a and b depending on nextflag +nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal + MBR_DATA(a,b) Data reference is the same +Returns 0 on success. */ +int +rtree_key_cmp( +/*==========*/ + page_cur_mode_t mode, /*!< in: compare method. */ + const uchar* b, /*!< in: first key. */ + int b_len, /*!< in: first key len. */ + const uchar* a, /*!< in: second key. */ + int a_len); /*!< in: second key len. 
*/ + +/*************************************************************//** +Calculates MBR_AREA(a+b) - MBR_AREA(a) +Note: when 'a' and 'b' objects are far from each other, +the area increase can be really big, so this function +can return 'inf' as a result. */ +double +rtree_area_increase( + const uchar* a, /*!< in: first mbr. */ + const uchar* b, /*!< in: second mbr. */ + int a_len, /*!< in: mbr length. */ + double* ab_area); /*!< out: increased area. */ + +/** Calculates overlapping area +@param[in] a mbr a +@param[in] b mbr b +@param[in] mbr_len mbr length +@return overlapping area */ +double +rtree_area_overlapping( + const uchar* a, + const uchar* b, + int mbr_len); +#endif diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h new file mode 100644 index 00000000000..436374fd6b2 --- /dev/null +++ b/storage/innobase/include/gis0rtree.h @@ -0,0 +1,589 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/gis0rtree.h +R-tree header file + +Created 2013/03/27 Jimmy Yang and Allen Lai +***********************************************************************/ + +#ifndef gis0rtree_h +#define gis0rtree_h + +#include "univ.i" + +#include "data0type.h" +#include "data0types.h" +#include "dict0types.h" +#include "hash0hash.h" +#include "mem0mem.h" +#include "page0page.h" +#include "rem0types.h" +#include "row0types.h" +#include "trx0types.h" +#include "ut0vec.h" +#include "ut0wqueue.h" +#include "que0types.h" +#include "gis0geo.h" +#include "gis0type.h" +#include "btr0types.h" +#include "btr0cur.h" + +/* Whether MBR 'a' contains 'b' */ +#define MBR_CONTAIN_CMP(a, b) \ + ((((b)->xmin >= (a)->xmin) && ((b)->xmax <= (a)->xmax) \ + && ((b)->ymin >= (a)->ymin) && ((b)->ymax <= (a)->ymax))) + +/* Whether MBR 'a' equals to 'b' */ +#define MBR_EQUAL_CMP(a, b) \ + ((((b)->xmin == (a)->xmin) && ((b)->xmax == (a)->xmax)) \ + && (((b)->ymin == (a)->ymin) && ((b)->ymax == (a)->ymax))) + +/* Whether MBR 'a' intersects 'b' */ +#define MBR_INTERSECT_CMP(a, b) \ + ((((b)->xmin <= (a)->xmax) || ((b)->xmax >= (a)->xmin)) \ + && (((b)->ymin <= (a)->ymax) || ((b)->ymax >= (a)->ymin))) + +/* Whether MBR 'a' and 'b' disjoint */ +#define MBR_DISJOINT_CMP(a, b) (!MBR_INTERSECT_CMP(a, b)) + +/* Whether MBR 'a' within 'b' */ +#define MBR_WITHIN_CMP(a, b) \ + ((((b)->xmin <= (a)->xmin) && ((b)->xmax >= (a)->xmax)) \ + && (((b)->ymin <= (a)->ymin) && ((b)->ymax >= (a)->ymax))) + +/* Define it for rtree search mode checking. 
*/ +#define RTREE_SEARCH_MODE(mode) \ + (((mode) >= PAGE_CUR_CONTAIN) && ((mode <= PAGE_CUR_RTREE_GET_FATHER))) + +/* Geometry data header */ +#define GEO_DATA_HEADER_SIZE 4 +/**********************************************************************//** +Builds a Rtree node pointer out of a physical record and a page number. +@return own: node pointer */ +dtuple_t* +rtr_index_build_node_ptr( +/*=====================*/ + const dict_index_t* index, /*!< in: index */ + const rtr_mbr_t* mbr, /*!< in: mbr of lower page */ + const rec_t* rec, /*!< in: record for which to build node + pointer */ + ulint page_no,/*!< in: page number to put in node + pointer */ + mem_heap_t* heap, /*!< in: memory heap where pointer + created */ + ulint level); /*!< in: level of rec in tree: + 0 means leaf level */ + +/*************************************************************//** +Splits an R-tree index page to halves and inserts the tuple. It is assumed +that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is +released within this function! NOTE that the operation of this +function must always succeed, we cannot reverse it: therefore enough +free disk space (2 pages) must be guaranteed to be available before +this function is called. +@return inserted record */ +rec_t* +rtr_page_split_and_insert( +/*======================*/ + ulint flags, /*!< in: undo logging and locking flags */ + btr_cur_t* cursor, /*!< in/out: cursor at which to insert; when the + function returns, the cursor is positioned + on the predecessor of the inserted record */ + ulint** offsets,/*!< out: offsets on inserted record */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + const dtuple_t* tuple, /*!< in: tuple to insert */ + ulint n_ext, /*!< in: number of externally stored columns */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************//** +Sets the child node mbr in a node pointer. 
*/ +UNIV_INLINE +void +rtr_page_cal_mbr( +/*=============*/ + const dict_index_t* index, /*!< in: index */ + const buf_block_t* block, /*!< in: buffer block */ + rtr_mbr_t* mbr, /*!< out: MBR encapsulates the page */ + mem_heap_t* heap); /*!< in: heap for the memory + allocation */ +/*************************************************************//** +Find the next matching record. This function will first exhaust +the copied record listed in the rtr_info->matches vector before +moving to next page +@return true if there is next qualified record found, otherwise(if +exhausted) false */ +bool +rtr_pcur_move_to_next( +/*==================*/ + const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in + tuple must be set so that it cannot get + compared to the node ptr page number field! */ + page_cur_mode_t mode, /*!< in: cursor search mode */ + btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the + function may release the page latch */ + ulint cur_level, + /*!< in: current level */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************//** +Restores the stored position of a persistent cursor bufferfixing the page */ +bool +rtr_cur_restore_position_func( +/*==========================*/ + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ + btr_cur_t* cursor, /*!< in: detached persistent cursor */ + ulint level, /*!< in: index level */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr); /*!< in: mtr */ +#define rtr_cur_restore_position(l,cur,level,mtr) \ + rtr_cur_restore_position_func(l,cur,level,__FILE__,__LINE__,mtr) + +/****************************************************************//** +Searches the right position in rtree for a page cursor. 
*/ +bool +rtr_cur_search_with_match( +/*======================*/ + const buf_block_t* block, /*!< in: buffer block */ + dict_index_t* index, /*!< in: index descriptor */ + const dtuple_t* tuple, /*!< in: data tuple */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, + PAGE_CUR_LE, PAGE_CUR_G, or + PAGE_CUR_GE */ + page_cur_t* cursor, /*!< in/out: page cursor */ + rtr_info_t* rtr_info);/*!< in/out: search stack */ + +/****************************************************************//** +Calculate the area increased for a new record +@return area increased */ +double +rtr_rec_cal_increase( +/*=================*/ + const dtuple_t* dtuple, /*!< in: data tuple to insert, which + cause area increase */ + const rec_t* rec, /*!< in: physical record which differs from + dtuple in some of the common fields, or which + has an equal number or more fields than + dtuple */ + const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ + double* area); /*!< out: increased area */ + +/****************************************************************//** +Following the right link to find the proper block for insert. +@return the proper block.*/ +dberr_t +rtr_ins_enlarge_mbr( +/*=================*/ + btr_cur_t* cursor, /*!< in: btr cursor */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr); /*!< in: mtr */ + +/********************************************************************//** +*/ +void +rtr_get_father_node( +/*================*/ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: the tree level of search */ + const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in + tuple must be set so that it cannot get + compared to the node ptr page number field! 
*/ + btr_cur_t* sea_cur,/*!< in: search cursor */ + btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is + s- or x-latched */ + ulint page_no,/*!< in: current page no */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************//** +push a nonleaf index node to the search path */ +UNIV_INLINE +void +rtr_non_leaf_stack_push( +/*====================*/ + rtr_node_path_t* path, /*!< in/out: search path */ + ulint pageno, /*!< in: pageno to insert */ + node_seq_t seq_no, /*!< in: Node sequence num */ + ulint level, /*!< in: index level */ + ulint child_no, /*!< in: child page no */ + btr_pcur_t* cursor, /*!< in: position cursor */ + double mbr_inc); /*!< in: MBR needs to be + enlarged */ + +/**************************************************************//** +push a nonleaf index node to the search path for insertion */ +void +rtr_non_leaf_insert_stack_push( +/*===========================*/ + dict_index_t* index, /*!< in: index descriptor */ + rtr_node_path_t* path, /*!< in/out: search path */ + ulint level, /*!< in: index level */ + const buf_block_t* block, /*!< in: block of the page */ + const rec_t* rec, /*!< in: positioned record */ + double mbr_inc); /*!< in: MBR needs to be + enlarged */ + +/*****************************************************************//** +Allocates a new Split Sequence Number. +@return new SSN id */ +UNIV_INLINE +node_seq_t +rtr_get_new_ssn_id( +/*===============*/ + dict_index_t* index); /*!< in: the index struct */ + +/*****************************************************************//** +Get the current Split Sequence Number. 
+@return current SSN id */ +UNIV_INLINE +node_seq_t +rtr_get_current_ssn_id( +/*===================*/ + dict_index_t* index); /*!< in/out: the index struct */ + +/********************************************************************//** +Create a RTree search info structure */ +rtr_info_t* +rtr_create_rtr_info( +/******************/ + bool need_prdt, /*!< in: Whether predicate lock is + needed */ + bool init_matches, /*!< in: Whether to initiate the + "matches" structure for collecting + matched leaf records */ + btr_cur_t* cursor, /*!< in: tree search cursor */ + dict_index_t* index); /*!< in: index struct */ + +/********************************************************************//** +Update a btr_cur_t with rtr_info */ +void +rtr_info_update_btr( +/******************/ + btr_cur_t* cursor, /*!< in/out: tree cursor */ + rtr_info_t* rtr_info); /*!< in: rtr_info to set to the + cursor */ + +/********************************************************************//** +Update a btr_cur_t with rtr_info */ +void +rtr_init_rtr_info( +/****************/ + rtr_info_t* rtr_info, /*!< in: rtr_info to set to the + cursor */ + bool need_prdt, /*!< in: Whether predicate lock is + needed */ + btr_cur_t* cursor, /*!< in: tree search cursor */ + dict_index_t* index, /*!< in: index structure */ + bool reinit); /*!< in: Whether this is a reinit */ + +/**************************************************************//** +Clean up Rtree cursor */ +void +rtr_clean_rtr_info( +/*===============*/ + rtr_info_t* rtr_info, /*!< in: RTree search info */ + bool free_all); /*!< in: need to free rtr_info itself */ + +/****************************************************************//** +Get the bounding box content from an index record*/ +void +rtr_get_mbr_from_rec( +/*=================*/ + const rec_t* rec, /*!< in: data tuple */ + const ulint* offsets,/*!< in: offsets array */ + rtr_mbr_t* mbr); /*!< out MBR */ + +/****************************************************************//** +Get the 
bounding box content from a MBR data record */ +void +rtr_get_mbr_from_tuple( +/*===================*/ + const dtuple_t* dtuple, /*!< in: data tuple */ + rtr_mbr* mbr); /*!< out: mbr to fill */ + +#define rtr_page_get_father_node_ptr(of,heap,sea,cur,mtr) \ + rtr_page_get_father_node_ptr_func(of,heap,sea,cur,__FILE__,__LINE__,mtr) + +/* Get the rtree page father. +@param[in] offsets work area for the return value +@param[in] index rtree index +@param[in] block child page in the index +@param[in] mtr mtr +@param[in] sea_cur search cursor, contains information + about parent nodes in search +@param[in] cursor cursor on node pointer record, + its page x-latched */ +void +rtr_page_get_father( + dict_index_t* index, + buf_block_t* block, + mtr_t* mtr, + btr_cur_t* sea_cur, + btr_cur_t* cursor); + +/************************************************************//** +Returns the upper level node pointer to a R-Tree page. It is assumed +that mtr holds an x-latch on the tree. +@return rec_get_offsets() of the node pointer record */ +ulint* +rtr_page_get_father_node_ptr_func( +/*==============================*/ + ulint* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + btr_cur_t* sea_cur,/*!< in: search cursor */ + btr_cur_t* cursor, /*!< in: cursor pointing to user record, + out: cursor on node pointer record, + its page x-latched */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr); /*!< in: mtr */ + + +/************************************************************//** +Returns the father block to a page. It is assumed that mtr holds +an X or SX latch on the tree. 
+@return rec_get_offsets() of the node pointer record */ +ulint* +rtr_page_get_father_block( +/*======================*/ + ulint* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + dict_index_t* index, /*!< in: b-tree index */ + buf_block_t* block, /*!< in: child page in the index */ + mtr_t* mtr, /*!< in: mtr */ + btr_cur_t* sea_cur,/*!< in: search cursor, contains information + about parent nodes in search */ + btr_cur_t* cursor);/*!< out: cursor on node pointer record, + its page x-latched */ +/**************************************************************//** +Store the parent path cursor +@return number of cursor stored */ +ulint +rtr_store_parent_path( +/*==================*/ + const buf_block_t* block, /*!< in: block of the page */ + btr_cur_t* btr_cur,/*!< in/out: persistent cursor */ + ulint latch_mode, + /*!< in: latch_mode */ + ulint level, /*!< in: index level */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. It should be +closed with btr_pcur_close. */ +void +rtr_pcur_open_low( +/*==============*/ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: level in the btree */ + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page from the + record! */ + ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... 
*/ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ + mtr_t* mtr); /*!< in: mtr */ + +#define rtr_pcur_open(i,t,md,l,c,m) \ + rtr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m) + +struct btr_cur_t; + +/*********************************************************//** +Returns the R-Tree node stored in the parent search path +@return pointer to R-Tree cursor component */ +UNIV_INLINE +node_visit_t* +rtr_get_parent_node( +/*================*/ + btr_cur_t* btr_cur, /*!< in: persistent cursor */ + ulint level, /*!< in: index level of buffer page */ + ulint is_insert); /*!< in: whether it is insert */ + +/*********************************************************//** +Returns the R-Tree cursor stored in the parent search path +@return pointer to R-Tree cursor component */ +UNIV_INLINE +btr_pcur_t* +rtr_get_parent_cursor( +/*==================*/ + btr_cur_t* btr_cur, /*!< in: persistent cursor */ + ulint level, /*!< in: index level of buffer page */ + ulint is_insert); /*!< in: whether insert operation */ + +/*************************************************************//** +Copy recs from a page to new_block of rtree. */ +void +rtr_page_copy_rec_list_end_no_locks( +/*================================*/ + buf_block_t* new_block, /*!< in: index page to copy to */ + buf_block_t* block, /*!< in: index page of rec */ + rec_t* rec, /*!< in: record on page */ + dict_index_t* index, /*!< in: record descriptor */ + mem_heap_t* heap, /*!< in/out: heap memory */ + rtr_rec_move_t* rec_move, /*!< in: recording records moved */ + ulint max_move, /*!< in: num of rec to move */ + ulint* num_moved, /*!< out: num of rec to move */ + mtr_t* mtr); /*!< in: mtr */ + +/*************************************************************//** +Copy recs till a specified rec from a page to new_block of rtree. 
*/ +void +rtr_page_copy_rec_list_start_no_locks( +/*==================================*/ + buf_block_t* new_block, /*!< in: index page to copy to */ + buf_block_t* block, /*!< in: index page of rec */ + rec_t* rec, /*!< in: record on page */ + dict_index_t* index, /*!< in: record descriptor */ + mem_heap_t* heap, /*!< in/out: heap memory */ + rtr_rec_move_t* rec_move, /*!< in: recording records moved */ + ulint max_move, /*!< in: num of rec to move */ + ulint* num_moved, /*!< out: num of rec to move */ + mtr_t* mtr); /*!< in: mtr */ + +/****************************************************************//** +Merge 2 mbrs and update the mbr that cursor is on. */ +dberr_t +rtr_merge_and_update_mbr( +/*=====================*/ + btr_cur_t* cursor, /*!< in/out: cursor */ + btr_cur_t* cursor2, /*!< in: the other cursor */ + ulint* offsets, /*!< in: rec offsets */ + ulint* offsets2, /*!< in: rec offsets */ + page_t* child_page, /*!< in: the child page. */ + buf_block_t* merge_block, /*!< in: page to merge */ + buf_block_t* block, /*!< in: page be merged */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr); /*!< in: mtr */ + +/*************************************************************//** +Deletes on the upper level the node pointer to a page. 
*/ +void +rtr_node_ptr_delete( +/*================*/ + dict_index_t* index, /*!< in: index tree */ + btr_cur_t* sea_cur,/*!< in: search cursor, contains information + about parent nodes in search */ + buf_block_t* block, /*!< in: page whose node pointer is deleted */ + mtr_t* mtr); /*!< in: mtr */ + +/****************************************************************//** +Check two MBRs are identical or need to be merged */ +bool +rtr_merge_mbr_changed( +/*==================*/ + btr_cur_t* cursor, /*!< in: cursor */ + btr_cur_t* cursor2, /*!< in: the other cursor */ + ulint* offsets, /*!< in: rec offsets */ + ulint* offsets2, /*!< in: rec offsets */ + rtr_mbr_t* new_mbr, /*!< out: MBR to update */ + buf_block_t* merge_block, /*!< in: page to merge */ + buf_block_t* block, /*!< in: page be merged */ + dict_index_t* index); /*!< in: index */ + + +/**************************************************************//** +Update the mbr field of a spatial index row. +@return true if successful */ +bool +rtr_update_mbr_field( +/*=================*/ + btr_cur_t* cursor, /*!< in: cursor pointed to rec.*/ + ulint* offsets, /*!< in: offsets on rec. */ + btr_cur_t* cursor2, /*!< in/out: cursor pointed to rec + that should be deleted. + this cursor is for btr_compress to + delete the merged page's father rec.*/ + page_t* child_page, /*!< in: child page. */ + rtr_mbr_t* new_mbr, /*!< in: the new mbr. 
*/ + rec_t* new_rec, /*!< in: rec to use */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************//** +Check whether a Rtree page is child of a parent page +@return true if there is child/parent relationship */ +bool +rtr_check_same_block( +/*=================*/ + dict_index_t* index, /*!< in: index tree */ + btr_cur_t* cur, /*!< in/out: position at the parent entry + pointing to the child if successful */ + buf_block_t* parentb,/*!< in: parent page to check */ + buf_block_t* childb, /*!< in: child Page */ + mem_heap_t* heap); /*!< in: memory heap */ + +/*********************************************************************//** +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +rtr_write_mbr( +/*==========*/ + byte* data, /*!< out: data */ + const rtr_mbr_t* mbr); /*!< in: data */ + +/*********************************************************************//** +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +rtr_read_mbr( +/*==========*/ + const byte* data, /*!< in: data */ + rtr_mbr_t* mbr); /*!< out: data */ + +/**************************************************************//** +Check whether a discarding page is in anyone's search path */ +void +rtr_check_discard_page( +/*===================*/ + dict_index_t* index, /*!< in: index */ + btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on + the root page */ + buf_block_t* block); /*!< in: block of page to be discarded */ + +/********************************************************************//** +Reinitialize a RTree search info */ +UNIV_INLINE +void +rtr_info_reinit_in_cursor( +/************************/ + btr_cur_t* cursor, /*!< in/out: tree cursor */ + dict_index_t* index, /*!< in: index struct */ + bool need_prdt); /*!< in: Whether predicate lock is + needed */ + +/** Estimates the number of rows in a given area. 
+@param[in] index index +@param[in] tuple range tuple containing mbr, may also be empty tuple +@param[in] mode search mode +@return estimated number of rows */ +int64_t +rtr_estimate_n_rows_in_range( + dict_index_t* index, + const dtuple_t* tuple, + page_cur_mode_t mode); + +#ifndef UNIV_NONINL +#include "gis0rtree.ic" +#endif +#endif /*!< gis0rtree.h */ diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic new file mode 100644 index 00000000000..a30db122273 --- /dev/null +++ b/storage/innobase/include/gis0rtree.ic @@ -0,0 +1,274 @@ +/***************************************************************************** + +Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include gis0rtree.h +R-tree Inline code + +Created 2013/03/27 Jimmy Yang and Allen Lai +***********************************************************************/ + +/**************************************************************//** +Sets the child node mbr in a node pointer. 
*/ +UNIV_INLINE +void +rtr_page_cal_mbr( +/*=============*/ + const dict_index_t* index, /*!< in: index */ + const buf_block_t* block, /*!< in: buffer block */ + rtr_mbr_t* rtr_mbr,/*!< out: MBR encapsulates the page */ + mem_heap_t* heap) /*!< in: heap for the memory + allocation */ +{ + page_t* page; + rec_t* rec; + byte* field; + ulint len; + ulint* offsets = NULL; + double bmin, bmax; + double* amin; + double* amax; + ulint inc = 0; + double* mbr; + + rtr_mbr->xmin = DBL_MAX; + rtr_mbr->ymin = DBL_MAX; + rtr_mbr->xmax = -DBL_MAX; + rtr_mbr->ymax = -DBL_MAX; + + mbr = reinterpret_cast(rtr_mbr); + + page = buf_block_get_frame(block); + + rec = page_rec_get_next(page_get_infimum_rec(page)); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + + do { + /* The mbr address is in the first field. */ + field = rec_get_nth_field(rec, offsets, 0, &len); + + ut_ad(len == DATA_MBR_LEN); + inc = 0; + for (int i = 0; i < SPDIMS; i++) { + bmin = mach_double_read(field + inc); + bmax = mach_double_read(field + inc + sizeof(double)); + + amin = mbr + i * SPDIMS; + amax = mbr + i * SPDIMS + 1; + + if (*amin > bmin) + *amin = bmin; + if (*amax < bmax) + *amax = bmax; + + inc += 2 * sizeof(double); + } + + rec = page_rec_get_next(rec); + + if (rec == NULL) { + break; + } + } while (!page_rec_is_supremum(rec)); +} + +/**************************************************************//** +push a nonleaf index node to the search path */ +UNIV_INLINE +void +rtr_non_leaf_stack_push( +/*====================*/ + rtr_node_path_t* path, /*!< in/out: search path */ + ulint pageno, /*!< in: pageno to insert */ + node_seq_t seq_no, /*!< in: Node sequence num */ + ulint level, /*!< in: index page level */ + ulint child_no, /*!< in: child page no */ + btr_pcur_t* cursor, /*!< in: position cursor */ + double mbr_inc) /*!< in: MBR needs to be + enlarged */ +{ + node_visit_t insert_val; + + insert_val.page_no = pageno; + insert_val.seq_no = seq_no; + insert_val.level = level; + 
insert_val.child_no = child_no; + insert_val.cursor = cursor; + insert_val.mbr_inc = mbr_inc; + + path->push_back(insert_val); + +#ifdef RTR_SEARCH_DIAGNOSTIC + fprintf(stderr, "INNODB_RTR: Push page %d, level %d, seq %d" + " to search stack \n", + static_cast(pageno), static_cast(level), + static_cast(seq_no)); +#endif /* RTR_SEARCH_DIAGNOSTIC */ +} + +/*****************************************************************//** +Allocates a new Split Sequence Number. +@return new SSN id */ +UNIV_INLINE +node_seq_t +rtr_get_new_ssn_id( +/*===============*/ + dict_index_t* index) /*!< in/out: the index struct */ +{ + node_seq_t ssn; + + mutex_enter(&(index->rtr_ssn.mutex)); + ssn = ++index->rtr_ssn.seq_no; + mutex_exit(&(index->rtr_ssn.mutex)); + + return(ssn); +} +/*****************************************************************//** +Get the current Split Sequence Number. +@return current SSN id */ +UNIV_INLINE +node_seq_t +rtr_get_current_ssn_id( +/*===================*/ + dict_index_t* index) /*!< in: index struct */ +{ + node_seq_t ssn; + + mutex_enter(&(index->rtr_ssn.mutex)); + ssn = index->rtr_ssn.seq_no; + mutex_exit(&(index->rtr_ssn.mutex)); + + return(ssn); +} + +/*********************************************************************//** +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +rtr_write_mbr( +/*==========*/ + byte* data, /*!< out: data */ + const rtr_mbr_t* mbr) /*!< in: data */ +{ + const double* my_mbr = reinterpret_cast(mbr); + + for (int i = 0; i < SPDIMS * 2; i++) { + mach_double_write(data + i * sizeof(double), my_mbr[i]); + } +} + +/*********************************************************************//** +Sets pointer to the data and length in a field. 
*/ +UNIV_INLINE +void +rtr_read_mbr( +/*==========*/ + const byte* data, /*!< in: data */ + rtr_mbr_t* mbr) /*!< out: MBR */ +{ + for (int i = 0; i < SPDIMS * 2; i++) { + (reinterpret_cast(mbr))[i] = mach_double_read( + data + + i * sizeof(double)); + } +} + +/*********************************************************//** +Returns the R-Tree node stored in the parent search path +@return pointer to R-Tree cursor component in the parent path, +NULL if parent path is empty or index is larger than num of items contained */ +UNIV_INLINE +node_visit_t* +rtr_get_parent_node( +/*================*/ + btr_cur_t* btr_cur, /*!< in: persistent cursor */ + ulint level, /*!< in: index level of buffer page */ + ulint is_insert) /*!< in: whether it is insert */ +{ + ulint num; + ulint tree_height = btr_cur->tree_height; + node_visit_t* found_node = NULL; + + if (level >= tree_height) { + return(NULL); + } + + mutex_enter(&btr_cur->rtr_info->rtr_path_mutex); + + num = btr_cur->rtr_info->parent_path->size(); + + if (!num) { + mutex_exit(&btr_cur->rtr_info->rtr_path_mutex); + return(NULL); + } + + if (is_insert) { + ulint idx = tree_height - level - 1; + ut_ad(idx < num); + + found_node = &(*btr_cur->rtr_info->parent_path)[idx]; + } else { + node_visit_t* node; + + while (num > 0) { + node = &(*btr_cur->rtr_info->parent_path)[num - 1]; + + if (node->level == level) { + found_node = node; + break; + } + num--; + } + } + + mutex_exit(&btr_cur->rtr_info->rtr_path_mutex); + + return(found_node); +} + +/*********************************************************//** +Returns the R-Tree cursor stored in the parent search path +@return pointer to R-Tree cursor component */ +UNIV_INLINE +btr_pcur_t* +rtr_get_parent_cursor( +/*==================*/ + btr_cur_t* btr_cur, /*!< in: persistent cursor */ + ulint level, /*!< in: index level of buffer page */ + ulint is_insert) /*!< in: whether insert operation */ +{ + node_visit_t* found_node = rtr_get_parent_node( + btr_cur, level, is_insert); + + 
return((found_node) ? found_node->cursor : NULL); +} + +/********************************************************************//** +Reinitialize a R-Tree search info in btr_cur_t */ +UNIV_INLINE +void +rtr_info_reinit_in_cursor( +/************************/ + btr_cur_t* cursor, /*!< in/out: tree cursor */ + dict_index_t* index, /*!< in: index struct */ + bool need_prdt) /*!< in: Whether predicate lock is + needed */ +{ + rtr_clean_rtr_info(cursor->rtr_info, false); + rtr_init_rtr_info(cursor->rtr_info, need_prdt, cursor, index, true); +} diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h new file mode 100644 index 00000000000..14c098f9608 --- /dev/null +++ b/storage/innobase/include/gis0type.h @@ -0,0 +1,168 @@ +/***************************************************************************** + +Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include gis0type.h +R-tree header file + +Created 2013/03/27 Jimmy Yang +***********************************************************************/ + +#ifndef gis0type_h +#define gis0type_h + +#include "univ.i" + +#include "buf0buf.h" +#include "data0type.h" +#include "data0types.h" +#include "dict0types.h" +#include "hash0hash.h" +#include "mem0mem.h" +#include "rem0types.h" +#include "row0types.h" +#include "trx0types.h" +#include "ut0vec.h" +#include "ut0wqueue.h" +#include "que0types.h" +#include "gis0geo.h" +#include "ut0new.h" + +#include +#include + +/* Node Sequence Number. Only updated when page splits */ +typedef ib_uint32_t node_seq_t; + +/* RTree internal non-leaf Nodes to be searched, from root to leaf */ +typedef struct node_visit { + ulint page_no; /*!< the page number */ + node_seq_t seq_no; /*!< the SSN (split sequence number */ + ulint level; /*!< the page's index level */ + ulint child_no; /*!< child page num if for parent + recording */ + btr_pcur_t* cursor; /*!< cursor structure if we positioned + FIXME: there is no need to use whole + btr_pcur_t, just the position related + members */ + double mbr_inc; /*!< whether this node needs to be + enlarged for insertion */ +} node_visit_t; + +typedef std::vector > rtr_node_path_t; + +typedef struct rtr_rec { + rec_t* r_rec; /*!< matched record */ + bool locked; /*!< whether the record locked */ +} rtr_rec_t; + +typedef std::vector > rtr_rec_vector; + +/* Structure for matched records on the leaf page */ +typedef struct matched_rec { + byte* bufp; /*!< aligned buffer point */ + byte rec_buf[UNIV_PAGE_SIZE_MAX * 2]; + /*!< buffer used to copy 
matching rec */ + buf_block_t block; /*!< the shadow buffer block */ + ulint used; /*!< memory used */ + rtr_rec_vector* matched_recs; /*!< vector holding the matching rec */ + ib_mutex_t rtr_match_mutex;/*!< mutex protect the match_recs + vector */ + bool valid; /*!< whether result in matched_recs + or this search is valid (page not + dropped) */ + bool locked; /*!< whether these recs locked */ +} matched_rec_t; + +/* In memory representation of a minimum bounding rectangle */ +typedef struct rtr_mbr { + double xmin; /*!< minimum on x */ + double xmax; /*!< maximum on x */ + double ymin; /*!< minimum on y */ + double ymax; /*!< maximum on y */ +} rtr_mbr_t; + +/* Maximum index level for R-Tree, this is consistent with BTR_MAX_LEVELS */ +#define RTR_MAX_LEVELS 100 + +/* Number of pages we latch at leaf level when there is possible Tree +modification (split, shrink), we always latch left, current +and right pages */ +#define RTR_LEAF_LATCH_NUM 3 + +/** Vectors holding the matching internal pages/nodes and leaf records */ +typedef struct rtr_info{ + rtr_node_path_t*path; /*!< vector holding matching pages */ + rtr_node_path_t*parent_path; + /*!< vector holding parent pages during + search */ + matched_rec_t* matches;/*!< struct holding matching leaf records */ + ib_mutex_t rtr_path_mutex; + /*!< mutex protect the "path" vector */ + buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; + /*!< tracking pages that would be locked + at leaf level, for future free */ + ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; + /*!< savepoint used to release latches/blocks + on each level and leaf level */ + rtr_mbr_t mbr; /*!< the search MBR */ + que_thr_t* thr; /*!< the search thread */ + mem_heap_t* heap; /*!< memory heap */ + btr_cur_t* cursor; /*!< cursor used for search */ + dict_index_t* index; /*!< index it is searching */ + bool need_prdt_lock; + /*!< whether we will need predicate lock + the tree */ + bool need_page_lock; + /*!< whether we will need 
 predicate page lock + the tree */ + bool allocated;/*!< whether this structure is allocated or + on stack */ + bool mbr_adj;/*!< whether mbr will need to be enlarged + for an insertion operation */ + bool fd_del; /*!< found deleted row */ + const dtuple_t* search_tuple; + /*!< search tuple being used */ + page_cur_mode_t search_mode; + /*!< current search mode */ +} rtr_info_t; + +typedef std::list<rtr_info_t*, ut_allocator<rtr_info_t*> > rtr_info_active; + +/* Tracking structure for all ongoing search for an index */ +typedef struct rtr_info_track { + rtr_info_active* rtr_active; /*!< Active search info */ + ib_mutex_t rtr_active_mutex; + /*!< mutex to protect + rtr_active */ +} rtr_info_track_t; + +/* Node Sequence Number and mutex protects it. */ +typedef struct rtree_ssn { + ib_mutex_t mutex; /*!< mutex protect the seq num */ + node_seq_t seq_no; /*!< the SSN (node sequence number) */ +} rtr_ssn_t; + +/* This is to record the record movement between pages. Used for corresponding +lock movement */ +typedef struct rtr_rec_move { + rec_t* old_rec; /*!< record being moved in old page */ + rec_t* new_rec; /*!< new record location */ + bool moved; /*!< whether lock are moved too */ +} rtr_rec_move_t; +#endif /*!< gis0type.h */ diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h index 07ab20ab995..15a99ddf683 100644 --- a/storage/innobase/include/ha0ha.h +++ b/storage/innobase/include/ha0ha.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -47,7 +47,6 @@ ha_search_and_get_data( Looks for an element when we know the pointer to the data and updates the pointer to data if found. 
@return TRUE if found */ -UNIV_INTERN ibool ha_search_and_update_if_found_func( /*===============================*/ @@ -62,66 +61,55 @@ ha_search_and_update_if_found_func( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /** Looks for an element when we know the pointer to the data and updates the pointer to data if found. -@param table in/out: hash table -@param fold in: folded value of the searched data -@param data in: pointer to the data -@param new_block in: block containing new_data -@param new_data in: new pointer to the data */ +@param table in/out: hash table +@param fold in: folded value of the searched data +@param data in: pointer to the data +@param new_block in: block containing new_data +@param new_data in: new pointer to the data */ # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ ha_search_and_update_if_found_func(table,fold,data,new_block,new_data) #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ /** Looks for an element when we know the pointer to the data and updates the pointer to data if found. -@param table in/out: hash table -@param fold in: folded value of the searched data -@param data in: pointer to the data -@param new_block ignored: block containing new_data -@param new_data in: new pointer to the data */ +@param table in/out: hash table +@param fold in: folded value of the searched data +@param data in: pointer to the data +@param new_block ignored: block containing new_data +@param new_data in: new pointer to the data */ # define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ ha_search_and_update_if_found_func(table,fold,data,new_data) #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + /*************************************************************//** Creates a hash table with at least n array cells. The actual number of cells is chosen to be a prime number slightly bigger than n. 
-@return own: created table */ -UNIV_INTERN +@return own: created table */ hash_table_t* -ha_create_func( -/*===========*/ - ulint n, /*!< in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint mutex_level, /*!< in: level of the mutexes in the latching - order: this is used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes, /*!< in: number of mutexes to protect the +ib_create( +/*======*/ + ulint n, /*!< in: number of array cells */ + latch_id_t id, /*!< in: latch ID */ + ulint n_mutexes,/*!< in: number of mutexes to protect the hash table: must be a power of 2, or 0 */ - ulint type); /*!< in: type of datastructure for which + ulint type); /*!< in: type of datastructure for which the memory heap is going to be used e.g.: MEM_HEAP_FOR_BTR_SEARCH or MEM_HEAP_FOR_PAGE_HASH */ -#ifdef UNIV_SYNC_DEBUG -/** Creates a hash table. -@return own: created table -@param n_c in: number of array cells. The actual number of cells is -chosen to be a slightly bigger prime number. -@param level in: level of the mutexes in the latching order -@param n_m in: number of mutexes to protect the hash table; - must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) -#else /* UNIV_SYNC_DEBUG */ -/** Creates a hash table. -@return own: created table -@param n_c in: number of array cells. The actual number of cells is -chosen to be a slightly bigger prime number. -@param level in: level of the mutexes in the latching order -@param n_m in: number of mutexes to protect the hash table; - must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) -#endif /* UNIV_SYNC_DEBUG */ + +/** Recreate a hash table with at least n array cells. The actual number +of cells is chosen to be a prime number slightly bigger than n. +The new cells are all cleared. The heaps are recreated. +The sync objects are reused. 
+@param[in,out] table hash table to be resized (to be freed later) +@param[in] n number of array cells +@return resized new table */ +hash_table_t* +ib_recreate( + hash_table_t* table, + ulint n); /*************************************************************//** Empties a hash table and frees the memory heaps. */ -UNIV_INTERN void ha_clear( /*=====*/ @@ -131,8 +119,7 @@ ha_clear( Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node is inserted. -@return TRUE if succeed, FALSE if no more memory could be allocated */ -UNIV_INTERN +@return TRUE if succeed, FALSE if no more memory could be allocated */ ibool ha_insert_for_fold_func( /*====================*/ @@ -151,11 +138,11 @@ ha_insert_for_fold_func( Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated -@param t in: hash table -@param f in: folded value of data -@param b ignored: buffer block containing the data -@param d in: data, must not be NULL */ +@return TRUE if succeed, FALSE if no more memory could be allocated +@param t in: hash table +@param f in: folded value of data +@param b ignored: buffer block containing the data +@param d in: data, must not be NULL */ # define ha_insert_for_fold(t,f,b,d) do { \ ha_insert_for_fold_func(t,f,d); \ MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \ @@ -179,7 +166,7 @@ is inserted. /*********************************************************//** Looks for an element when we know the pointer to the data and deletes it from the hash table if found. -@return TRUE if found */ +@return TRUE if found */ UNIV_INLINE ibool ha_search_and_delete_if_found( @@ -191,7 +178,6 @@ ha_search_and_delete_if_found( /*****************************************************************//** Removes from the chain determined by fold all nodes whose data pointer points to the page given. */ -UNIV_INTERN void ha_remove_all_nodes_to_page( /*========================*/ @@ -201,8 +187,7 @@ ha_remove_all_nodes_to_page( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool ha_validate( /*========*/ @@ -212,7 +197,6 @@ ha_validate( #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /*************************************************************//** Prints info of a hash table. 
*/ -UNIV_INTERN void ha_print_info( /*==========*/ @@ -222,12 +206,12 @@ ha_print_info( /** The hash table external chain node */ struct ha_node_t { + ulint fold; /*!< fold value for the data */ ha_node_t* next; /*!< next chain node or NULL if none */ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG buf_block_t* block; /*!< buffer block containing the data, or NULL */ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* data; /*!< pointer to the data */ - ulint fold; /*!< fold value for the data */ + const rec_t* data; /*!< pointer to the data */ }; #ifdef UNIV_DEBUG diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic index c478ff54303..854ff9bc046 100644 --- a/storage/innobase/include/ha0ha.ic +++ b/storage/innobase/include/ha0ha.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,6 @@ Created 8/18/1994 Heikki Tuuri /***********************************************************//** Deletes a hash node. */ -UNIV_INTERN void ha_delete_hash_node( /*================*/ @@ -38,7 +37,7 @@ ha_delete_hash_node( /******************************************************************//** Gets a hash node data. -@return pointer to the data */ +@return pointer to the data */ UNIV_INLINE const rec_t* ha_node_get_data( @@ -68,33 +67,33 @@ ha_node_set_data_func( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /** Sets hash node data. 
-@param n in: hash chain node -@param b in: buffer block containing the data -@param d in: pointer to the data */ +@param n in: hash chain node +@param b in: buffer block containing the data +@param d in: pointer to the data */ # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d) #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ /** Sets hash node data. -@param n in: hash chain node -@param b in: buffer block containing the data -@param d in: pointer to the data */ +@param n in: hash chain node +@param b in: buffer block containing the data +@param d in: pointer to the data */ # define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d) #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ /******************************************************************//** Gets the next node in a hash chain. -@return next node, NULL if none */ +@return next node, NULL if none */ UNIV_INLINE ha_node_t* ha_chain_get_next( /*==============*/ - ha_node_t* node) /*!< in: hash chain node */ + const ha_node_t* node) /*!< in: hash chain node */ { return(node->next); } /******************************************************************//** Gets the first node in a hash chain. 
-@return first node, NULL if none */ +@return first node, NULL if none */ UNIV_INLINE ha_node_t* ha_chain_get_first( @@ -122,9 +121,9 @@ hash_assert_can_modify( if (table->type == HASH_TABLE_SYNC_MUTEX) { ut_ad(mutex_own(hash_get_mutex(table, fold))); } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { -# ifdef UNIV_SYNC_DEBUG +# ifdef UNIV_DEBUG rw_lock_t* lock = hash_get_lock(table, fold); - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); + ut_ad(rw_lock_own(lock, RW_LOCK_X)); # endif } else { ut_ad(table->type == HASH_TABLE_SYNC_NONE); @@ -145,10 +144,10 @@ hash_assert_can_search( if (table->type == HASH_TABLE_SYNC_MUTEX) { ut_ad(mutex_own(hash_get_mutex(table, fold))); } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { -# ifdef UNIV_SYNC_DEBUG +# ifdef UNIV_DEBUG rw_lock_t* lock = hash_get_lock(table, fold); - ut_ad(rw_lock_own(lock, RW_LOCK_EX) - || rw_lock_own(lock, RW_LOCK_SHARED)); + ut_ad(rw_lock_own(lock, RW_LOCK_X) + || rw_lock_own(lock, RW_LOCK_S)); # endif } else { ut_ad(table->type == HASH_TABLE_SYNC_NONE); @@ -167,20 +166,17 @@ ha_search_and_get_data( hash_table_t* table, /*!< in: hash table */ ulint fold) /*!< in: folded value of the searched data */ { - ha_node_t* node; - hash_assert_can_search(table, fold); ut_ad(btr_search_enabled); - node = ha_chain_get_first(table, fold); + for (const ha_node_t* node = ha_chain_get_first(table, fold); + node != NULL; + node = ha_chain_get_next(node)) { - while (node) { if (node->fold == fold) { return(node->data); } - - node = ha_chain_get_next(node); } return(NULL); @@ -188,7 +184,7 @@ ha_search_and_get_data( /*********************************************************//** Looks for an element when we know the pointer to the data. 
-@return pointer to the hash table node, NULL if not found in the table */ +@return pointer to the hash table node, NULL if not found in the table */ UNIV_INLINE ha_node_t* ha_search_with_data( @@ -220,7 +216,7 @@ ha_search_with_data( /*********************************************************//** Looks for an element when we know the pointer to the data, and deletes it from the hash table, if found. -@return TRUE if found */ +@return TRUE if found */ UNIV_INLINE ibool ha_search_and_delete_if_found( diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h index 0073930b502..7dd6d26bad0 100644 --- a/storage/innobase/include/ha0storage.h +++ b/storage/innobase/include/ha0storage.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,7 +44,7 @@ struct ha_storage_t; /*******************************************************************//** Creates a hash storage. If any of the parameters is 0, then a default value is used. -@return own: hash storage */ +@return own: hash storage */ UNIV_INLINE ha_storage_t* ha_storage_create( @@ -62,8 +62,7 @@ data_len bytes need to be allocated) and the size of storage is going to become more than "memlim" then "data" is not added and NULL is returned. To disable this behavior "memlim" can be set to 0, which stands for "no limit". -@return pointer to the copy */ -UNIV_INTERN +@return pointer to the copy */ const void* ha_storage_put_memlim( /*==================*/ @@ -74,10 +73,10 @@ ha_storage_put_memlim( /*******************************************************************//** Same as ha_storage_put_memlim() but without memory limit. 
-@param storage in/out: hash storage -@param data in: data to store -@param data_len in: data length -@return pointer to the copy of the string */ +@param storage in/out: hash storage +@param data in: data to store +@param data_len in: data length +@return pointer to the copy of the string */ #define ha_storage_put(storage, data, data_len) \ ha_storage_put_memlim((storage), (data), (data_len), 0) @@ -85,9 +84,9 @@ Same as ha_storage_put_memlim() but without memory limit. Copies string into the storage and returns a pointer to the copy. If the same string is already present, then pointer to it is returned. Strings are considered to be equal if strcmp(str1, str2) == 0. -@param storage in/out: hash storage -@param str in: string to put -@return pointer to the copy of the string */ +@param storage in/out: hash storage +@param str in: string to put +@return pointer to the copy of the string */ #define ha_storage_put_str(storage, str) \ ((const char*) ha_storage_put((storage), (str), strlen(str) + 1)) @@ -96,10 +95,10 @@ Copies string into the storage and returns a pointer to the copy obeying a memory limit. If the same string is already present, then pointer to it is returned. Strings are considered to be equal if strcmp(str1, str2) == 0. -@param storage in/out: hash storage -@param str in: string to put -@param memlim in: memory limit to obey -@return pointer to the copy of the string */ +@param storage in/out: hash storage +@param str in: string to put +@param memlim in: memory limit to obey +@return pointer to the copy of the string */ #define ha_storage_put_str_memlim(storage, str, memlim) \ ((const char*) ha_storage_put_memlim((storage), (str), \ strlen(str) + 1, (memlim))) @@ -126,7 +125,7 @@ ha_storage_free( /*******************************************************************//** Gets the size of the memory used by a storage. 
-@return bytes used */ +@return bytes used */ UNIV_INLINE ulint ha_storage_get_size( diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic index 7150ca045ec..e841925d320 100644 --- a/storage/innobase/include/ha0storage.ic +++ b/storage/innobase/include/ha0storage.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -48,7 +48,7 @@ struct ha_storage_node_t { /*******************************************************************//** Creates a hash storage. If any of the parameters is 0, then a default value is used. -@return own: hash storage */ +@return own: hash storage */ UNIV_INLINE ha_storage_t* ha_storage_create( @@ -127,7 +127,7 @@ ha_storage_free( /*******************************************************************//** Gets the size of the memory used by a storage. -@return bytes used */ +@return bytes used */ UNIV_INLINE ulint ha_storage_get_size( diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 12453099ef7..f3641f93681 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -19,33 +19,44 @@ this program; if not, write to the Free Software Foundation, Inc., /*******************************************************************//** @file include/ha_prototypes.h Prototypes for global functions in ha_innodb.cc that are called by -InnoDB C code +InnoDB C code. -Created 5/11/2006 Osku Salerma +NOTE: This header is intended to insulate InnoDB from SQL names and functions. +Do not include any headers other than univ.i into this unless they are very +simple headers. 
************************************************************************/ #ifndef HA_INNODB_PROTOTYPES_H #define HA_INNODB_PROTOTYPES_H -#include "my_dbug.h" -#include "mysqld_error.h" -#include "my_compare.h" -#include "my_sys.h" -#include "m_string.h" -#include "debug_sync.h" -#include "my_base.h" +#include "univ.i" -#include "trx0types.h" -#include "m_ctype.h" /* CHARSET_INFO */ +#if !defined UNIV_HOTBACKUP && !defined UNIV_INNOCHECKSUM -// Forward declarations +/* Forward declarations */ +class THD; class Field; struct fts_string_t; +//typedef struct charset_info_st CHARSET_INFO; + +// JAN: TODO missing features: +#undef MYSQL_57_SELECT_COUNT_OPTIMIZATION +#undef MYSQL_COMPRESSION +#undef MYSQL_ENCRYPTION +#undef MYSQL_FT_INIT_EXT +#undef MYSQL_INNODB_API_CB +#undef MYSQL_INNODB_PARTITIONING +#undef MYSQL_PFS +#undef MYSQL_RENAME_INDEX +#undef MYSQL_REPLACE_TRX_IN_THD +#undef MYSQL_SPATIAL_INDEX +#undef MYSQL_STORE_FTS_DOC_ID +#undef MYSQL_TABLESPACES +#undef MYSQL_VIRTUAL_COLUMNS /*********************************************************************//** Wrapper around MySQL's copy_and_convert function. -@return number of bytes copied to 'to' */ -UNIV_INTERN +@return number of bytes copied to 'to' */ ulint innobase_convert_string( /*====================*/ @@ -68,8 +79,7 @@ Not more than "buf_size" bytes are written to "buf". The result is always NUL-terminated (provided buf_size > 0) and the number of bytes that were written to "buf" is returned (including the terminating NUL). -@return number of bytes that were written */ -UNIV_INTERN +@return number of bytes that were written */ ulint innobase_raw_format( /*================*/ @@ -83,35 +93,50 @@ innobase_raw_format( /*****************************************************************//** Invalidates the MySQL query cache for the table. 
*/ -UNIV_INTERN void innobase_invalidate_query_cache( /*============================*/ trx_t* trx, /*!< in: transaction which modifies the table */ const char* full_name, /*!< in: concatenation of - database name, null char NUL, + database name, path separator, table name, null char NUL; NOTE that in Windows this is always in LOWER CASE! */ ulint full_name_len); /*!< in: full name length where also the null chars count */ +/** Quote a standard SQL identifier like tablespace, index or column name. +@param[in] file output stream +@param[in] trx InnoDB transaction, or NULL +@param[in] id identifier to quote */ +void +innobase_quote_identifier( + FILE* file, + trx_t* trx, + const char* id); + +/** Quote an standard SQL identifier like tablespace, index or column name. +Return the string as an std:string object. +@param[in] trx InnoDB transaction, or NULL +@param[in] id identifier to quote +@return a std::string with id properly quoted. */ +std::string +innobase_quote_identifier( + trx_t* trx, + const char* id); + /*****************************************************************//** -Convert a table or index name to the MySQL system_charset_info (UTF-8) -and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN +Convert a table name to the MySQL system_charset_info (UTF-8). 
+@return pointer to the end of buf */ char* innobase_convert_name( /*==================*/ char* buf, /*!< out: buffer for converted identifier */ ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ + const char* id, /*!< in: table name to convert */ ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool table_id);/*!< in: TRUE=id is a table or database name; - FALSE=id is an index name */ + THD* thd); /*!< in: MySQL connection thread, or NULL */ /******************************************************************//** Returns true if the thread is the replication thread on the slave @@ -119,32 +144,18 @@ server. Used in srv_conc_enter_innodb() to determine if the thread should be allowed to enter InnoDB - the replication thread is treated differently than other threads. Also used in srv_conc_force_exit_innodb(). -@return true if thd is the replication thread */ -UNIV_INTERN +@return true if thd is the replication thread */ ibool thd_is_replication_slave_thread( /*============================*/ THD* thd); /*!< in: thread handle */ -/******************************************************************//** -Gets information on the durability property requested by thread. -Used when writing either a prepare or commit record to the log -buffer. -@return the durability property. */ -UNIV_INTERN -enum durability_properties -thd_requested_durability( -/*=====================*/ - const THD* thd) /*!< in: thread handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /******************************************************************//** Returns true if the transaction this thread is processing has edited non-transactional tables. Used by the deadlock detector when deciding which transaction to rollback in case of a deadlock - we try to avoid rolling back transactions that have edited non-transactional tables. 
-@return true if non-transactional tables have been edited */ -UNIV_INTERN +@return true if non-transactional tables have been edited */ ibool thd_has_edited_nontrans_tables( /*===========================*/ @@ -152,7 +163,6 @@ thd_has_edited_nontrans_tables( /*************************************************************//** Prints info of a THD object (== user session thread) to the given file. */ -UNIV_INTERN void innobase_mysql_print_thd( /*=====================*/ @@ -161,24 +171,6 @@ innobase_mysql_print_thd( uint max_query_len); /*!< in: max query length to print, or 0 to use the default max length */ -/*************************************************************//** -InnoDB uses this function to compare two data fields for which the data type -is such that we must use MySQL code to compare them. -@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -UNIV_INTERN -int -innobase_mysql_cmp( -/*===============*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /*****************************************************************//** Log code calls this whenever log has been written and/or flushed up to a new position. We use this to notify upper layer of a new commit @@ -186,30 +178,24 @@ checkpoint when necessary.*/ UNIV_INTERN void innobase_mysql_log_notify( -/*===============*/ +/*======================*/ ib_uint64_t write_lsn, /*!< in: LSN written to log file */ ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */ -/**************************************************************//** -Converts a MySQL type to an InnoDB type. Note that this function returns +/** Converts a MySQL type to an InnoDB type. 
Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. -@return DATA_BINARY, DATA_VARCHAR, ... */ -UNIV_INTERN +@param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type'; +at least ENUM and SET, and unsigned integer types are 'unsigned types' +@param[in] f MySQL Field +@return DATA_BINARY, DATA_VARCHAR, ... */ ulint get_innobase_type_from_mysql_type( -/*==============================*/ - ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an - 'unsigned type'; - at least ENUM and SET, - and unsigned integer - types are 'unsigned types' */ - const void* field) /*!< in: MySQL Field */ - MY_ATTRIBUTE((nonnull)); + ulint* unsigned_flag, + const void* field); /******************************************************************//** Get the variable length bounds of the given character set. */ -UNIV_INTERN void innobase_get_cset_width( /*====================*/ @@ -219,8 +205,7 @@ innobase_get_cset_width( /******************************************************************//** Compares NUL-terminated UTF-8 strings case insensitively. -@return 0 if a=b, <0 if a<b, >1 if a>b */ -UNIV_INTERN +@return 0 if a=b, <0 if a<b, >1 if a>b */ int innobase_strcasecmp( /*================*/ @@ -231,26 +216,22 @@ innobase_strcasecmp( Compares NUL-terminated UTF-8 strings case insensitively. The second string contains wildcards. @return 0 if a match is found, 1 if not */ -UNIV_INTERN int innobase_wildcasecmp( /*=================*/ const char* a, /*!< in: string to compare */ const char* b); /*!< in: wildcard string to compare */ -/******************************************************************//** -Strip dir name from a full path name and return only its file name.
+/** Strip dir name from a full path name and return only the file name +@param[in] path_name full path name @return file name or "null" if no file name */ -UNIV_INTERN const char* innobase_basename( -/*==============*/ - const char* path_name); /*!< in: full path name */ + const char* path_name); /******************************************************************//** Returns true if the thread is executing a SELECT statement. -@return true if thd is executing SELECT */ -UNIV_INTERN +@return true if thd is executing SELECT */ ibool thd_is_select( /*==========*/ @@ -258,29 +239,26 @@ thd_is_select( /******************************************************************//** Converts an identifier to a table name. */ -UNIV_INTERN void innobase_convert_from_table_id( /*===========================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len); /*!< in: length of 'to', in bytes; should - be at least 5 * strlen(to) + 1 */ + CHARSET_INFO* cs, /*!< in: the 'from' character set */ + char* to, /*!< out: converted identifier */ + const char* from, /*!< in: identifier to convert */ + ulint len); /*!< in: length of 'to', in bytes; should + be at least 5 * strlen(to) + 1 */ /******************************************************************//** Converts an identifier to UTF-8. 
*/ -UNIV_INTERN void innobase_convert_from_id( /*=====================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len); /*!< in: length of 'to', in bytes; - should be at least 3 * strlen(to) + 1 */ + CHARSET_INFO* cs, /*!< in: the 'from' character set */ + char* to, /*!< out: converted identifier */ + const char* from, /*!< in: identifier to convert */ + ulint len); /*!< in: length of 'to', in bytes; + should be at least 3 * strlen(to) + 1 */ /******************************************************************//** Makes all characters in a NUL-terminated UTF-8 string lower case. */ -UNIV_INTERN void innobase_casedn_str( /*================*/ @@ -300,29 +278,41 @@ int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, /**********************************************************************//** Determines the connection character set. -@return connection character set */ -UNIV_INTERN -struct charset_info_st* +@return connection character set */ +CHARSET_INFO* innobase_get_charset( /*=================*/ THD* thd); /*!< in: MySQL thread handle */ -/**********************************************************************//** -Determines the current SQL statement. -@return SQL statement string */ -UNIV_INTERN + +/** Determines the current SQL statement. +Thread unsafe, can only be called from the thread owning the THD. +@param[in] thd MySQL thread handle +@param[out] length Length of the SQL statement +@return SQL statement string */ const char* -innobase_get_stmt( -/*==============*/ - THD* thd, /*!< in: MySQL thread handle */ - size_t* length) /*!< out: length of the SQL statement */ - MY_ATTRIBUTE((nonnull)); +innobase_get_stmt_unsafe( + THD* thd, + size_t* length); + +/** Determines the current SQL statement. +Thread safe, can be called from any thread as the string is copied +into the provided buffer. 
+@param[in] thd MySQL thread handle +@param[out] buf Buffer containing SQL statement +@param[in] buflen Length of provided buffer +@return Length of the SQL statement */ +size_t +innobase_get_stmt_safe( + THD* thd, + char* buf, + size_t buflen); + /******************************************************************//** This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. The function finds charset information and returns length of prefix_len characters in the index field in bytes. -@return number of bytes occupied by the first n characters */ -UNIV_INTERN +@return number of bytes occupied by the first n characters */ ulint innobase_get_at_most_n_mbchars( /*===========================*/ @@ -333,20 +323,10 @@ innobase_get_at_most_n_mbchars( ulint data_len, /*!< in: length of the string in bytes */ const char* str); /*!< in: character string */ -/*************************************************************//** -InnoDB index push-down condition check -@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ -UNIV_INTERN -enum icp_result -innobase_index_cond( -/*================*/ - void* file) /*!< in/out: pointer to ha_innobase */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /******************************************************************//** Returns true if the thread supports XA, global value of innodb_supports_xa if thd is NULL. -@return true if thd supports XA */ -UNIV_INTERN +@return true if thd supports XA */ ibool thd_supports_xa( /*============*/ @@ -364,8 +344,7 @@ thd_innodb_tmpdir( /******************************************************************//** Returns the lock wait timeout for the current connection. 
-@return the lock wait timeout, in seconds */ -UNIV_INTERN +@return the lock wait timeout, in seconds */ ulong thd_lock_wait_timeout( /*==================*/ @@ -373,19 +352,25 @@ thd_lock_wait_timeout( the global innodb_lock_wait_timeout */ /******************************************************************//** Add up the time waited for the lock for the current query. */ -UNIV_INTERN void thd_set_lock_wait_time( /*===================*/ THD* thd, /*!< in/out: thread handle */ ulint value); /*!< in: time waited for the lock */ +/** Get status of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. +@retval NULL if innodb_tmpdir="" */ +const char* +thd_innodb_tmpdir( + THD* thd); + /**********************************************************************//** Get the current setting of the table_cache_size global parameter. We do a dirty read because for one there is no synchronization object and secondly there is little harm in doing so even if we get a torn read. -@return SQL statement string */ -UNIV_INTERN +@return SQL statement string */ ulint innobase_get_table_cache_size(void); /*===============================*/ @@ -395,39 +380,13 @@ Get the current setting of the lower_case_table_names global parameter from mysqld.cc. We do a dirty read because for one there is no synchronization object and secondly there is little harm in doing so even if we get a torn read. -@return value of lower_case_table_names */ -UNIV_INTERN +@return value of lower_case_table_names */ ulint innobase_get_lower_case_table_names(void); /*=====================================*/ -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ -UNIV_INTERN -int -innobase_close_thd( -/*===============*/ - THD* thd); /*!< in: MySQL thread handle for - which to close the connection */ -/*************************************************************//** -Get the next token from the given string and store it in *token. */ -UNIV_INTERN -ulint -innobase_mysql_fts_get_token( -/*=========================*/ - CHARSET_INFO* charset, /*!< in: Character set */ - const byte* start, /*!< in: start of text */ - const byte* end, /*!< in: one character past end of - text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset); /*!< out: offset to token, - measured as characters from - 'start' */ - /******************************************************************//** compare two character string case insensitively according to their charset. */ -UNIV_INTERN int innobase_fts_text_case_cmp( /*=======================*/ @@ -435,49 +394,62 @@ innobase_fts_text_case_cmp( const void* p1, /*!< in: key */ const void* p2); /*!< in: node */ -/****************************************************************//** -Get FTS field charset info from the field's prtype -@return charset info */ -UNIV_INTERN -CHARSET_INFO* -innobase_get_fts_charset( -/*=====================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number);/*!< in: number of the charset */ /******************************************************************//** Returns true if transaction should be flagged as read-only. 
-@return true if the thd is marked as read-only */ -UNIV_INTERN -ibool +@return true if the thd is marked as read-only */ +bool thd_trx_is_read_only( /*=================*/ THD* thd); /*!< in/out: thread handle */ +#if 0 +/** +Check if the transaction can be rolled back +@param[in] requestor Session requesting the lock +@param[in] holder Session that holds the lock +@return the session that will be rolled back, null don't care */ + +THD* +thd_trx_arbitrate(THD* requestor, THD* holder); + +/** +@param[in] thd Session to check +@return the priority */ + +int +thd_trx_priority(THD* thd); + +#else +static inline THD* thd_trx_arbitrate(THD*, THD*) { return NULL; } +static inline int thd_trx_priority(THD*) { return 0; } +#endif /******************************************************************//** Check if the transaction is an auto-commit transaction. TRUE also implies that it is a SELECT (read-only) transaction. -@return true if the transaction is an auto commit read-only transaction. */ -UNIV_INTERN +@return true if the transaction is an auto commit read-only transaction. */ ibool thd_trx_is_auto_commit( /*===================*/ THD* thd); /*!< in: thread handle, or NULL */ +/******************************************************************//** +Get the thread start time. +@return the thread start time in seconds since the epoch. */ +ulint +thd_start_time_in_secs( +/*===================*/ + THD* thd); /*!< in: thread handle, or NULL */ + /*****************************************************************//** -A wrapper function of innobase_convert_name(), convert a table or -index name to the MySQL system_charset_info (UTF-8) and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN +A wrapper function of innobase_convert_name(), convert a table name +to the MySQL system_charset_info (UTF-8) and quote it if needed. 
+@return pointer to the end of buf */ void innobase_format_name( /*==================*/ - char* buf, /*!< out: buffer for converted - identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* name, /*!< in: index or table name - to format */ - ibool is_index_name) /*!< in: index name */ - MY_ATTRIBUTE((nonnull)); + char* buf, /*!< out: buffer for converted identifier */ + ulint buflen, /*!< in: length of buf, in bytes */ + const char* name); /*!< in: table name to format */ /** Corresponds to Sql_condition:enum_warning_level. */ enum ib_log_level_t { @@ -498,7 +470,6 @@ void push_warning_printf( THD *thd, Sql_condition::enum_warning_level level, uint code, const char *format, ...); */ -UNIV_INTERN void ib_errf( /*====*/ @@ -519,7 +490,6 @@ void push_warning_printf( THD *thd, Sql_condition::enum_warning_level level, uint code, const char *format, ...); */ -UNIV_INTERN void ib_senderrf( /*========*/ @@ -528,22 +498,19 @@ ib_senderrf( ib_uint32_t code, /*!< MySQL error code */ ...); /*!< Args */ -/******************************************************************//** -Write a message to the MySQL log, prefixed with "InnoDB: ". -Wrapper around sql_print_information() */ -UNIV_INTERN -void -ib_logf( -/*====*/ - ib_log_level_t level, /*!< in: warning level */ - const char* format, /*!< printf format */ - ...) /*!< Args */ - MY_ATTRIBUTE((format(printf, 2, 3))); +extern const char* TROUBLESHOOTING_MSG; +extern const char* TROUBLESHOOT_DATADICT_MSG; +extern const char* BUG_REPORT_MSG; +extern const char* FORCE_RECOVERY_MSG; +extern const char* ERROR_CREATING_MSG; +extern const char* OPERATING_SYSTEM_ERROR_MSG; +extern const char* FOREIGN_KEY_CONSTRAINTS_MSG; +extern const char* SET_TRANSACTION_MSG; +extern const char* INNODB_PARAMETERS_MSG; /******************************************************************//** Returns the NUL terminated value of glob_hostname. -@return pointer to glob_hostname. */ -UNIV_INTERN +@return pointer to glob_hostname. 
*/ const char* server_get_hostname(); /*=================*/ @@ -551,7 +518,6 @@ server_get_hostname(); /******************************************************************//** Get the error message format string. @return the format string or 0 if not found. */ -UNIV_INTERN const char* innobase_get_err_msg( /*=================*/ @@ -574,8 +540,7 @@ values we want to reserve for multi-value inserts e.g., innobase_next_autoinc() will be called with increment set to 3 where autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for the multi-value INSERT above. -@return the next value */ -UNIV_INTERN +@return the next value */ ulonglong innobase_next_autoinc( /*==================*/ @@ -586,16 +551,6 @@ innobase_next_autoinc( ulonglong max_value) /*!< in: max value for type */ MY_ATTRIBUTE((pure, warn_unused_result)); -/********************************************************************//** -Get the upper limit of the MySQL integral and floating-point type. -@return maximum allowed value for the field */ -UNIV_INTERN -ulonglong -innobase_get_int_col_max_value( -/*===========================*/ - const Field* field) /*!< in: MySQL field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - /********************************************************************** Converts an identifier from my_charset_filename to UTF-8 charset. */ uint @@ -605,12 +560,10 @@ innobase_convert_to_system_charset( const char* from, /* in: identifier to convert */ ulint len, /* in: length of 'to', in bytes */ uint* errors); /* out: error return */ - /********************************************************************** Check if the length of the identifier exceeds the maximum allowed. The input to this function is an identifier in charset my_charset_filename. return true when length of identifier is too long. 
*/ -UNIV_INTERN my_bool innobase_check_identifier_length( /*=============================*/ @@ -622,9 +575,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */ uint innobase_convert_to_system_charset( /*===============================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len, /* in: length of 'to', in bytes */ + char* to, /* out: converted identifier */ + const char* from, /* in: identifier to convert */ + ulint len, /* in: length of 'to', in bytes */ uint* errors); /* out: error return */ /********************************************************************** @@ -632,9 +585,9 @@ Converts an identifier from my_charset_filename to UTF-8 charset. */ uint innobase_convert_to_filename_charset( /*=================================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len); /* in: length of 'to', in bytes */ + char* to, /* out: converted identifier */ + const char* from, /* in: identifier to convert */ + ulint len); /* in: length of 'to', in bytes */ /********************************************************************//** Helper function to push warnings from InnoDB internals to SQL-layer. */ @@ -662,12 +615,44 @@ database name catenated to '/' and table name. An example: test/mytable. On Windows normalization puts both the database name and the table name always to lower case if "set_lower_case" is set to TRUE. 
*/ void -normalize_table_name_low( -/*=====================*/ +normalize_table_name_c_low( +/*=======================*/ char* norm_name, /*!< out: normalized name as a null-terminated string */ const char* name, /*!< in: table name string */ ibool set_lower_case); /*!< in: TRUE if we want to set name to lower case */ +/*************************************************************//** +InnoDB index push-down condition check defined in ha_innodb.cc +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +#include <my_compare.h> + +ICP_RESULT +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ + MY_ATTRIBUTE((warn_unused_result)); + +/******************************************************************//** +Gets information on the durability property requested by thread. +Used when writing either a prepare or commit record to the log +buffer. +@return the durability property. */ + +#include <dur_prop.h> + +enum durability_properties +thd_requested_durability( +/*=====================*/ + const THD* thd) /*!< in: thread handle */ + MY_ATTRIBUTE((warn_unused_result)); + +/** Update the system variable with the given value of the InnoDB +buffer pool size. +@param[in] buf_pool_size given value of buffer pool size.*/ +void +innodb_set_buf_pool_size(ulonglong buf_pool_size); + +#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */ #endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h index 3dd6c99eb6d..1c690839449 100644 --- a/storage/innobase/include/handler0alter.h +++ b/storage/innobase/include/handler0alter.h @@ -23,7 +23,6 @@ Smart ALTER TABLE /*************************************************************//** Copies an InnoDB record to table->record[0]. */ -UNIV_INTERN void innobase_rec_to_mysql( /*==================*/ @@ -36,7 +35,6 @@ innobase_rec_to_mysql( /*************************************************************//** Copies an InnoDB index entry to table->record[0].
*/ -UNIV_INTERN void innobase_fields_to_mysql( /*=====================*/ @@ -47,7 +45,6 @@ innobase_fields_to_mysql( /*************************************************************//** Copies an InnoDB row to table->record[0]. */ -UNIV_INTERN void innobase_row_to_mysql( /*==================*/ @@ -58,7 +55,6 @@ innobase_row_to_mysql( /*************************************************************//** Resets table->record[0]. */ -UNIV_INTERN void innobase_rec_reset( /*===============*/ @@ -70,13 +66,12 @@ auto_increment_increment and auto_increment_offset variables. */ struct ib_sequence_t { /** - @param thd - the session - @param start_value - the lower bound - @param max_value - the upper bound (inclusive) */ + @param thd the session + @param start_value the lower bound + @param max_value the upper bound (inclusive) */ ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value); - /** - Postfix increment + /** Postfix increment @return the value to insert */ ulonglong operator++(int) UNIV_NOTHROW; diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index 9a4077befb1..a7bcee1185b 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,6 @@ Created 5/20/1997 Heikki Tuuri #include "univ.i" #include "mem0mem.h" #ifndef UNIV_HOTBACKUP -# include "sync0sync.h" # include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ @@ -56,8 +55,7 @@ enum hash_table_sync_t { /*************************************************************//** Creates a hash table with >= n array cells. 
The actual number of cells is chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN +@return own: created table */ hash_table_t* hash_create( /*========*/ @@ -67,39 +65,26 @@ hash_create( Creates a sync object array array to protect a hash table. ::sync_obj can be mutexes or rw_locks depening on the type of hash table. */ -UNIV_INTERN void -hash_create_sync_obj_func( -/*======================*/ +hash_create_sync_obj( +/*=================*/ hash_table_t* table, /*!< in: hash table */ - enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX + hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX or HASH_TABLE_SYNC_RW_LOCK */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level,/*!< in: latching order level - of the mutexes: used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ + latch_id_t id, /*!< in: mutex/rw_lock ID */ ulint n_sync_obj);/*!< in: number of sync objects, must be a power of 2 */ -#ifdef UNIV_SYNC_DEBUG -# define hash_create_sync_obj(t, s, n, level) \ - hash_create_sync_obj_func(t, s, level, n) -#else /* UNIV_SYNC_DEBUG */ -# define hash_create_sync_obj(t, s, n, level) \ - hash_create_sync_obj_func(t, s, n) -#endif /* UNIV_SYNC_DEBUG */ #endif /* !UNIV_HOTBACKUP */ /*************************************************************//** Frees a hash table. */ -UNIV_INTERN void hash_table_free( /*============*/ hash_table_t* table); /*!< in, own: hash table */ /**************************************************************//** Calculates the hash value from a folded value. -@return hashed value */ +@return hashed value */ UNIV_INLINE ulint hash_calc_hash( @@ -268,7 +253,7 @@ do { \ /************************************************************//** Gets the nth cell in a hash table. 
-@return pointer to cell */ +@return pointer to cell */ UNIV_INLINE hash_cell_t* hash_get_nth_cell( @@ -286,7 +271,7 @@ hash_table_clear( /*************************************************************//** Returns the number of cells in a hash table. -@return number of cells */ +@return number of cells */ UNIV_INLINE ulint hash_get_n_cells( @@ -364,10 +349,12 @@ do {\ cell_count2222 = hash_get_n_cells(OLD_TABLE);\ \ for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ - NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\ + NODE_TYPE* node2222 = static_cast<NODE_TYPE*>(\ + HASH_GET_FIRST((OLD_TABLE), i2222));\ \ while (node2222) {\ - NODE_TYPE* next2222 = node2222->PTR_NAME;\ + NODE_TYPE* next2222 = static_cast<NODE_TYPE*>(\ + node2222->PTR_NAME);\ ulint fold2222 = FOLD_FUNC(node2222);\ \ HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\ @@ -380,7 +367,7 @@ do {\ /************************************************************//** Gets the sync object index for a fold value in a hash table. -@return index */ +@return index */ UNIV_INLINE ulint hash_get_sync_obj_index( @@ -389,7 +376,7 @@ hash_get_sync_obj_index( ulint fold); /*!< in: fold */ /************************************************************//** Gets the nth heap in a hash table. -@return mem heap */ +@return mem heap */ UNIV_INLINE mem_heap_t* hash_get_nth_heap( @@ -398,7 +385,7 @@ hash_get_nth_heap( ulint i); /*!< in: index of the heap */ /************************************************************//** Gets the heap for a fold value in a hash table. -@return mem heap */ +@return mem heap */ UNIV_INLINE mem_heap_t* hash_get_heap( @@ -407,7 +394,7 @@ hash_get_heap( ulint fold); /*!< in: fold */ /************************************************************//** Gets the nth mutex in a hash table.
-@return mutex */ +@return mutex */ UNIV_INLINE ib_mutex_t* hash_get_nth_mutex( @@ -416,7 +403,7 @@ hash_get_nth_mutex( ulint i); /*!< in: index of the mutex */ /************************************************************//** Gets the nth rw_lock in a hash table. -@return rw_lock */ +@return rw_lock */ UNIV_INLINE rw_lock_t* hash_get_nth_lock( @@ -425,7 +412,7 @@ hash_get_nth_lock( ulint i); /*!< in: index of the rw_lock */ /************************************************************//** Gets the mutex for a fold value in a hash table. -@return mutex */ +@return mutex */ UNIV_INLINE ib_mutex_t* hash_get_mutex( @@ -434,16 +421,42 @@ hash_get_mutex( ulint fold); /*!< in: fold */ /************************************************************//** Gets the rw_lock for a fold value in a hash table. -@return rw_lock */ +@return rw_lock */ UNIV_INLINE rw_lock_t* hash_get_lock( /*==========*/ hash_table_t* table, /*!< in: hash table */ ulint fold); /*!< in: fold */ + +/** If not appropriate rw_lock for a fold value in a hash table, +relock S-lock the another rw_lock until appropriate for a fold value. +@param[in] hash_lock latched rw_lock to be confirmed +@param[in] table hash table +@param[in] fold fold value +@return latched rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_lock_s_confirm( + rw_lock_t* hash_lock, + hash_table_t* table, + ulint fold); + +/** If not appropriate rw_lock for a fold value in a hash table, +relock X-lock the another rw_lock until appropriate for a fold value. +@param[in] hash_lock latched rw_lock to be confirmed +@param[in] table hash table +@param[in] fold fold value +@return latched rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_lock_x_confirm( + rw_lock_t* hash_lock, + hash_table_t* table, + ulint fold); + /************************************************************//** Reserves the mutex for a fold value in a hash table. 
*/ -UNIV_INTERN void hash_mutex_enter( /*=============*/ @@ -451,7 +464,6 @@ hash_mutex_enter( ulint fold); /*!< in: fold */ /************************************************************//** Releases the mutex for a fold value in a hash table. */ -UNIV_INTERN void hash_mutex_exit( /*============*/ @@ -459,21 +471,18 @@ hash_mutex_exit( ulint fold); /*!< in: fold */ /************************************************************//** Reserves all the mutexes of a hash table, in an ascending order. */ -UNIV_INTERN void hash_mutex_enter_all( /*=================*/ hash_table_t* table); /*!< in: hash table */ /************************************************************//** Releases all the mutexes of a hash table. */ -UNIV_INTERN void hash_mutex_exit_all( /*================*/ hash_table_t* table); /*!< in: hash table */ /************************************************************//** Releases all but the passed in mutex of a hash table. */ -UNIV_INTERN void hash_mutex_exit_all_but( /*====================*/ @@ -481,7 +490,6 @@ hash_mutex_exit_all_but( ib_mutex_t* keep_mutex); /*!< in: mutex to keep */ /************************************************************//** s-lock a lock for a fold value in a hash table. */ -UNIV_INTERN void hash_lock_s( /*========*/ @@ -489,7 +497,6 @@ hash_lock_s( ulint fold); /*!< in: fold */ /************************************************************//** x-lock a lock for a fold value in a hash table. */ -UNIV_INTERN void hash_lock_x( /*========*/ @@ -497,7 +504,6 @@ hash_lock_x( ulint fold); /*!< in: fold */ /************************************************************//** unlock an s-lock for a fold value in a hash table. */ -UNIV_INTERN void hash_unlock_s( /*==========*/ @@ -506,7 +512,6 @@ hash_unlock_s( ulint fold); /*!< in: fold */ /************************************************************//** unlock x-lock for a fold value in a hash table. 
*/ -UNIV_INTERN void hash_unlock_x( /*==========*/ @@ -514,21 +519,18 @@ hash_unlock_x( ulint fold); /*!< in: fold */ /************************************************************//** Reserves all the locks of a hash table, in an ascending order. */ -UNIV_INTERN void hash_lock_x_all( /*============*/ hash_table_t* table); /*!< in: hash table */ /************************************************************//** Releases all the locks of a hash table, in an ascending order. */ -UNIV_INTERN void hash_unlock_x_all( /*==============*/ hash_table_t* table); /*!< in: hash table */ /************************************************************//** Releases all but passed in lock of a hash table, */ -UNIV_INTERN void hash_unlock_x_all_but( /*==================*/ diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic index 254f3f82e5d..b99ac1eb501 100644 --- a/storage/innobase/include/hash0hash.ic +++ b/storage/innobase/include/hash0hash.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,7 +27,7 @@ Created 5/20/1997 Heikki Tuuri /************************************************************//** Gets the nth cell in a hash table. -@return pointer to cell */ +@return pointer to cell */ UNIV_INLINE hash_cell_t* hash_get_nth_cell( @@ -58,7 +58,7 @@ hash_table_clear( /*************************************************************//** Returns the number of cells in a hash table. 
-@return number of cells */ +@return number of cells */ UNIV_INLINE ulint hash_get_n_cells( @@ -72,7 +72,7 @@ hash_get_n_cells( /**************************************************************//** Calculates the hash value from a folded value. -@return hashed value */ +@return hashed value */ UNIV_INLINE ulint hash_calc_hash( @@ -88,7 +88,7 @@ hash_calc_hash( #ifndef UNIV_HOTBACKUP /************************************************************//** Gets the sync object index for a fold value in a hash table. -@return index */ +@return index */ UNIV_INLINE ulint hash_get_sync_obj_index( @@ -106,7 +106,7 @@ hash_get_sync_obj_index( /************************************************************//** Gets the nth heap in a hash table. -@return mem heap */ +@return mem heap */ UNIV_INLINE mem_heap_t* hash_get_nth_heap( @@ -124,7 +124,7 @@ hash_get_nth_heap( /************************************************************//** Gets the heap for a fold value in a hash table. -@return mem heap */ +@return mem heap */ UNIV_INLINE mem_heap_t* hash_get_heap( @@ -148,7 +148,7 @@ hash_get_heap( /************************************************************//** Gets the nth mutex in a hash table. -@return mutex */ +@return mutex */ UNIV_INLINE ib_mutex_t* hash_get_nth_mutex( @@ -166,7 +166,7 @@ hash_get_nth_mutex( /************************************************************//** Gets the mutex for a fold value in a hash table. -@return mutex */ +@return mutex */ UNIV_INLINE ib_mutex_t* hash_get_mutex( @@ -186,7 +186,7 @@ hash_get_mutex( /************************************************************//** Gets the nth rw_lock in a hash table. -@return rw_lock */ +@return rw_lock */ UNIV_INLINE rw_lock_t* hash_get_nth_lock( @@ -204,7 +204,7 @@ hash_get_nth_lock( /************************************************************//** Gets the rw_lock for a fold value in a hash table. 
-@return rw_lock */ +@return rw_lock */ UNIV_INLINE rw_lock_t* hash_get_lock( @@ -222,4 +222,58 @@ hash_get_lock( return(hash_get_nth_lock(table, i)); } + +/** If not appropriate rw_lock for a fold value in a hash table, +relock S-lock the another rw_lock until appropriate for a fold value. +@param[in] hash_lock latched rw_lock to be confirmed +@param[in] table hash table +@param[in] fold fold value +@return latched rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_lock_s_confirm( + rw_lock_t* hash_lock, + hash_table_t* table, + ulint fold) +{ + ut_ad(rw_lock_own(hash_lock, RW_LOCK_S)); + + rw_lock_t* hash_lock_tmp = hash_get_lock(table, fold); + + while (hash_lock_tmp != hash_lock) { + rw_lock_s_unlock(hash_lock); + hash_lock = hash_lock_tmp; + rw_lock_s_lock(hash_lock); + hash_lock_tmp = hash_get_lock(table, fold); + } + + return(hash_lock); +} + +/** If not appropriate rw_lock for a fold value in a hash table, +relock X-lock the another rw_lock until appropriate for a fold value. +@param[in] hash_lock latched rw_lock to be confirmed +@param[in] table hash table +@param[in] fold fold value +@return latched rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_lock_x_confirm( + rw_lock_t* hash_lock, + hash_table_t* table, + ulint fold) +{ + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); + + rw_lock_t* hash_lock_tmp = hash_get_lock(table, fold); + + while (hash_lock_tmp != hash_lock) { + rw_lock_x_unlock(hash_lock); + hash_lock = hash_lock_tmp; + rw_lock_x_lock(hash_lock); + hash_lock_tmp = hash_get_lock(table, fold); + } + + return(hash_lock); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h new file mode 100644 index 00000000000..3ea0687da43 --- /dev/null +++ b/storage/innobase/include/ib0mutex.h @@ -0,0 +1,1166 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/ib0mutex.h +Policy based mutexes. + +Created 2013-03-26 Sunny Bains. +***********************************************************************/ + +#ifndef UNIV_INNOCHECKSUM + +#ifndef ib0mutex_h +#define ib0mutex_h + +#include "ut0ut.h" +#include "ut0rnd.h" +#include "os0event.h" + +/** OS mutex for tracking lock/unlock for debugging */ +template